/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3. If not see
   <http://www.gnu.org/licenses/>. */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"

/* This file should be included last. */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B) ((A) < (B) ? (A) : (B))
#define max(A,B) ((A) > (B) ? (A) : (B))

/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;       /* stack info won't change from here on */
  int first_gp_reg_save;      /* first callee saved GP register used */
  int first_fp_reg_save;      /* first callee saved FP register used */
  int first_altivec_reg_save; /* first callee saved AltiVec register used */
  int lr_save_p;              /* true if the link reg needs to be saved */
  int cr_save_p;              /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;   /* mask of vec registers to save */
  int push_p;                 /* true if we need to allocate stack space */
  int calls_p;                /* true if the function makes any calls */
  int world_save_p;           /* true if we're saving *everything*:
                                 r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;        /* which ABI to use */
  int gp_save_offset;         /* offset to save GP regs from initial SP */
  int fp_save_offset;         /* offset to save FP regs from initial SP */
  int altivec_save_offset;    /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;         /* offset to save LR from initial SP */
  int cr_save_offset;         /* offset to save CR from initial SP */
  int vrsave_save_offset;     /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;     /* offset to save spe 64-bit gprs */
  int varargs_save_offset;    /* offset to save the varargs registers */
  int ehrd_offset;            /* offset to EH return data */
  int ehcr_offset;            /* offset to EH CR field data */
  int reg_size;               /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;    /* variable save area size */
  int parm_size;              /* outgoing parameter size */
  int save_size;              /* save area size */
  int fixed_size;             /* fixed size of stack frame */
  int gp_size;                /* size of saved GP registers */
  int fp_size;                /* size of saved FP registers */
  int altivec_size;           /* size of saved AltiVec registers */
  int cr_size;                /* size to hold CR if not in fixed area */
  int vrsave_size;            /* size to hold VRSAVE */
  int altivec_padding_size;   /* size of altivec alignment padding */
  int spe_gp_size;            /* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;   /* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure. */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already. */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used. */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used. */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return. */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue. */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area. */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies. This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field. */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack. */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI. */
  bool r2_setup_needed;
} machine_function;

/* Support targetm.vectorize.builtin_mask_for_load. */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined. */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, which we call so
   that we can get the address of the GOT section. */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup. */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux. */
int dot_symbols;

/* Specify the machine mode that pointers have. After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode. The type is unsigned since not all things that
   include rs6000.h also include machmode.h. */
unsigned rs6000_pmode;

/* Width in bits of a pointer. */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned. */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned. */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned. */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable. */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode. */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode. */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class. */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types. */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue. This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes. */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches. */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector. */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins. */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for. */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically. */
enum rs6000_recip_mask {
  RECIP_SF_DIV = 0x001,       /* Use divide estimate */
  RECIP_DF_DIV = 0x002,
  RECIP_V4SF_DIV = 0x004,
  RECIP_V2DF_DIV = 0x008,

  RECIP_SF_RSQRT = 0x010,     /* Use reciprocal sqrt estimate. */
  RECIP_DF_RSQRT = 0x020,
  RECIP_V4SF_RSQRT = 0x040,
  RECIP_V2DF_RSQRT = 0x080,

  /* Various combinations of flags for -mrecip=xxx. */
  RECIP_NONE = 0,
  RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
               | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
               | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough. */
  RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options. */
static struct
{
  const char *string;  /* option name */
  unsigned int mask;   /* mask bits to set */
} recip_options[] = {
  { "all", RECIP_ALL },
  { "none", RECIP_NONE },
  { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
            | RECIP_V2DF_DIV) },
  { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
              | RECIP_V2DF_RSQRT) },
  { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
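/* A worked example of the mapping above: -mrecip=divf selects
   RECIP_SF_DIV | RECIP_V4SF_DIV (0x001 | 0x004 == 0x005), i.e. divide
   estimates for SFmode and V4SFmode only. */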

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed. Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly. */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications. We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx). */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type. */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers). */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or when checking for a
   legitimate address. We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes. */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,  /* General purpose registers. */
  RELOAD_REG_FPR,  /* Traditional floating point regs. */
  RELOAD_REG_VMX,  /* Altivec (VMX) registers. */
  RELOAD_REG_ANY,  /* OR of GPR, FPR, Altivec masks. */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits. */
#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX

/* Map reload register type to a register in the register class. */
struct reload_reg_map_type {
  const char *name;  /* Register class name. */
  int reg;           /* Register in the register class. */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },      /* RELOAD_REG_GPR. */
  { "Fpr", FIRST_FPR_REGNO },      /* RELOAD_REG_FPR. */
  { "VMX", FIRST_ALTIVEC_REGNO },  /* RELOAD_REG_VMX. */
  { "Any", -1 },                   /* RELOAD_REG_ANY. */
};

/* Mask bits for each register class, indexed per mode. Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two. */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID      0x01  /* Mode valid in register. */
#define RELOAD_REG_MULTIPLE   0x02  /* Mode takes multiple registers. */
#define RELOAD_REG_INDEXED    0x04  /* Reg+reg addressing. */
#define RELOAD_REG_OFFSET     0x08  /* Reg+offset addressing. */
#define RELOAD_REG_PRE_INCDEC 0x10  /* PRE_INC/PRE_DEC valid. */
#define RELOAD_REG_PRE_MODIFY 0x20  /* PRE_MODIFY valid. */
#define RELOAD_REG_AND_M16    0x40  /* AND -16 addressing. */
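/* For example, a mode that is valid in some register class and supports both
   reg+reg and reg+offset addressing there would have an addr_mask of
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET),
   i.e. 0x01 | 0x04 | 0x08 == 0x0d. */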

/* Masks of the valid addressing modes, and the reload insns to use, for
   each register type. */
struct rs6000_reg_addr {
  enum insn_code reload_load;     /* INSN to reload for loading. */
  enum insn_code reload_store;    /* INSN to reload for storing. */
  enum insn_code reload_fpr_gpr;  /* INSN to move from FPR to GPR. */
  enum insn_code reload_gpr_vsx;  /* INSN to move from GPR to VSX. */
  enum insn_code reload_vsx_gpr;  /* INSN to move from VSX to GPR. */
  enum insn_code fusion_gpr_ld;   /* INSN for fusing gpr ADDIS/loads. */
  /* INSNs for fusing addi with loads
     or stores for each reg. class. */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
  /* INSNs for fusing addis with loads
     or stores for each reg. class. */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];  /* Valid address masks. */
  bool scalar_in_vmx_p;  /* Scalar value can go in VMX. */
  bool fused_toc;        /* Mode supports TOC fusion. */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY. */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers. */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}
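/* Note that the predicates above consult the reg_addr[] table, which is
   filled in when the target options are processed (see
   rs6000_init_hard_regno_mode_ok later in this file); until then the masks
   are zero and the predicates conservatively return false. */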

\f
/* Target cpu costs. */

struct processor_costs {
  const int mulsi;         /* cost of SImode multiplication. */
  const int mulsi_const;   /* cost of SImode multiplication by constant. */
  const int mulsi_const9;  /* cost of SImode mult by short constant. */
  const int muldi;         /* cost of DImode multiplication. */
  const int divsi;         /* cost of SImode division. */
  const int divdi;         /* cost of DImode division. */
  const int fp;            /* cost of simple SFmode and DFmode insns. */
  const int dmul;          /* cost of DFmode multiplication (and fmadd). */
  const int sdiv;          /* cost of SFmode division (fdivs). */
  const int ddiv;          /* cost of DFmode division (fdiv). */
  const int cache_line_size;  /* cache line size in bytes. */
  const int l1_cache_size;    /* size of l1 cache, in kilobytes. */
  const int l2_cache_size;    /* size of l2 cache, in kilobytes. */
  const int simultaneous_prefetches;  /* number of parallel prefetch
                                         operations. */
  const int sfdf_convert;  /* cost of SF->DF conversion. */
};
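/* All of the cost tables below are expressed via COSTS_N_INSNS, which scales
   an instruction count into GCC's generic rtx-cost units; COSTS_N_INSNS (1)
   is the cost of one simple insn (rtl.h currently defines COSTS_N_INSNS (N)
   as (N) * 4). */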

const struct processor_costs *rs6000_cost;

/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors. */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),  /* mulsi */
  COSTS_N_INSNS (1),  /* mulsi_const */
  COSTS_N_INSNS (1),  /* mulsi_const9 */
  COSTS_N_INSNS (1),  /* muldi */
  COSTS_N_INSNS (1),  /* divsi */
  COSTS_N_INSNS (1),  /* divdi */
  COSTS_N_INSNS (1),  /* fp */
  COSTS_N_INSNS (1),  /* dmul */
  COSTS_N_INSNS (1),  /* sdiv */
  COSTS_N_INSNS (1),  /* ddiv */
  32,  /* cache line size */
  0,   /* l1 cache */
  0,   /* l2 cache */
  0,   /* streams */
  0,   /* SF->DF convert */
};

/* Instruction size costs on 64bit processors. */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),  /* mulsi */
  COSTS_N_INSNS (1),  /* mulsi_const */
  COSTS_N_INSNS (1),  /* mulsi_const9 */
  COSTS_N_INSNS (1),  /* muldi */
  COSTS_N_INSNS (1),  /* divsi */
  COSTS_N_INSNS (1),  /* divdi */
  COSTS_N_INSNS (1),  /* fp */
  COSTS_N_INSNS (1),  /* dmul */
  COSTS_N_INSNS (1),  /* sdiv */
  COSTS_N_INSNS (1),  /* ddiv */
  128,  /* cache line size */
  0,    /* l1 cache */
  0,    /* l2 cache */
  0,    /* streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on RS64A processors. */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),  /* mulsi */
  COSTS_N_INSNS (12),  /* mulsi_const */
  COSTS_N_INSNS (8),   /* mulsi_const9 */
  COSTS_N_INSNS (34),  /* muldi */
  COSTS_N_INSNS (65),  /* divsi */
  COSTS_N_INSNS (67),  /* divdi */
  COSTS_N_INSNS (4),   /* fp */
  COSTS_N_INSNS (4),   /* dmul */
  COSTS_N_INSNS (31),  /* sdiv */
  COSTS_N_INSNS (31),  /* ddiv */
  128,   /* cache line size */
  128,   /* l1 cache */
  2048,  /* l2 cache */
  1,     /* streams */
  0,     /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors. */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),   /* mulsi */
  COSTS_N_INSNS (2),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (2),   /* muldi */
  COSTS_N_INSNS (6),   /* divsi */
  COSTS_N_INSNS (6),   /* divdi */
  COSTS_N_INSNS (4),   /* fp */
  COSTS_N_INSNS (5),   /* dmul */
  COSTS_N_INSNS (10),  /* sdiv */
  COSTS_N_INSNS (17),  /* ddiv */
  32,  /* cache line size */
  4,   /* l1 cache */
  16,  /* l2 cache */
  1,   /* streams */
  0,   /* SF->DF convert */
};

/* Instruction costs on PPC403 processors. */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (33),  /* divsi */
  COSTS_N_INSNS (33),  /* divdi */
  COSTS_N_INSNS (11),  /* fp */
  COSTS_N_INSNS (11),  /* dmul */
  COSTS_N_INSNS (11),  /* sdiv */
  COSTS_N_INSNS (11),  /* ddiv */
  32,  /* cache line size */
  4,   /* l1 cache */
  16,  /* l2 cache */
  1,   /* streams */
  0,   /* SF->DF convert */
};

/* Instruction costs on PPC405 processors. */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (3),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (35),  /* divsi */
  COSTS_N_INSNS (35),  /* divdi */
  COSTS_N_INSNS (11),  /* fp */
  COSTS_N_INSNS (11),  /* dmul */
  COSTS_N_INSNS (11),  /* sdiv */
  COSTS_N_INSNS (11),  /* ddiv */
  32,   /* cache line size */
  16,   /* l1 cache */
  128,  /* l2 cache */
  1,    /* streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors. */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),   /* mulsi */
  COSTS_N_INSNS (2),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (3),   /* muldi */
  COSTS_N_INSNS (34),  /* divsi */
  COSTS_N_INSNS (34),  /* divdi */
  COSTS_N_INSNS (5),   /* fp */
  COSTS_N_INSNS (5),   /* dmul */
  COSTS_N_INSNS (19),  /* sdiv */
  COSTS_N_INSNS (33),  /* ddiv */
  32,   /* cache line size */
  32,   /* l1 cache */
  256,  /* l2 cache */
  1,    /* streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors. */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (11),  /* divsi */
  COSTS_N_INSNS (11),  /* divdi */
  COSTS_N_INSNS (6),   /* fp */
  COSTS_N_INSNS (6),   /* dmul */
  COSTS_N_INSNS (19),  /* sdiv */
  COSTS_N_INSNS (33),  /* ddiv */
  32,   /* cache line size */
  32,   /* l1 cache */
  512,  /* l2 cache */
  1,    /* streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors. */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (5),   /* mulsi_const */
  COSTS_N_INSNS (5),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (36),  /* divsi */
  COSTS_N_INSNS (36),  /* divdi */
  COSTS_N_INSNS (4),   /* fp */
  COSTS_N_INSNS (5),   /* dmul */
  COSTS_N_INSNS (17),  /* sdiv */
  COSTS_N_INSNS (31),  /* ddiv */
  32,   /* cache line size */
  32,   /* l1 cache */
  256,  /* l2 cache */
  1,    /* streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors. */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (3),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (37),  /* divsi */
  COSTS_N_INSNS (37),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (4),   /* dmul */
  COSTS_N_INSNS (18),  /* sdiv */
  COSTS_N_INSNS (33),  /* ddiv */
  32,  /* cache line size */
  8,   /* l1 cache */
  64,  /* l2 cache */
  1,   /* streams */
  0,   /* SF->DF convert */
};

/* Instruction costs on PPC604 processors. */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (20),  /* divsi */
  COSTS_N_INSNS (20),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (18),  /* sdiv */
  COSTS_N_INSNS (32),  /* ddiv */
  32,   /* cache line size */
  16,   /* l1 cache */
  512,  /* l2 cache */
  1,    /* streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors. */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),   /* mulsi */
  COSTS_N_INSNS (2),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (2),   /* muldi */
  COSTS_N_INSNS (20),  /* divsi */
  COSTS_N_INSNS (20),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (18),  /* sdiv */
  COSTS_N_INSNS (32),  /* ddiv */
  32,    /* cache line size */
  32,    /* l1 cache */
  1024,  /* l2 cache */
  1,     /* streams */
  0,     /* SF->DF convert */
};

/* Instruction costs on PPC620 processors. */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (3),   /* mulsi_const9 */
  COSTS_N_INSNS (7),   /* muldi */
  COSTS_N_INSNS (21),  /* divsi */
  COSTS_N_INSNS (37),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (18),  /* sdiv */
  COSTS_N_INSNS (32),  /* ddiv */
  128,   /* cache line size */
  32,    /* l1 cache */
  1024,  /* l2 cache */
  1,     /* streams */
  0,     /* SF->DF convert */
};

/* Instruction costs on PPC630 processors. */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (3),   /* mulsi_const9 */
  COSTS_N_INSNS (7),   /* muldi */
  COSTS_N_INSNS (21),  /* divsi */
  COSTS_N_INSNS (37),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (17),  /* sdiv */
  COSTS_N_INSNS (21),  /* ddiv */
  128,   /* cache line size */
  64,    /* l1 cache */
  1024,  /* l2 cache */
  1,     /* streams */
  0,     /* SF->DF convert */
};

/* Instruction costs on Cell processor. */
/* COSTS_N_INSNS (1) ~ one add. */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,   /* mulsi */
  COSTS_N_INSNS (6/2),     /* mulsi_const */
  COSTS_N_INSNS (6/2),     /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,  /* muldi */
  COSTS_N_INSNS (38/2),    /* divsi */
  COSTS_N_INSNS (70/2),    /* divdi */
  COSTS_N_INSNS (10/2),    /* fp */
  COSTS_N_INSNS (10/2),    /* dmul */
  COSTS_N_INSNS (74/2),    /* sdiv */
  COSTS_N_INSNS (74/2),    /* ddiv */
  128,  /* cache line size */
  32,   /* l1 cache */
  512,  /* l2 cache */
  6,    /* streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors. */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (3),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (17),  /* divsi */
  COSTS_N_INSNS (17),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (17),  /* sdiv */
  COSTS_N_INSNS (31),  /* ddiv */
  32,   /* cache line size */
  32,   /* l1 cache */
  512,  /* l2 cache */
  1,    /* streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors. */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (3),   /* mulsi_const */
  COSTS_N_INSNS (3),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (23),  /* divsi */
  COSTS_N_INSNS (23),  /* divdi */
  COSTS_N_INSNS (5),   /* fp */
  COSTS_N_INSNS (5),   /* dmul */
  COSTS_N_INSNS (21),  /* sdiv */
  COSTS_N_INSNS (35),  /* ddiv */
  32,    /* cache line size */
  32,    /* l1 cache */
  1024,  /* l2 cache */
  1,     /* streams */
  0,     /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors. */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (19),  /* divsi */
  COSTS_N_INSNS (19),  /* divdi */
  COSTS_N_INSNS (4),   /* fp */
  COSTS_N_INSNS (4),   /* dmul */
  COSTS_N_INSNS (29),  /* sdiv */
  COSTS_N_INSNS (29),  /* ddiv */
  32,   /* cache line size */
  32,   /* l1 cache */
  256,  /* l2 cache */
  1,    /* prefetch streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores. */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (19),  /* divsi */
  COSTS_N_INSNS (19),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (4),   /* dmul */
  COSTS_N_INSNS (18),  /* sdiv */
  COSTS_N_INSNS (33),  /* ddiv */
  32,  /* cache line size */
  16,  /* l1 cache */
  16,  /* l2 cache */
  1,   /* prefetch streams */
  0,   /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors. */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (14),  /* divsi */
  COSTS_N_INSNS (14),  /* divdi */
  COSTS_N_INSNS (8),   /* fp */
  COSTS_N_INSNS (10),  /* dmul */
  COSTS_N_INSNS (36),  /* sdiv */
  COSTS_N_INSNS (66),  /* ddiv */
  64,   /* cache line size */
  32,   /* l1 cache */
  128,  /* l2 cache */
  1,    /* prefetch streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors. */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),   /* mulsi */
  COSTS_N_INSNS (4),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (14),  /* divsi */
  COSTS_N_INSNS (14),  /* divdi */
  COSTS_N_INSNS (4),   /* fp */
  COSTS_N_INSNS (10),  /* dmul */
  COSTS_N_INSNS (36),  /* sdiv */
  COSTS_N_INSNS (66),  /* ddiv */
  64,   /* cache line size */
  32,   /* l1 cache */
  128,  /* l2 cache */
  1,    /* prefetch streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors. */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (5),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (14),  /* divsi */
  COSTS_N_INSNS (14),  /* divdi */
  COSTS_N_INSNS (7),   /* fp */
  COSTS_N_INSNS (10),  /* dmul */
  COSTS_N_INSNS (36),  /* sdiv */
  COSTS_N_INSNS (66),  /* ddiv */
  64,   /* cache line size */
  32,   /* l1 cache */
  128,  /* l2 cache */
  1,    /* prefetch streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors. */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (5),   /* mulsi_const */
  COSTS_N_INSNS (4),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (14),  /* divsi */
  COSTS_N_INSNS (14),  /* divdi */
  COSTS_N_INSNS (7),   /* fp */
  COSTS_N_INSNS (10),  /* dmul */
  COSTS_N_INSNS (36),  /* sdiv */
  COSTS_N_INSNS (66),  /* ddiv */
  64,   /* cache line size */
  32,   /* l1 cache */
  128,  /* l2 cache */
  1,    /* prefetch streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors. */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),   /* mulsi */
  COSTS_N_INSNS (5),   /* mulsi_const */
  COSTS_N_INSNS (5),   /* mulsi_const9 */
  COSTS_N_INSNS (5),   /* muldi */
  COSTS_N_INSNS (18),  /* divsi */
  COSTS_N_INSNS (18),  /* divdi */
  COSTS_N_INSNS (10),  /* fp */
  COSTS_N_INSNS (10),  /* dmul */
  COSTS_N_INSNS (46),  /* sdiv */
  COSTS_N_INSNS (72),  /* ddiv */
  32,   /* cache line size */
  32,   /* l1 cache */
  512,  /* l2 cache */
  1,    /* prefetch streams */
  0,    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors. */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),   /* mulsi */
  COSTS_N_INSNS (2),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (4),   /* muldi */
  COSTS_N_INSNS (18),  /* divsi */
  COSTS_N_INSNS (34),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (17),  /* sdiv */
  COSTS_N_INSNS (17),  /* ddiv */
  128,   /* cache line size */
  32,    /* l1 cache */
  1024,  /* l2 cache */
  8,     /* prefetch streams */
  0,     /* SF->DF convert */
};

/* Instruction costs on POWER6 processors. */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),   /* mulsi */
  COSTS_N_INSNS (8),   /* mulsi_const */
  COSTS_N_INSNS (8),   /* mulsi_const9 */
  COSTS_N_INSNS (8),   /* muldi */
  COSTS_N_INSNS (22),  /* divsi */
  COSTS_N_INSNS (28),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (13),  /* sdiv */
  COSTS_N_INSNS (16),  /* ddiv */
  128,   /* cache line size */
  64,    /* l1 cache */
  2048,  /* l2 cache */
  16,    /* prefetch streams */
  0,     /* SF->DF convert */
};

/* Instruction costs on POWER7 processors. */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),   /* mulsi */
  COSTS_N_INSNS (2),   /* mulsi_const */
  COSTS_N_INSNS (2),   /* mulsi_const9 */
  COSTS_N_INSNS (2),   /* muldi */
  COSTS_N_INSNS (18),  /* divsi */
  COSTS_N_INSNS (34),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (13),  /* sdiv */
  COSTS_N_INSNS (16),  /* ddiv */
  128,  /* cache line size */
  32,   /* l1 cache */
  256,  /* l2 cache */
  12,   /* prefetch streams */
  COSTS_N_INSNS (3),  /* SF->DF convert */
};

/* Instruction costs on POWER8 processors. */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),   /* mulsi */
  COSTS_N_INSNS (3),   /* mulsi_const */
  COSTS_N_INSNS (3),   /* mulsi_const9 */
  COSTS_N_INSNS (3),   /* muldi */
  COSTS_N_INSNS (19),  /* divsi */
  COSTS_N_INSNS (35),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (14),  /* sdiv */
  COSTS_N_INSNS (17),  /* ddiv */
  128,  /* cache line size */
  32,   /* l1 cache */
  256,  /* l2 cache */
  12,   /* prefetch streams */
  COSTS_N_INSNS (3),  /* SF->DF convert */
};

/* Instruction costs on POWER9 processors. */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),   /* mulsi */
  COSTS_N_INSNS (3),   /* mulsi_const */
  COSTS_N_INSNS (3),   /* mulsi_const9 */
  COSTS_N_INSNS (3),   /* muldi */
  COSTS_N_INSNS (19),  /* divsi */
  COSTS_N_INSNS (35),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (14),  /* sdiv */
  COSTS_N_INSNS (17),  /* ddiv */
  128,  /* cache line size */
  32,   /* l1 cache */
  256,  /* l2 cache */
  12,   /* prefetch streams */
  COSTS_N_INSNS (3),  /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors. */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),  /* mulsi */
  COSTS_N_INSNS (16),  /* mulsi_const */
  COSTS_N_INSNS (16),  /* mulsi_const9 */
  COSTS_N_INSNS (16),  /* muldi */
  COSTS_N_INSNS (22),  /* divsi */
  COSTS_N_INSNS (28),  /* divdi */
  COSTS_N_INSNS (3),   /* fp */
  COSTS_N_INSNS (3),   /* dmul */
  COSTS_N_INSNS (59),  /* sdiv */
  COSTS_N_INSNS (72),  /* ddiv */
  64,  /* cache line size */
  16,    /* l1 cache */
  2048,  /* l2 cache */
  16,    /* prefetch streams */
  0,     /* SF->DF convert */
};

\f
/* Table that classifies rs6000 builtin functions (pure, const, etc.). */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },
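/* Each RS6000_BUILTIN_<x> macro above expands an entry of
   rs6000-builtin.def into an initializer for the table below; a
   (hypothetical) entry
     RS6000_BUILTIN_2 (FOO, "__builtin_foo", MASK, ATTR, CODE_FOR_foo)
   would become { "__builtin_foo", CODE_FOR_foo, MASK, ATTR }. */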

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use. */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries. */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions. */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];    /* return value + 3 arguments. */
  unsigned char uns_p[4];  /* and whether the types are unsigned. */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;

\f
/* Default register names. */
char rs6000_reg_names[][8] =
{
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  "0", "1", "2", "3", "4", "5", "6", "7",
  "ca",
  /* AltiVec registers. */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers. */
  "spe_acc", "spefscr",
  /* Soft frame pointer. */
  "sfp",
  /* HTM SPR registers. */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers. */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers. */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers. */
  "spe_acc", "spefscr",
  /* Soft frame pointer. */
  "sfp",
  /* HTM SPR registers. */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers. */
  "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7",
1330 "%rh8", "%rh9", "%rh10", "%r11", "%rh12", "%rh13", "%rh14", "%rh15",
1331 "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
1332 "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
1333 };
1334 #endif
1335
1336 /* Table of valid machine attributes. */
1337
1338 static const struct attribute_spec rs6000_attribute_table[] =
1339 {
1340 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
1341 affects_type_identity } */
1342 { "altivec", 1, 1, false, true, false, rs6000_handle_altivec_attribute,
1343 false },
1344 { "longcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute,
1345 false },
1346 { "shortcall", 0, 0, false, true, true, rs6000_handle_longcall_attribute,
1347 false },
1348 { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
1349 false },
1350 { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
1351 false },
1352 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1353 SUBTARGET_ATTRIBUTE_TABLE,
1354 #endif
1355 { NULL, 0, 0, false, false, false, NULL, false }
1356 };
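/* A sketch of how the "altivec" attribute above is used: rs6000-c.c
   implements the AltiVec keywords by expanding, e.g., "vector unsigned int"
   into "__attribute__ ((altivec (vector__))) unsigned int" (the identifier
   spellings here assume the usual rs6000-c.c expansion). */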
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit. */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
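/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (%v0),
   and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001 (%v31). */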
1364 \f
1365 /* Initialize the GCC target structure. */
1366 #undef TARGET_ATTRIBUTE_TABLE
1367 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1368 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1369 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1370 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1371 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1372
1373 #undef TARGET_ASM_ALIGNED_DI_OP
1374 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1375
1376 /* Default unaligned ops are only provided for ELF. Find the ops needed
1377 for non-ELF systems. */
1378 #ifndef OBJECT_FORMAT_ELF
1379 #if TARGET_XCOFF
1380 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1381 64-bit targets. */
1382 #undef TARGET_ASM_UNALIGNED_HI_OP
1383 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1384 #undef TARGET_ASM_UNALIGNED_SI_OP
1385 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1386 #undef TARGET_ASM_UNALIGNED_DI_OP
1387 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1388 #else
1389 /* For Darwin. */
1390 #undef TARGET_ASM_UNALIGNED_HI_OP
1391 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1392 #undef TARGET_ASM_UNALIGNED_SI_OP
1393 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1394 #undef TARGET_ASM_UNALIGNED_DI_OP
1395 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1396 #undef TARGET_ASM_ALIGNED_DI_OP
1397 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1398 #endif
1399 #endif
1400
1401 /* This hook deals with fixups for relocatable code and DI-mode objects
1402 in 64-bit code. */
1403 #undef TARGET_ASM_INTEGER
1404 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1405
1406 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1407 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1408 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1409 #endif
1410
1411 #undef TARGET_SET_UP_BY_PROLOGUE
1412 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1413
1414 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1415 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1416
1417 #undef TARGET_INTERNAL_ARG_POINTER
1418 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1419
1420 #undef TARGET_HAVE_TLS
1421 #define TARGET_HAVE_TLS HAVE_AS_TLS
1422
1423 #undef TARGET_CANNOT_FORCE_CONST_MEM
1424 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1425
1426 #undef TARGET_DELEGITIMIZE_ADDRESS
1427 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1428
1429 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1430 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1431
1432 #undef TARGET_ASM_FUNCTION_PROLOGUE
1433 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1434 #undef TARGET_ASM_FUNCTION_EPILOGUE
1435 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1436
1437 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1438 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1439
1440 #undef TARGET_LEGITIMIZE_ADDRESS
1441 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1442
1443 #undef TARGET_SCHED_VARIABLE_ISSUE
1444 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1445
1446 #undef TARGET_SCHED_ISSUE_RATE
1447 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1448 #undef TARGET_SCHED_ADJUST_COST
1449 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1450 #undef TARGET_SCHED_ADJUST_PRIORITY
1451 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1452 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1453 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1454 #undef TARGET_SCHED_INIT
1455 #define TARGET_SCHED_INIT rs6000_sched_init
1456 #undef TARGET_SCHED_FINISH
1457 #define TARGET_SCHED_FINISH rs6000_sched_finish
1458 #undef TARGET_SCHED_REORDER
1459 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1460 #undef TARGET_SCHED_REORDER2
1461 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1462
1463 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1464 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1465
1466 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1467 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1468
1469 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1470 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1471 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1472 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1473 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1474 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1475 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1476 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1477
1478 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1479 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1480 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1481 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1482 rs6000_builtin_support_vector_misalignment
1483 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1484 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1485 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1486 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1487 rs6000_builtin_vectorization_cost
1488 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1489 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1490 rs6000_preferred_simd_mode
1491 #undef TARGET_VECTORIZE_INIT_COST
1492 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1493 #undef TARGET_VECTORIZE_ADD_STMT_COST
1494 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1495 #undef TARGET_VECTORIZE_FINISH_COST
1496 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1497 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1498 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1499
1500 #undef TARGET_INIT_BUILTINS
1501 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1502 #undef TARGET_BUILTIN_DECL
1503 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1504
1505 #undef TARGET_EXPAND_BUILTIN
1506 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1507
1508 #undef TARGET_MANGLE_TYPE
1509 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1510
1511 #undef TARGET_INIT_LIBFUNCS
1512 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1513
1514 #if TARGET_MACHO
1515 #undef TARGET_BINDS_LOCAL_P
1516 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1517 #endif
1518
1519 #undef TARGET_MS_BITFIELD_LAYOUT_P
1520 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1521
1522 #undef TARGET_ASM_OUTPUT_MI_THUNK
1523 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1524
1525 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1526 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1527
1528 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1529 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1530
1531 #undef TARGET_REGISTER_MOVE_COST
1532 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1533 #undef TARGET_MEMORY_MOVE_COST
1534 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1535 #undef TARGET_CANNOT_COPY_INSN_P
1536 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1537 #undef TARGET_RTX_COSTS
1538 #define TARGET_RTX_COSTS rs6000_rtx_costs
1539 #undef TARGET_ADDRESS_COST
1540 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1541
1542 #undef TARGET_DWARF_REGISTER_SPAN
1543 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1544
1545 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1546 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1547
1548 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1549 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1550
1551 #undef TARGET_PROMOTE_FUNCTION_MODE
1552 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1553
1554 #undef TARGET_RETURN_IN_MEMORY
1555 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1556
1557 #undef TARGET_RETURN_IN_MSB
1558 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1559
1560 #undef TARGET_SETUP_INCOMING_VARARGS
1561 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1562
1563 /* Always strict argument naming on rs6000. */
1564 #undef TARGET_STRICT_ARGUMENT_NAMING
1565 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1566 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1567 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1568 #undef TARGET_SPLIT_COMPLEX_ARG
1569 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1570 #undef TARGET_MUST_PASS_IN_STACK
1571 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1572 #undef TARGET_PASS_BY_REFERENCE
1573 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1574 #undef TARGET_ARG_PARTIAL_BYTES
1575 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1576 #undef TARGET_FUNCTION_ARG_ADVANCE
1577 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1578 #undef TARGET_FUNCTION_ARG
1579 #define TARGET_FUNCTION_ARG rs6000_function_arg
1580 #undef TARGET_FUNCTION_ARG_BOUNDARY
1581 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1582
1583 #undef TARGET_BUILD_BUILTIN_VA_LIST
1584 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1585
1586 #undef TARGET_EXPAND_BUILTIN_VA_START
1587 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1588
1589 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1590 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1591
1592 #undef TARGET_EH_RETURN_FILTER_MODE
1593 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1594
1595 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1596 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1597
1598 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1599 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1600
1601 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1602 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1603
1604 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1605 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1606
1607 #undef TARGET_MD_ASM_ADJUST
1608 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1609
1610 #undef TARGET_OPTION_OVERRIDE
1611 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1612
1613 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1614 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1615 rs6000_builtin_vectorized_function
1616
1617 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1618 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1619 rs6000_builtin_md_vectorized_function
1620
1621 #if !TARGET_MACHO
1622 #undef TARGET_STACK_PROTECT_FAIL
1623 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1624 #endif
1625
1626 #ifdef HAVE_AS_TLS
1627 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1628 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1629 #endif
1630
1631 /* Use a 32-bit anchor range. This leads to sequences like:
1632
1633 addis tmp,anchor,high
1634 add dest,tmp,low
1635
1636 where tmp itself acts as an anchor, and can be shared between
1637 accesses to the same 64k page. */
1638 #undef TARGET_MIN_ANCHOR_OFFSET
1639 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1640 #undef TARGET_MAX_ANCHOR_OFFSET
1641 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
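/* A minimal standalone sketch (not from this file) of the high/low split
   behind the addis/add sequence described above: the low 16 bits are
   sign-extended, and the high part compensates, so any in-range 32-bit
   offset is reachable in two instructions. The offset value is arbitrary. */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  int32_t offset = 0x12345678;
  int32_t low = (int16_t) (offset & 0xffff); /* sign-extended low half */
  int32_t high = (offset - low) >> 16;       /* immediate for addis */
  printf ("addis high=%#x, add low=%d\n", (unsigned) high & 0xffff, low);
  /* (high << 16) + low recombines to the original offset. */
  return ((high << 16) + low == offset) ? 0 : 1;
}
#endif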
1642 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1643 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1644 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1645 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1646
1647 #undef TARGET_BUILTIN_RECIPROCAL
1648 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1649
1650 #undef TARGET_EXPAND_TO_RTL_HOOK
1651 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1652
1653 #undef TARGET_INSTANTIATE_DECLS
1654 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1655
1656 #undef TARGET_SECONDARY_RELOAD
1657 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1658
1659 #undef TARGET_LEGITIMATE_ADDRESS_P
1660 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1661
1662 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1663 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1664
1665 #undef TARGET_LRA_P
1666 #define TARGET_LRA_P rs6000_lra_p
1667
1668 #undef TARGET_CAN_ELIMINATE
1669 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1670
1671 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1672 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1673
1674 #undef TARGET_TRAMPOLINE_INIT
1675 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1676
1677 #undef TARGET_FUNCTION_VALUE
1678 #define TARGET_FUNCTION_VALUE rs6000_function_value
1679
1680 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1681 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1682
1683 #undef TARGET_OPTION_SAVE
1684 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1685
1686 #undef TARGET_OPTION_RESTORE
1687 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1688
1689 #undef TARGET_OPTION_PRINT
1690 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1691
1692 #undef TARGET_CAN_INLINE_P
1693 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1694
1695 #undef TARGET_SET_CURRENT_FUNCTION
1696 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1697
1698 #undef TARGET_LEGITIMATE_CONSTANT_P
1699 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1700
1701 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1702 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1703
1704 #undef TARGET_CAN_USE_DOLOOP_P
1705 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1706
1707 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1708 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1709
1710 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1711 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1712 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1713 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1714 #undef TARGET_UNWIND_WORD_MODE
1715 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1716
1717 #undef TARGET_OFFLOAD_OPTIONS
1718 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1719
1720 #undef TARGET_C_MODE_FOR_SUFFIX
1721 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1722
1723 #undef TARGET_INVALID_BINARY_OP
1724 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1725
1726 #undef TARGET_OPTAB_SUPPORTED_P
1727 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1728 \f
1729
1730 /* Processor table. */
1731 struct rs6000_ptt
1732 {
1733 const char *const name; /* Canonical processor name. */
1734 const enum processor_type processor; /* Processor type enum value. */
1735 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1736 };
1737
1738 static struct rs6000_ptt const processor_target_table[] =
1739 {
1740 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1741 #include "rs6000-cpus.def"
1742 #undef RS6000_CPU
1743 };
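/* A minimal standalone sketch of the X-macro pattern used for
   processor_target_table above: the .def file supplies RS6000_CPU(...)
   invocations, and each include site defines the macro to expand them as
   needed. The entries below are hypothetical stand-ins, not the real
   rs6000-cpus.def contents. */
#if 0
#include <stdio.h>

/* Stands in for the contents of an included .def file. */
#define CPU_TABLE \
  CPU ("power8", 8) \
  CPU ("power9", 9)

struct entry { const char *name; int id; };

static const struct entry table[] = {
#define CPU(NAME, ID) { NAME, ID },
  CPU_TABLE
#undef CPU
};

int
main (void)
{
  size_t i;
  for (i = 0; i < sizeof (table) / sizeof (table[0]); i++)
    printf ("%s -> %d\n", table[i].name, table[i].id);
  return 0;
}
#endif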
1744
1745 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1746 name is invalid. */
1747
1748 static int
1749 rs6000_cpu_name_lookup (const char *name)
1750 {
1751 size_t i;
1752
1753 if (name != NULL)
1754 {
1755 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1756 if (! strcmp (name, processor_target_table[i].name))
1757 return (int)i;
1758 }
1759
1760 return -1;
1761 }
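/* A minimal standalone sketch of the linear-scan lookup above; the names
   array is hypothetical, not the real processor table. A miss returns -1
   just as rs6000_cpu_name_lookup does. */
#if 0
#include <stdio.h>
#include <string.h>

static const char *const names[] = { "power7", "power8", "power9" };

static int
name_lookup (const char *name)
{
  size_t i;
  if (name != NULL)
    for (i = 0; i < sizeof (names) / sizeof (names[0]); i++)
      if (! strcmp (name, names[i]))
        return (int) i;
  return -1;
}

int
main (void)
{
  printf ("%d %d\n", name_lookup ("power8"), name_lookup ("bogus"));
  return 0; /* prints "1 -1" */
}
#endif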
1762
1763 \f
1764 /* Return number of consecutive hard regs needed starting at reg REGNO
1765 to hold something of mode MODE.
1766 This is ordinarily the length in words of a value of mode MODE
1767 but can be less for certain modes in special long registers.
1768
1769 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1770 scalar instructions. The upper 32 bits are only available to the
1771 SIMD instructions.
1772
1773 POWER and PowerPC GPRs hold 32 bits worth;
1774 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1775
1776 static int
1777 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1778 {
1779 unsigned HOST_WIDE_INT reg_size;
1780
1781 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1782 128-bit floating point that can go in vector registers, which has VSX
1783 memory addressing. */
1784 if (FP_REGNO_P (regno))
1785 reg_size = (VECTOR_MEM_VSX_P (mode)
1786 ? UNITS_PER_VSX_WORD
1787 : UNITS_PER_FP_WORD);
1788
1789 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1790 reg_size = UNITS_PER_SPE_WORD;
1791
1792 else if (ALTIVEC_REGNO_P (regno))
1793 reg_size = UNITS_PER_ALTIVEC_WORD;
1794
1795 /* The value returned for SCmode in the E500 double case is 2 for
1796 ABI compatibility; storing an SCmode value in a single register
1797 would require function_arg and rs6000_spe_function_arg to handle
1798 SCmode so as to pass the value correctly in a pair of
1799 registers. */
1800 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1801 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1802 reg_size = UNITS_PER_FP_WORD;
1803
1804 else
1805 reg_size = UNITS_PER_WORD;
1806
1807 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1808 }
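/* A worked standalone example of the round-up division in the return
   statement above; the byte sizes are illustrative values rather than
   GET_MODE_SIZE results. */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned mode_size = 16; /* e.g. a 128-bit mode */
  unsigned reg_size = 8;   /* e.g. a 64-bit FPR */
  /* Rounds up, so a 16-byte value needs a 2-register pair. */
  printf ("%u\n", (mode_size + reg_size - 1) / reg_size); /* prints 2 */
  return 0;
}
#endif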
1809
1810 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1811 MODE. */
1812 static int
1813 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1814 {
1815 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1816
1817 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1818 register pairs, and we use PTImode where we need to deal with quad word
1819 memory operations. Don't allow quad words in the argument or frame
1820 pointer registers, just registers 0..31. */
1821 if (mode == PTImode)
1822 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1823 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1824 && ((regno & 1) == 0));
1825
1826 /* VSX registers that overlap the FPR registers are wider than the FPRs on
1827 non-VSX implementations. Don't allow an item to be split between an FP
1828 and an Altivec register. Allow TImode in all VSX registers if the user
1829 asked for it. */
1830 if (TARGET_VSX && VSX_REGNO_P (regno)
1831 && (VECTOR_MEM_VSX_P (mode)
1832 || FLOAT128_VECTOR_P (mode)
1833 || reg_addr[mode].scalar_in_vmx_p
1834 || (TARGET_VSX_TIMODE && mode == TImode)
1835 || (TARGET_VADDUQM && mode == V1TImode)))
1836 {
1837 if (FP_REGNO_P (regno))
1838 return FP_REGNO_P (last_regno);
1839
1840 if (ALTIVEC_REGNO_P (regno))
1841 {
1842 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1843 return 0;
1844
1845 return ALTIVEC_REGNO_P (last_regno);
1846 }
1847 }
1848
1849 /* The GPRs can hold any mode, but values bigger than one register
1850 cannot go past R31. */
1851 if (INT_REGNO_P (regno))
1852 return INT_REGNO_P (last_regno);
1853
1854 /* The float registers (except for VSX vector modes) can only hold floating
1855 modes and DImode. */
1856 if (FP_REGNO_P (regno))
1857 {
1858 if (FLOAT128_VECTOR_P (mode))
1859 return false;
1860
1861 if (SCALAR_FLOAT_MODE_P (mode)
1862 && (mode != TDmode || (regno % 2) == 0)
1863 && FP_REGNO_P (last_regno))
1864 return 1;
1865
1866 if (GET_MODE_CLASS (mode) == MODE_INT
1867 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1868 return 1;
1869
1870 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1871 && PAIRED_VECTOR_MODE (mode))
1872 return 1;
1873
1874 return 0;
1875 }
1876
1877 /* The CR register can only hold CC modes. */
1878 if (CR_REGNO_P (regno))
1879 return GET_MODE_CLASS (mode) == MODE_CC;
1880
1881 if (CA_REGNO_P (regno))
1882 return mode == Pmode || mode == SImode;
1883
1884 /* AltiVec modes can go only in AltiVec registers. */
1885 if (ALTIVEC_REGNO_P (regno))
1886 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1887 || mode == V1TImode);
1888
1889 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1890 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1891 return 1;
1892
1893 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1894 registers, and the value must fit within the register set. */
1895
1896 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1897 }
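/* A minimal standalone sketch of the PTImode check above: a quad word
   needs an aligned even/odd GPR pair, so only even starting registers
   qualify. The 0..31 bounds stand in for FIRST_GPR_REGNO/LAST_GPR_REGNO. */
#if 0
#include <stdbool.h>
#include <stdio.h>

static bool
pti_ok (int regno, int nregs)
{
  int last_regno = regno + nregs - 1;
  return regno >= 0 && last_regno <= 31 && (regno & 1) == 0;
}

int
main (void)
{
  printf ("%d %d\n", pti_ok (4, 2), pti_ok (5, 2)); /* prints "1 0" */
  return 0;
}
#endif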
1898
1899 /* Print interesting facts about registers. */
1900 static void
1901 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1902 {
1903 int r, m;
1904
1905 for (r = first_regno; r <= last_regno; ++r)
1906 {
1907 const char *comma = "";
1908 int len;
1909
1910 if (first_regno == last_regno)
1911 fprintf (stderr, "%s:\t", reg_name);
1912 else
1913 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1914
1915 len = 8;
1916 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1917 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1918 {
1919 if (len > 70)
1920 {
1921 fprintf (stderr, ",\n\t");
1922 len = 8;
1923 comma = "";
1924 }
1925
1926 if (rs6000_hard_regno_nregs[m][r] > 1)
1927 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1928 rs6000_hard_regno_nregs[m][r]);
1929 else
1930 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1931
1932 comma = ", ";
1933 }
1934
1935 if (call_used_regs[r])
1936 {
1937 if (len > 70)
1938 {
1939 fprintf (stderr, ",\n\t");
1940 len = 8;
1941 comma = "";
1942 }
1943
1944 len += fprintf (stderr, "%s%s", comma, "call-used");
1945 comma = ", ";
1946 }
1947
1948 if (fixed_regs[r])
1949 {
1950 if (len > 70)
1951 {
1952 fprintf (stderr, ",\n\t");
1953 len = 8;
1954 comma = "";
1955 }
1956
1957 len += fprintf (stderr, "%s%s", comma, "fixed");
1958 comma = ", ";
1959 }
1960
1961 if (len > 70)
1962 {
1963 fprintf (stderr, ",\n\t");
1964 comma = "";
1965 }
1966
1967 len += fprintf (stderr, "%sreg-class = %s", comma,
1968 reg_class_names[(int)rs6000_regno_regclass[r]]);
1969 comma = ", ";
1970
1971 if (len > 70)
1972 {
1973 fprintf (stderr, ",\n\t");
1974 comma = "";
1975 }
1976
1977 fprintf (stderr, "%sregno = %d\n", comma, r);
1978 }
1979 }
1980
1981 static const char *
1982 rs6000_debug_vector_unit (enum rs6000_vector v)
1983 {
1984 const char *ret;
1985
1986 switch (v)
1987 {
1988 case VECTOR_NONE: ret = "none"; break;
1989 case VECTOR_ALTIVEC: ret = "altivec"; break;
1990 case VECTOR_VSX: ret = "vsx"; break;
1991 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1992 case VECTOR_PAIRED: ret = "paired"; break;
1993 case VECTOR_SPE: ret = "spe"; break;
1994 case VECTOR_OTHER: ret = "other"; break;
1995 default: ret = "unknown"; break;
1996 }
1997
1998 return ret;
1999 }
2000
2001 /* Inner function printing just the address mask for a particular reload
2002 register class. */
2003 DEBUG_FUNCTION char *
2004 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2005 {
2006 static char ret[8];
2007 char *p = ret;
2008
2009 if ((mask & RELOAD_REG_VALID) != 0)
2010 *p++ = 'v';
2011 else if (keep_spaces)
2012 *p++ = ' ';
2013
2014 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2015 *p++ = 'm';
2016 else if (keep_spaces)
2017 *p++ = ' ';
2018
2019 if ((mask & RELOAD_REG_INDEXED) != 0)
2020 *p++ = 'i';
2021 else if (keep_spaces)
2022 *p++ = ' ';
2023
2024 if ((mask & RELOAD_REG_OFFSET) != 0)
2025 *p++ = 'o';
2026 else if (keep_spaces)
2027 *p++ = ' ';
2028
2029 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2030 *p++ = '+';
2031 else if (keep_spaces)
2032 *p++ = ' ';
2033
2034 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2035 *p++ = '+';
2036 else if (keep_spaces)
2037 *p++ = ' ';
2038
2039 if ((mask & RELOAD_REG_AND_M16) != 0)
2040 *p++ = '&';
2041 else if (keep_spaces)
2042 *p++ = ' ';
2043
2044 *p = '\0';
2045
2046 return ret;
2047 }
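/* A minimal standalone sketch of the flag-to-letter decoding idiom above;
   the bit values and letters are hypothetical, not the real RELOAD_REG_*
   masks. Always emitting a column corresponds to keep_spaces. */
#if 0
#include <stdio.h>

#define F_VALID   0x01
#define F_INDEXED 0x02
#define F_OFFSET  0x04

int
main (void)
{
  unsigned mask = F_VALID | F_OFFSET;
  char buf[4], *p = buf;
  *p++ = (mask & F_VALID) ? 'v' : ' ';
  *p++ = (mask & F_INDEXED) ? 'i' : ' ';
  *p++ = (mask & F_OFFSET) ? 'o' : ' ';
  *p = '\0';
  printf ("[%s]\n", buf); /* prints "[v o]" */
  return 0;
}
#endif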
2048
2049 /* Print the address masks in a human readable fashion. */
2050 DEBUG_FUNCTION void
2051 rs6000_debug_print_mode (ssize_t m)
2052 {
2053 ssize_t rc;
2054 int spaces = 0;
2055 bool fuse_extra_p;
2056
2057 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2058 for (rc = 0; rc < N_RELOAD_REG; rc++)
2059 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2060 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2061
2062 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2063 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2064 fprintf (stderr, " Reload=%c%c",
2065 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2066 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2067 else
2068 spaces += sizeof (" Reload=sl") - 1;
2069
2070 if (reg_addr[m].scalar_in_vmx_p)
2071 {
2072 fprintf (stderr, "%*s Upper=y", spaces, "");
2073 spaces = 0;
2074 }
2075 else
2076 spaces += sizeof (" Upper=y") - 1;
2077
2078 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2079 || reg_addr[m].fused_toc);
2080 if (!fuse_extra_p)
2081 {
2082 for (rc = 0; rc < N_RELOAD_REG; rc++)
2083 {
2084 if (rc != RELOAD_REG_ANY)
2085 {
2086 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2088 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2089 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2090 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2091 {
2092 fuse_extra_p = true;
2093 break;
2094 }
2095 }
2096 }
2097 }
2098
2099 if (fuse_extra_p)
2100 {
2101 fprintf (stderr, "%*s Fuse:", spaces, "");
2102 spaces = 0;
2103
2104 for (rc = 0; rc < N_RELOAD_REG; rc++)
2105 {
2106 if (rc != RELOAD_REG_ANY)
2107 {
2108 char load, store;
2109
2110 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2111 load = 'l';
2112 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2113 load = 'L';
2114 else
2115 load = '-';
2116
2117 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2118 store = 's';
2119 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2120 store = 'S';
2121 else
2122 store = '-';
2123
2124 if (load == '-' && store == '-')
2125 spaces += 5;
2126 else
2127 {
2128 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2129 reload_reg_map[rc].name[0], load, store);
2130 spaces = 0;
2131 }
2132 }
2133 }
2134
2135 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2136 {
2137 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2138 spaces = 0;
2139 }
2140 else
2141 spaces += sizeof (" P8gpr") - 1;
2142
2143 if (reg_addr[m].fused_toc)
2144 {
2145 fprintf (stderr, "%*sToc", (spaces + 1), "");
2146 spaces = 0;
2147 }
2148 else
2149 spaces += sizeof (" Toc") - 1;
2150 }
2151 else
2152 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2153
2154 if (rs6000_vector_unit[m] != VECTOR_NONE
2155 || rs6000_vector_mem[m] != VECTOR_NONE)
2156 {
2157 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2158 spaces, "",
2159 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2160 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2161 }
2162
2163 fputs ("\n", stderr);
2164 }
2165
2166 #define DEBUG_FMT_ID "%-32s= "
2167 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2168 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2169 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
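/* A minimal standalone sketch of how the DEBUG_FMT_* macros compose via
   string-literal concatenation into one aligned "name = value" line; the
   FMT_* names are hypothetical stand-ins. */
#if 0
#include <stdio.h>

#define FMT_ID "%-32s= "
#define FMT_S FMT_ID "%s\n"

int
main (void)
{
  fprintf (stderr, FMT_S, "abi", "ELFv2"); /* name padded to 32 columns */
  return 0;
}
#endif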
2170
2171 /* Print various interesting information with -mdebug=reg. */
2172 static void
2173 rs6000_debug_reg_global (void)
2174 {
2175 static const char *const tf[2] = { "false", "true" };
2176 const char *nl = (const char *)0;
2177 int m;
2178 size_t m1, m2, v;
2179 char costly_num[20];
2180 char nop_num[20];
2181 char flags_buffer[40];
2182 const char *costly_str;
2183 const char *nop_str;
2184 const char *trace_str;
2185 const char *abi_str;
2186 const char *cmodel_str;
2187 struct cl_target_option cl_opts;
2188
2189 /* Modes we want tieable information on. */
2190 static const machine_mode print_tieable_modes[] = {
2191 QImode,
2192 HImode,
2193 SImode,
2194 DImode,
2195 TImode,
2196 PTImode,
2197 SFmode,
2198 DFmode,
2199 TFmode,
2200 IFmode,
2201 KFmode,
2202 SDmode,
2203 DDmode,
2204 TDmode,
2205 V8QImode,
2206 V4HImode,
2207 V2SImode,
2208 V16QImode,
2209 V8HImode,
2210 V4SImode,
2211 V2DImode,
2212 V1TImode,
2213 V32QImode,
2214 V16HImode,
2215 V8SImode,
2216 V4DImode,
2217 V2TImode,
2218 V2SFmode,
2219 V4SFmode,
2220 V2DFmode,
2221 V8SFmode,
2222 V4DFmode,
2223 CCmode,
2224 CCUNSmode,
2225 CCEQmode,
2226 };
2227
2228 /* Virtual regs we are interested in. */
2229 static const struct {
2230 int regno; /* register number. */
2231 const char *name; /* register name. */
2232 } virtual_regs[] = {
2233 { STACK_POINTER_REGNUM, "stack pointer:" },
2234 { TOC_REGNUM, "toc: " },
2235 { STATIC_CHAIN_REGNUM, "static chain: " },
2236 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2237 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2238 { ARG_POINTER_REGNUM, "arg pointer: " },
2239 { FRAME_POINTER_REGNUM, "frame pointer:" },
2240 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2241 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2242 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2243 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2244 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2245 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2246 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2247 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2248 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2249 };
2250
2251 fputs ("\nHard register information:\n", stderr);
2252 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2253 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2254 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2255 LAST_ALTIVEC_REGNO,
2256 "vs");
2257 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2258 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2259 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2260 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2261 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2262 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2263 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2264 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2265
2266 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2267 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2268 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2269
2270 fprintf (stderr,
2271 "\n"
2272 "d reg_class = %s\n"
2273 "f reg_class = %s\n"
2274 "v reg_class = %s\n"
2275 "wa reg_class = %s\n"
2276 "wb reg_class = %s\n"
2277 "wd reg_class = %s\n"
2278 "we reg_class = %s\n"
2279 "wf reg_class = %s\n"
2280 "wg reg_class = %s\n"
2281 "wh reg_class = %s\n"
2282 "wi reg_class = %s\n"
2283 "wj reg_class = %s\n"
2284 "wk reg_class = %s\n"
2285 "wl reg_class = %s\n"
2286 "wm reg_class = %s\n"
2287 "wp reg_class = %s\n"
2288 "wq reg_class = %s\n"
2289 "wr reg_class = %s\n"
2290 "ws reg_class = %s\n"
2291 "wt reg_class = %s\n"
2292 "wu reg_class = %s\n"
2293 "wv reg_class = %s\n"
2294 "ww reg_class = %s\n"
2295 "wx reg_class = %s\n"
2296 "wy reg_class = %s\n"
2297 "wz reg_class = %s\n"
2298 "\n",
2299 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2300 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2301 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2302 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2303 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2304 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2305 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2306 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2307 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2308 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2309 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2310 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2311 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2312 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2313 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2314 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2315 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2316 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2317 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2318 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2319 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2320 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2321 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2322 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2323 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2324 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2325
2326 nl = "\n";
2327 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2328 rs6000_debug_print_mode (m);
2329
2330 fputs ("\n", stderr);
2331
2332 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2333 {
2334 machine_mode mode1 = print_tieable_modes[m1];
2335 bool first_time = true;
2336
2337 nl = (const char *)0;
2338 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2339 {
2340 machine_mode mode2 = print_tieable_modes[m2];
2341 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2342 {
2343 if (first_time)
2344 {
2345 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2346 nl = "\n";
2347 first_time = false;
2348 }
2349
2350 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2351 }
2352 }
2353
2354 if (!first_time)
2355 fputs ("\n", stderr);
2356 }
2357
2358 if (nl)
2359 fputs (nl, stderr);
2360
2361 if (rs6000_recip_control)
2362 {
2363 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2364
2365 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2366 if (rs6000_recip_bits[m])
2367 {
2368 fprintf (stderr,
2369 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2370 GET_MODE_NAME (m),
2371 (RS6000_RECIP_AUTO_RE_P (m)
2372 ? "auto"
2373 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2374 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2375 ? "auto"
2376 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2377 }
2378
2379 fputs ("\n", stderr);
2380 }
2381
2382 if (rs6000_cpu_index >= 0)
2383 {
2384 const char *name = processor_target_table[rs6000_cpu_index].name;
2385 HOST_WIDE_INT flags
2386 = processor_target_table[rs6000_cpu_index].target_enable;
2387
2388 sprintf (flags_buffer, "-mcpu=%s flags", name);
2389 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2390 }
2391 else
2392 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2393
2394 if (rs6000_tune_index >= 0)
2395 {
2396 const char *name = processor_target_table[rs6000_tune_index].name;
2397 HOST_WIDE_INT flags
2398 = processor_target_table[rs6000_tune_index].target_enable;
2399
2400 sprintf (flags_buffer, "-mtune=%s flags", name);
2401 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2402 }
2403 else
2404 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2405
2406 cl_target_option_save (&cl_opts, &global_options);
2407 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2408 rs6000_isa_flags);
2409
2410 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2411 rs6000_isa_flags_explicit);
2412
2413 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2414 rs6000_builtin_mask);
2415
2416 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2417
2418 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2419 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2420
2421 switch (rs6000_sched_costly_dep)
2422 {
2423 case max_dep_latency:
2424 costly_str = "max_dep_latency";
2425 break;
2426
2427 case no_dep_costly:
2428 costly_str = "no_dep_costly";
2429 break;
2430
2431 case all_deps_costly:
2432 costly_str = "all_deps_costly";
2433 break;
2434
2435 case true_store_to_load_dep_costly:
2436 costly_str = "true_store_to_load_dep_costly";
2437 break;
2438
2439 case store_to_load_dep_costly:
2440 costly_str = "store_to_load_dep_costly";
2441 break;
2442
2443 default:
2444 costly_str = costly_num;
2445 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2446 break;
2447 }
2448
2449 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2450
2451 switch (rs6000_sched_insert_nops)
2452 {
2453 case sched_finish_regroup_exact:
2454 nop_str = "sched_finish_regroup_exact";
2455 break;
2456
2457 case sched_finish_pad_groups:
2458 nop_str = "sched_finish_pad_groups";
2459 break;
2460
2461 case sched_finish_none:
2462 nop_str = "sched_finish_none";
2463 break;
2464
2465 default:
2466 nop_str = nop_num;
2467 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2468 break;
2469 }
2470
2471 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2472
2473 switch (rs6000_sdata)
2474 {
2475 default:
2476 case SDATA_NONE:
2477 break;
2478
2479 case SDATA_DATA:
2480 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2481 break;
2482
2483 case SDATA_SYSV:
2484 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2485 break;
2486
2487 case SDATA_EABI:
2488 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2489 break;
2490
2491 }
2492
2493 switch (rs6000_traceback)
2494 {
2495 case traceback_default: trace_str = "default"; break;
2496 case traceback_none: trace_str = "none"; break;
2497 case traceback_part: trace_str = "part"; break;
2498 case traceback_full: trace_str = "full"; break;
2499 default: trace_str = "unknown"; break;
2500 }
2501
2502 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2503
2504 switch (rs6000_current_cmodel)
2505 {
2506 case CMODEL_SMALL: cmodel_str = "small"; break;
2507 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2508 case CMODEL_LARGE: cmodel_str = "large"; break;
2509 default: cmodel_str = "unknown"; break;
2510 }
2511
2512 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2513
2514 switch (rs6000_current_abi)
2515 {
2516 case ABI_NONE: abi_str = "none"; break;
2517 case ABI_AIX: abi_str = "aix"; break;
2518 case ABI_ELFv2: abi_str = "ELFv2"; break;
2519 case ABI_V4: abi_str = "V4"; break;
2520 case ABI_DARWIN: abi_str = "darwin"; break;
2521 default: abi_str = "unknown"; break;
2522 }
2523
2524 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2525
2526 if (rs6000_altivec_abi)
2527 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2528
2529 if (rs6000_spe_abi)
2530 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2531
2532 if (rs6000_darwin64_abi)
2533 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2534
2535 if (rs6000_float_gprs)
2536 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2537
2538 fprintf (stderr, DEBUG_FMT_S, "fprs",
2539 (TARGET_FPRS ? "true" : "false"));
2540
2541 fprintf (stderr, DEBUG_FMT_S, "single_float",
2542 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2543
2544 fprintf (stderr, DEBUG_FMT_S, "double_float",
2545 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2546
2547 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2548 (TARGET_SOFT_FLOAT ? "true" : "false"));
2549
2550 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2551 (TARGET_E500_SINGLE ? "true" : "false"));
2552
2553 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2554 (TARGET_E500_DOUBLE ? "true" : "false"));
2555
2556 if (TARGET_LINK_STACK)
2557 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2558
2559 if (targetm.lra_p ())
2560 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2561
2562 if (TARGET_P8_FUSION)
2563 {
2564 char options[80];
2565
2566 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2567 if (TARGET_TOC_FUSION)
2568 strcat (options, ", toc");
2569
2570 if (TARGET_P8_FUSION_SIGN)
2571 strcat (options, ", sign");
2572
2573 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2574 }
2575
2576 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2577 TARGET_SECURE_PLT ? "secure" : "bss");
2578 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2579 aix_struct_return ? "aix" : "sysv");
2580 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2581 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2582 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2583 tf[!!rs6000_align_branch_targets]);
2584 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2585 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2586 rs6000_long_double_type_size);
2587 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2588 (int)rs6000_sched_restricted_insns_priority);
2589 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2590 (int)END_BUILTINS);
2591 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2592 (int)RS6000_BUILTIN_COUNT);
2593
2594 if (TARGET_VSX)
2595 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2596 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2597
2598 if (TARGET_DIRECT_MOVE_128)
2599 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2600 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2601 }
2602
2603 \f
2604 /* Update the addr mask bits in reg_addr to help secondary reload and the
2605 legitimate address support figure out the appropriate addressing to
2606 use. */
2607
2608 static void
2609 rs6000_setup_reg_addr_masks (void)
2610 {
2611 ssize_t rc, reg, m, nregs;
2612 addr_mask_type any_addr_mask, addr_mask;
2613
2614 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2615 {
2616 machine_mode m2 = (machine_mode)m;
2617 unsigned short msize = GET_MODE_SIZE (m2);
2618
2619 /* SDmode is special in that we want to access it only via REG+REG
2620 addressing on power7 and above, since we want to use the LFIWZX and
2621 STFIWX instructions to load and store it. */
2622 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2623
2624 any_addr_mask = 0;
2625 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2626 {
2627 addr_mask = 0;
2628 reg = reload_reg_map[rc].reg;
2629
2630 /* Can mode values go in the GPR/FPR/Altivec registers? */
2631 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2632 {
2633 nregs = rs6000_hard_regno_nregs[m][reg];
2634 addr_mask |= RELOAD_REG_VALID;
2635
2636 /* Indicate if the mode takes more than 1 physical register. If
2637 it takes a single register, indicate it can do REG+REG
2638 addressing. */
2639 if (nregs > 1 || m == BLKmode)
2640 addr_mask |= RELOAD_REG_MULTIPLE;
2641 else
2642 addr_mask |= RELOAD_REG_INDEXED;
2643
2644 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2645 addressing. Restrict addressing on SPE for 64-bit types
2646 because of the SUBREG hackery used to address 64-bit floats in
2647 '32-bit' GPRs. If we allow scalars into Altivec registers,
2648 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2649
2650 if (TARGET_UPDATE
2651 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2652 && msize <= 8
2653 && !VECTOR_MODE_P (m2)
2654 && !FLOAT128_VECTOR_P (m2)
2655 && !COMPLEX_MODE_P (m2)
2656 && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
2657 && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
2658 && !(TARGET_E500_DOUBLE && msize == 8))
2659 {
2660 addr_mask |= RELOAD_REG_PRE_INCDEC;
2661
2662 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2663 we don't allow PRE_MODIFY for some multi-register
2664 operations. */
2665 switch (m)
2666 {
2667 default:
2668 addr_mask |= RELOAD_REG_PRE_MODIFY;
2669 break;
2670
2671 case DImode:
2672 if (TARGET_POWERPC64)
2673 addr_mask |= RELOAD_REG_PRE_MODIFY;
2674 break;
2675
2676 case DFmode:
2677 case DDmode:
2678 if (TARGET_DF_INSN)
2679 addr_mask |= RELOAD_REG_PRE_MODIFY;
2680 break;
2681 }
2682 }
2683 }
2684
2685 /* GPR and FPR registers can do REG+OFFSET addressing, except
2686 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form
2687 addressing for scalars to altivec registers. */
2688 if ((addr_mask != 0) && !indexed_only_p
2689 && msize <= 8
2690 && (rc == RELOAD_REG_GPR
2691 || rc == RELOAD_REG_FPR
2692 || (rc == RELOAD_REG_VMX
2693 && TARGET_P9_DFORM
2694 && (m2 == DFmode || m2 == SFmode))))
2695 addr_mask |= RELOAD_REG_OFFSET;
2696
2697 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2698 addressing on 128-bit types. */
2699 if (rc == RELOAD_REG_VMX && msize == 16
2700 && (addr_mask & RELOAD_REG_VALID) != 0)
2701 addr_mask |= RELOAD_REG_AND_M16;
2702
2703 reg_addr[m].addr_mask[rc] = addr_mask;
2704 any_addr_mask |= addr_mask;
2705 }
2706
2707 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2708 }
2709 }
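/* A minimal standalone sketch of the mask-building pattern above: start
   from zero, OR in a capability bit as each test passes, and accumulate
   an "any" mask across the register classes. The flag values and class
   tests are hypothetical. */
#if 0
#include <stdio.h>

#define M_VALID   0x01
#define M_INDEXED 0x02
#define M_OFFSET  0x04

int
main (void)
{
  unsigned any_mask = 0;
  int rc;
  for (rc = 0; rc < 3; rc++)
    {
      unsigned mask = M_VALID;  /* assume the class can hold the mode */
      if (rc != 2)
        mask |= M_INDEXED;      /* say, single-register classes */
      if (rc == 0)
        mask |= M_OFFSET;       /* say, GPRs allow reg+offset */
      any_mask |= mask;
      printf ("rc=%d mask=%#x\n", rc, mask);
    }
  printf ("any=%#x\n", any_mask);
  return 0;
}
#endif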
2710
2711 \f
2712 /* Initialize the various global tables that are based on register size. */
2713 static void
2714 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2715 {
2716 ssize_t r, m, c;
2717 int align64;
2718 int align32;
2719
2720 /* Precalculate REGNO_REG_CLASS. */
2721 rs6000_regno_regclass[0] = GENERAL_REGS;
2722 for (r = 1; r < 32; ++r)
2723 rs6000_regno_regclass[r] = BASE_REGS;
2724
2725 for (r = 32; r < 64; ++r)
2726 rs6000_regno_regclass[r] = FLOAT_REGS;
2727
2728 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2729 rs6000_regno_regclass[r] = NO_REGS;
2730
2731 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2732 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2733
2734 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2735 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2736 rs6000_regno_regclass[r] = CR_REGS;
2737
2738 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2739 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2740 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2741 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2742 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2743 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2744 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2745 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2746 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2747 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2748 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2749 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2750
2751 /* Precalculate the mapping from register class to simpler reload register
2752 class. We don't need all of the register classes that are combinations
2753 of different classes, just the simple ones that have constraint letters. */
2754 for (c = 0; c < N_REG_CLASSES; c++)
2755 reg_class_to_reg_type[c] = NO_REG_TYPE;
2756
2757 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2758 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2759 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2760 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2761 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2762 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2763 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2764 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2765 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2766 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2767 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2768 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2769
2770 if (TARGET_VSX)
2771 {
2772 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2773 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2774 }
2775 else
2776 {
2777 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2778 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2779 }
2780
2781 /* Precalculate the valid memory formats as well as the vector information;
2782 this must be set up before the rs6000_hard_regno_nregs_internal calls
2783 below. */
2784 gcc_assert ((int)VECTOR_NONE == 0);
2785 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2786 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2787
2788 gcc_assert ((int)CODE_FOR_nothing == 0);
2789 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2790
2791 gcc_assert ((int)NO_REGS == 0);
2792 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2793
2794 /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128
2795 controls whether the compiler uses native or 128-bit alignment. */
2796 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2797 {
2798 align64 = 64;
2799 align32 = 32;
2800 }
2801 else
2802 {
2803 align64 = 128;
2804 align32 = 128;
2805 }
2806
2807 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2808 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2809 if (TARGET_FLOAT128)
2810 {
2811 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2812 rs6000_vector_align[KFmode] = 128;
2813
2814 if (FLOAT128_IEEE_P (TFmode))
2815 {
2816 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2817 rs6000_vector_align[TFmode] = 128;
2818 }
2819 }
2820
2821 /* V2DF mode, VSX only. */
2822 if (TARGET_VSX)
2823 {
2824 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2825 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2826 rs6000_vector_align[V2DFmode] = align64;
2827 }
2828
2829 /* V4SF mode, either VSX or Altivec. */
2830 if (TARGET_VSX)
2831 {
2832 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2833 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2834 rs6000_vector_align[V4SFmode] = align32;
2835 }
2836 else if (TARGET_ALTIVEC)
2837 {
2838 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2839 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2840 rs6000_vector_align[V4SFmode] = align32;
2841 }
2842
2843 /* V16QImode, V8HImode, V4SImode have Altivec-only arithmetic, but may use
2844 VSX loads and stores. */
2845 if (TARGET_ALTIVEC)
2846 {
2847 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2848 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2849 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2850 rs6000_vector_align[V4SImode] = align32;
2851 rs6000_vector_align[V8HImode] = align32;
2852 rs6000_vector_align[V16QImode] = align32;
2853
2854 if (TARGET_VSX)
2855 {
2856 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2857 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2858 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2859 }
2860 else
2861 {
2862 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2863 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2864 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2865 }
2866 }
2867
2868 /* V2DImode: full arithmetic needs the ISA 2.07 vector unit; allow it under
2869 VSX to do insert/splat/extract. Altivec lacks 64-bit integer support. */
2870 if (TARGET_VSX)
2871 {
2872 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2873 rs6000_vector_unit[V2DImode]
2874 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2875 rs6000_vector_align[V2DImode] = align64;
2876
2877 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2878 rs6000_vector_unit[V1TImode]
2879 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2880 rs6000_vector_align[V1TImode] = 128;
2881 }
2882
2883 /* DFmode, see if we want to use the VSX unit. Memory is handled
2884 differently, so don't set rs6000_vector_mem. */
2885 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2886 {
2887 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2888 rs6000_vector_align[DFmode] = 64;
2889 }
2890
2891 /* SFmode, see if we want to use the VSX unit. */
2892 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
2893 {
2894 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2895 rs6000_vector_align[SFmode] = 32;
2896 }
2897
2898 /* Allow TImode in VSX register and set the VSX memory macros. */
2899 if (TARGET_VSX && TARGET_VSX_TIMODE)
2900 {
2901 rs6000_vector_mem[TImode] = VECTOR_VSX;
2902 rs6000_vector_align[TImode] = align64;
2903 }
2904
2905 /* TODO: add SPE and paired floating point vector support. */
2906
2907 /* Register class constraints for the constraints that depend on compile
2908 switches. When the VSX code was added, different constraints were added
2909 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2910 of the VSX registers are used. The register classes for scalar floating
2911 point types are set based on whether we allow that type into the upper
2912 (Altivec) registers. GCC has register classes to target the Altivec
2913 registers for load/store operations, to select using a VSX memory
2914 operation instead of the traditional floating point operation. The
2915 constraints are:
2916
2917 d - Register class to use with traditional DFmode instructions.
2918 f - Register class to use with traditional SFmode instructions.
2919 v - Altivec register.
2920 wa - Any VSX register.
2921 wc - Reserved to represent individual CR bits (used in LLVM).
2922 wd - Preferred register class for V2DFmode.
2923 wf - Preferred register class for V4SFmode.
2924 wg - Float register for power6x move insns.
2925 wh - FP register for direct move instructions.
2926 wi - FP or VSX register to hold 64-bit integers for VSX insns.
2927 wj - FP or VSX register to hold 64-bit integers for direct moves.
2928 wk - FP or VSX register to hold 64-bit doubles for direct moves.
2929 wl - Float register if we can do 32-bit signed int loads.
2930 wm - VSX register for ISA 2.07 direct move operations.
2931 wn - always NO_REGS.
2932 wr - GPR if 64-bit mode is permitted.
2933 ws - Register class to do ISA 2.06 DF operations.
2934 wt - VSX register for TImode in VSX registers.
2935 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2936 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2937 ww - Register class to do SF conversions in with VSX operations.
2938 wx - Float register if we can do 32-bit int stores.
2939 wy - Register class to do ISA 2.07 SF operations.
2940 wz - Float register if we can do 32-bit unsigned int loads. */
2941
2942 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2943 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2944
2945 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2946 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2947
2948 if (TARGET_VSX)
2949 {
2950 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2951 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
2952 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
2953 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
2954
2955 if (TARGET_VSX_TIMODE)
2956 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
2957
2958 if (TARGET_UPPER_REGS_DF) /* DFmode */
2959 {
2960 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2961 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2962 }
2963 else
2964 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2965 }
2966
2967 /* Add conditional constraints based on various options, to allow us to
2968 collapse multiple insn patterns. */
2969 if (TARGET_ALTIVEC)
2970 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2971
2972 if (TARGET_MFPGPR) /* DFmode */
2973 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2974
2975 if (TARGET_LFIWAX)
2976 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
2977
2978 if (TARGET_DIRECT_MOVE)
2979 {
2980 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
2981 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
2982 = rs6000_constraints[RS6000_CONSTRAINT_wi];
2983 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
2984 = rs6000_constraints[RS6000_CONSTRAINT_ws];
2985 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2986 }
2987
2988 if (TARGET_POWERPC64)
2989 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2990
2991 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
2992 {
2993 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2994 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2995 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2996 }
2997 else if (TARGET_P8_VECTOR)
2998 {
2999 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
3000 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3001 }
3002 else if (TARGET_VSX)
3003 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3004
3005 if (TARGET_STFIWX)
3006 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3007
3008 if (TARGET_LFIWZX)
3009 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3010
3011 if (TARGET_FLOAT128)
3012 {
3013 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3014 if (FLOAT128_IEEE_P (TFmode))
3015 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3016 }
3017
3018 /* Support for new D-form instructions. */
3019 if (TARGET_P9_DFORM)
3020 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3021
3022 /* Support for new direct moves. */
3023 if (TARGET_DIRECT_MOVE_128)
3024 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3025
3026 /* Set up the reload helper and direct move functions. */
3027 if (TARGET_VSX || TARGET_ALTIVEC)
3028 {
3029 if (TARGET_64BIT)
3030 {
3031 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3032 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3033 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3034 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3035 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3036 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3037 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3038 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3039 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3040 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3041 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3042 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3043 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3044 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3045 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3046 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3047 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3048 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3049 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3050 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3051 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3052 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3053
3054 if (FLOAT128_IEEE_P (TFmode))
3055 {
3056 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3057 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3058 }
3059
3060 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3061 available. */
3062 if (TARGET_NO_SDMODE_STACK)
3063 {
3064 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3065 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3066 }
3067
3068 if (TARGET_VSX_TIMODE)
3069 {
3070 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3071 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3072 }
3073
3074 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3075 {
3076 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3077 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3078 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3079 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3080 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3081 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3082 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3083 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3084 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3085
3086 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3087 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3088 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3089 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3090 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3091 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3092 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3093 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3094 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3095 }
3096 }
3097 else
3098 {
3099 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3100 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3101 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3102 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3103 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3104 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3105 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3106 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3107 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3108 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3109 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3110 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3111 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3112 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3113 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3114 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3115 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3116 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3117 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3118 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3119 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3120 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3121
3122 if (FLOAT128_IEEE_P (TFmode))
3123 {
3124 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3125 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3126 }
3127
3128 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3129 available. */
3130 if (TARGET_NO_SDMODE_STACK)
3131 {
3132 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3133 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3134 }
3135
3136 if (TARGET_VSX_TIMODE)
3137 {
3138 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3139 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3140 }
3141
3142 if (TARGET_DIRECT_MOVE)
3143 {
3144 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3145 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3146 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3147 }
3148 }
3149
3150 if (TARGET_UPPER_REGS_DF)
3151 reg_addr[DFmode].scalar_in_vmx_p = true;
3152
3153 if (TARGET_UPPER_REGS_SF)
3154 reg_addr[SFmode].scalar_in_vmx_p = true;
3155 }
3156
3157 /* Set up the fusion operations. */
3158 if (TARGET_P8_FUSION)
3159 {
3160 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3161 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3162 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3163 if (TARGET_64BIT)
3164 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3165 }
3166
3167 if (TARGET_P9_FUSION)
3168 {
3169 struct fuse_insns {
3170 enum machine_mode mode; /* mode of the fused type. */
3171 enum machine_mode pmode; /* pointer mode. */
3172 enum rs6000_reload_reg_type rtype; /* register type. */
3173 enum insn_code load; /* load insn. */
3174 enum insn_code store; /* store insn. */
3175 };
3176
3177 static const struct fuse_insns addis_insns[] = {
3178 { SFmode, DImode, RELOAD_REG_FPR,
3179 CODE_FOR_fusion_fpr_di_sf_load,
3180 CODE_FOR_fusion_fpr_di_sf_store },
3181
3182 { SFmode, SImode, RELOAD_REG_FPR,
3183 CODE_FOR_fusion_fpr_si_sf_load,
3184 CODE_FOR_fusion_fpr_si_sf_store },
3185
3186 { DFmode, DImode, RELOAD_REG_FPR,
3187 CODE_FOR_fusion_fpr_di_df_load,
3188 CODE_FOR_fusion_fpr_di_df_store },
3189
3190 { DFmode, SImode, RELOAD_REG_FPR,
3191 CODE_FOR_fusion_fpr_si_df_load,
3192 CODE_FOR_fusion_fpr_si_df_store },
3193
3194 { DImode, DImode, RELOAD_REG_FPR,
3195 CODE_FOR_fusion_fpr_di_di_load,
3196 CODE_FOR_fusion_fpr_di_di_store },
3197
3198 { DImode, SImode, RELOAD_REG_FPR,
3199 CODE_FOR_fusion_fpr_si_di_load,
3200 CODE_FOR_fusion_fpr_si_di_store },
3201
3202 { QImode, DImode, RELOAD_REG_GPR,
3203 CODE_FOR_fusion_gpr_di_qi_load,
3204 CODE_FOR_fusion_gpr_di_qi_store },
3205
3206 { QImode, SImode, RELOAD_REG_GPR,
3207 CODE_FOR_fusion_gpr_si_qi_load,
3208 CODE_FOR_fusion_gpr_si_qi_store },
3209
3210 { HImode, DImode, RELOAD_REG_GPR,
3211 CODE_FOR_fusion_gpr_di_hi_load,
3212 CODE_FOR_fusion_gpr_di_hi_store },
3213
3214 { HImode, SImode, RELOAD_REG_GPR,
3215 CODE_FOR_fusion_gpr_si_hi_load,
3216 CODE_FOR_fusion_gpr_si_hi_store },
3217
3218 { SImode, DImode, RELOAD_REG_GPR,
3219 CODE_FOR_fusion_gpr_di_si_load,
3220 CODE_FOR_fusion_gpr_di_si_store },
3221
3222 { SImode, SImode, RELOAD_REG_GPR,
3223 CODE_FOR_fusion_gpr_si_si_load,
3224 CODE_FOR_fusion_gpr_si_si_store },
3225
3226 { SFmode, DImode, RELOAD_REG_GPR,
3227 CODE_FOR_fusion_gpr_di_sf_load,
3228 CODE_FOR_fusion_gpr_di_sf_store },
3229
3230 { SFmode, SImode, RELOAD_REG_GPR,
3231 CODE_FOR_fusion_gpr_si_sf_load,
3232 CODE_FOR_fusion_gpr_si_sf_store },
3233
3234 { DImode, DImode, RELOAD_REG_GPR,
3235 CODE_FOR_fusion_gpr_di_di_load,
3236 CODE_FOR_fusion_gpr_di_di_store },
3237
3238 { DFmode, DImode, RELOAD_REG_GPR,
3239 CODE_FOR_fusion_gpr_di_df_load,
3240 CODE_FOR_fusion_gpr_di_df_store },
3241 };
3242
3243 enum machine_mode cur_pmode = Pmode;
3244 size_t i;
3245
3246 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3247 {
3248 enum machine_mode xmode = addis_insns[i].mode;
3249 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3250
3251 if (addis_insns[i].pmode != cur_pmode)
3252 continue;
3253
3254 if (rtype == RELOAD_REG_FPR
3255 && (!TARGET_HARD_FLOAT || !TARGET_FPRS))
3256 continue;
3257
3258 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3259 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3260 }
3261 }
3262
3263 /* Note the types for which we support fusing the TOC setup with a memory
3264 insn. We only do fused TOCs for the medium/large code models. */
3265 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3266 && (TARGET_CMODEL != CMODEL_SMALL))
3267 {
3268 reg_addr[QImode].fused_toc = true;
3269 reg_addr[HImode].fused_toc = true;
3270 reg_addr[SImode].fused_toc = true;
3271 reg_addr[DImode].fused_toc = true;
3272 if (TARGET_HARD_FLOAT && TARGET_FPRS)
3273 {
3274 if (TARGET_SINGLE_FLOAT)
3275 reg_addr[SFmode].fused_toc = true;
3276 if (TARGET_DOUBLE_FLOAT)
3277 reg_addr[DFmode].fused_toc = true;
3278 }
3279 }
3280
3281 /* Precalculate HARD_REGNO_NREGS. */
3282 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3283 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3284 rs6000_hard_regno_nregs[m][r]
3285 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3286
3287 /* Precalculate HARD_REGNO_MODE_OK. */
3288 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3289 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3290 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
3291 rs6000_hard_regno_mode_ok_p[m][r] = true;
3292
3293 /* Precalculate CLASS_MAX_NREGS sizes. */
3294 for (c = 0; c < LIM_REG_CLASSES; ++c)
3295 {
3296 int reg_size;
3297
3298 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3299 reg_size = UNITS_PER_VSX_WORD;
3300
3301 else if (c == ALTIVEC_REGS)
3302 reg_size = UNITS_PER_ALTIVEC_WORD;
3303
3304 else if (c == FLOAT_REGS)
3305 reg_size = UNITS_PER_FP_WORD;
3306
3307 else
3308 reg_size = UNITS_PER_WORD;
3309
3310 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3311 {
3312 machine_mode m2 = (machine_mode)m;
3313 int reg_size2 = reg_size;
3314
3315 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3316 in VSX. */
3317 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3318 reg_size2 = UNITS_PER_FP_WORD;
3319
3320 rs6000_class_max_nregs[m][c]
3321 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3322 }
3323 }
3324
3325 if (TARGET_E500_DOUBLE)
3326 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3327
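/* The CLASS_MAX_NREGS loop above is a ceiling division of the mode size by
   the register size for the class.  A minimal standalone sketch of that
   arithmetic (the byte sizes are illustrative, not the real UNITS_PER_*
   values; this block is not part of the build):  */
#if 0
#include <assert.h>

/* Ceiling division, as used for rs6000_class_max_nregs above.  */
static int
class_nregs (int mode_size, int reg_size)
{
  return (mode_size + reg_size - 1) / reg_size;
}

int
main (void)
{
  assert (class_nregs (16, 8) == 2);  /* 16-byte mode in 8-byte FP regs.  */
  assert (class_nregs (4, 8) == 1);   /* small modes still take one reg.  */
  assert (class_nregs (16, 16) == 1); /* vector mode in a vector reg.  */
  return 0;
}
#endif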
3328 /* Calculate the modes for which to automatically generate code using the
3329 reciprocal divide and square root instructions. In the future, possibly
3330 automatically generate the instructions even if the user did not specify
3331 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3332 not accurate enough. */
3333 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3334 if (TARGET_FRES)
3335 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3336 if (TARGET_FRE)
3337 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3338 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3339 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3340 if (VECTOR_UNIT_VSX_P (V2DFmode))
3341 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3342
3343 if (TARGET_FRSQRTES)
3344 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3345 if (TARGET_FRSQRTE)
3346 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3347 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3348 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3349 if (VECTOR_UNIT_VSX_P (V2DFmode))
3350 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3351
3352 if (rs6000_recip_control)
3353 {
3354 if (!flag_finite_math_only)
3355 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3356 if (flag_trapping_math)
3357 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3358 if (!flag_reciprocal_math)
3359 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3360 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3361 {
3362 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3363 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3364 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3365
3366 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3367 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3368 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3369
3370 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3371 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3372 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3373
3374 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3375 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3376 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3377
3378 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3379 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3380 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3381
3382 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3383 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3384 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3385
3386 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3387 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3388 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3389
3390 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3391 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3392 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3393 }
3394 }
3395
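/* A sketch of the two-level -mrecip gating above: a mode first gets a
   HAVE_* bit if the estimate instruction exists, and only gets the AUTO_*
   bit when rs6000_recip_control selects it and the math flags permit it.
   The mask values are made up for illustration; not part of the build.  */
#if 0
#include <assert.h>

#define MASK_HAVE_RE 0x1   /* stand-in for RS6000_RECIP_MASK_HAVE_RE */
#define MASK_AUTO_RE 0x2   /* stand-in for RS6000_RECIP_MASK_AUTO_RE */

int
main (void)
{
  unsigned bits = MASK_HAVE_RE; /* e.g. TARGET_FRES is set.  */
  int control_div = 1;          /* -mrecip selected this division.  */
  int math_ok = 1;              /* finite, non-trapping, reciprocal math.  */

  if ((bits & MASK_HAVE_RE) != 0 && control_div && math_ok)
    bits |= MASK_AUTO_RE;

  assert (bits == (MASK_HAVE_RE | MASK_AUTO_RE));
  return 0;
}
#endif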
3396 /* Update the addr mask bits in reg_addr to help the secondary reload and
3397 GO_IF_LEGITIMATE_ADDRESS support figure out the appropriate addressing
3398 to use. */
3399 rs6000_setup_reg_addr_masks ();
3400
3401 if (global_init_p || TARGET_DEBUG_TARGET)
3402 {
3403 if (TARGET_DEBUG_REG)
3404 rs6000_debug_reg_global ();
3405
3406 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3407 fprintf (stderr,
3408 "SImode variable mult cost = %d\n"
3409 "SImode constant mult cost = %d\n"
3410 "SImode short constant mult cost = %d\n"
3411 "DImode multiplication cost = %d\n"
3412 "SImode division cost = %d\n"
3413 "DImode division cost = %d\n"
3414 "Simple fp operation cost = %d\n"
3415 "DFmode multiplication cost = %d\n"
3416 "SFmode division cost = %d\n"
3417 "DFmode division cost = %d\n"
3418 "cache line size = %d\n"
3419 "l1 cache size = %d\n"
3420 "l2 cache size = %d\n"
3421 "simultaneous prefetches = %d\n"
3422 "\n",
3423 rs6000_cost->mulsi,
3424 rs6000_cost->mulsi_const,
3425 rs6000_cost->mulsi_const9,
3426 rs6000_cost->muldi,
3427 rs6000_cost->divsi,
3428 rs6000_cost->divdi,
3429 rs6000_cost->fp,
3430 rs6000_cost->dmul,
3431 rs6000_cost->sdiv,
3432 rs6000_cost->ddiv,
3433 rs6000_cost->cache_line_size,
3434 rs6000_cost->l1_cache_size,
3435 rs6000_cost->l2_cache_size,
3436 rs6000_cost->simultaneous_prefetches);
3437 }
3438 }
3439
3440 #if TARGET_MACHO
3441 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3442
3443 static void
3444 darwin_rs6000_override_options (void)
3445 {
3446 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3447 off. */
3448 rs6000_altivec_abi = 1;
3449 TARGET_ALTIVEC_VRSAVE = 1;
3450 rs6000_current_abi = ABI_DARWIN;
3451
3452 if (DEFAULT_ABI == ABI_DARWIN
3453 && TARGET_64BIT)
3454 darwin_one_byte_bool = 1;
3455
3456 if (TARGET_64BIT && ! TARGET_POWERPC64)
3457 {
3458 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3459 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3460 }
3461 if (flag_mkernel)
3462 {
3463 rs6000_default_long_calls = 1;
3464 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3465 }
3466
3467 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3468 Altivec. */
3469 if (!flag_mkernel && !flag_apple_kext
3470 && TARGET_64BIT
3471 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3472 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3473
3474 /* Unless the user (not the configurer) has explicitly overridden
3475 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3476 G4 unless targeting the kernel. */
3477 if (!flag_mkernel
3478 && !flag_apple_kext
3479 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3480 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3481 && ! global_options_set.x_rs6000_cpu_index)
3482 {
3483 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3484 }
3485 }
3486 #endif
3487
3488 /* If not otherwise specified by a target, make 'long double' equivalent to
3489 'double'. */
3490
3491 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3492 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3493 #endif
3494
3495 /* Return the builtin mask of the various options used that could affect which
3496 builtins were used. In the past we used target_flags, but we've run out of
3497 bits, and some options like SPE and PAIRED are no longer in
3498 target_flags. */
3499
3500 HOST_WIDE_INT
3501 rs6000_builtin_mask_calculate (void)
3502 {
3503 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3504 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3505 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3506 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3507 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3508 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3509 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3510 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3511 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3512 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3513 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3514 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3515 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3516 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3517 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3518 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
3519 }
3520
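/* A sketch of how a mask built this way is typically consumed: a builtin
   is usable only if every feature bit it requires is present.  The bit
   values are hypothetical stand-ins for the RS6000_BTM_* masks; not part
   of the build.  */
#if 0
#include <assert.h>

#define BTM_ALTIVEC 0x1ULL  /* stand-in for RS6000_BTM_ALTIVEC */
#define BTM_VSX     0x2ULL  /* stand-in for RS6000_BTM_VSX */

static int
builtin_enabled (unsigned long long mask, unsigned long long required)
{
  return (mask & required) == required;
}

int
main (void)
{
  unsigned long long mask = BTM_ALTIVEC;  /* e.g. -maltivec without -mvsx.  */
  assert (builtin_enabled (mask, BTM_ALTIVEC));
  assert (!builtin_enabled (mask, BTM_ALTIVEC | BTM_VSX));
  return 0;
}
#endif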
3521 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3522 to clobber the XER[CA] bit because clobbering that bit without telling
3523 the compiler worked just fine with versions of GCC before GCC 5, and
3524 breaking a lot of older code in ways that are hard to track down is
3525 not such a great idea. */
3526
3527 static rtx_insn *
3528 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3529 vec<const char *> &/*constraints*/,
3530 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3531 {
3532 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3533 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3534 return NULL;
3535 }
3536
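/* A hypothetical user-level illustration of why the implicit clobber
   matters: addic writes XER[CA], yet older asm statements like this one
   declare no clobber for it.  Not part of the build.  */
#if 0
long
increment_sets_carry (long a)
{
  long r;
  /* addic modifies the carry bit, but the asm does not say so;
     rs6000_md_asm_adjust supplies the XER[CA] clobber on its behalf.  */
  __asm__ ("addic %0,%1,1" : "=r" (r) : "r" (a));
  return r;
}
#endif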
3537 /* Override command line options. Mostly we process the processor type and
3538 sometimes adjust other TARGET_ options. */
3539
3540 static bool
3541 rs6000_option_override_internal (bool global_init_p)
3542 {
3543 bool ret = true;
3544 bool have_cpu = false;
3545
3546 /* The default cpu requested at configure time, if any. */
3547 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3548
3549 HOST_WIDE_INT set_masks;
3550 int cpu_index;
3551 int tune_index;
3552 struct cl_target_option *main_target_opt
3553 = ((global_init_p || target_option_default_node == NULL)
3554 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3555
3556 /* Print defaults. */
3557 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3558 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3559
3560 /* Remember the explicit arguments. */
3561 if (global_init_p)
3562 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3563
3564 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3565 library functions, so warn about it. The flag may be useful for
3566 performance studies from time to time though, so don't disable it
3567 entirely. */
3568 if (global_options_set.x_rs6000_alignment_flags
3569 && rs6000_alignment_flags == MASK_ALIGN_POWER
3570 && DEFAULT_ABI == ABI_DARWIN
3571 && TARGET_64BIT)
3572 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3573 " it is incompatible with the installed C and C++ libraries");
3574
3575 /* Numerous experiments show that IRA-based loop pressure
3576 calculation works better for RTL loop invariant motion on targets
3577 with enough (>= 32) registers. It is an expensive optimization,
3578 so it is on only for peak performance. */
3579 if (optimize >= 3 && global_init_p
3580 && !global_options_set.x_flag_ira_loop_pressure)
3581 flag_ira_loop_pressure = 1;
3582
3583 /* Set the pointer size. */
3584 if (TARGET_64BIT)
3585 {
3586 rs6000_pmode = (int)DImode;
3587 rs6000_pointer_size = 64;
3588 }
3589 else
3590 {
3591 rs6000_pmode = (int)SImode;
3592 rs6000_pointer_size = 32;
3593 }
3594
3595 /* Some OSs don't support saving the high part of 64-bit registers on context
3596 switch. Other OSs don't support saving Altivec registers. On those OSs,
3597 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3598 if the user wants either, the user must explicitly specify them and we
3599 won't interfere with the user's specification. */
3600
3601 set_masks = POWERPC_MASKS;
3602 #ifdef OS_MISSING_POWERPC64
3603 if (OS_MISSING_POWERPC64)
3604 set_masks &= ~OPTION_MASK_POWERPC64;
3605 #endif
3606 #ifdef OS_MISSING_ALTIVEC
3607 if (OS_MISSING_ALTIVEC)
3608 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3609 #endif
3610
3611 /* Don't let the processor default override options given explicitly. */
3612 set_masks &= ~rs6000_isa_flags_explicit;
3613
3614 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3615 the cpu in a target attribute or pragma, but did not specify a tuning
3616 option, use the cpu for the tuning option rather than the option specified
3617 with -mtune on the command line. Process a '--with-cpu' configuration
3618 request as an implicit -mcpu. */
3619 if (rs6000_cpu_index >= 0)
3620 {
3621 cpu_index = rs6000_cpu_index;
3622 have_cpu = true;
3623 }
3624 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3625 {
3626 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3627 have_cpu = true;
3628 }
3629 else if (implicit_cpu)
3630 {
3631 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3632 have_cpu = true;
3633 }
3634 else
3635 {
3636 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3637 const char *default_cpu = ((!TARGET_POWERPC64)
3638 ? "powerpc"
3639 : ((BYTES_BIG_ENDIAN)
3640 ? "powerpc64"
3641 : "powerpc64le"));
3642
3643 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3644 have_cpu = false;
3645 }
3646
3647 gcc_assert (cpu_index >= 0);
3648
3649 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3650 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3651 with those from the cpu, except for options that were explicitly set. If
3652 we don't have a cpu, do not override the target bits set in
3653 TARGET_DEFAULT. */
3654 if (have_cpu)
3655 {
3656 rs6000_isa_flags &= ~set_masks;
3657 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3658 & set_masks);
3659 }
3660 else
3661 {
3662 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3663 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3664 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3665 to using rs6000_isa_flags, we now need to do the initialization here.
3666
3667 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3668 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3669 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3670 : processor_target_table[cpu_index].target_enable);
3671 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3672 }
3673
3674 if (rs6000_tune_index >= 0)
3675 tune_index = rs6000_tune_index;
3676 else if (have_cpu)
3677 {
3678 /* Until power9 tuning is available, use power8 tuning if -mcpu=power9. */
3679 if (processor_target_table[cpu_index].processor != PROCESSOR_POWER9)
3680 rs6000_tune_index = tune_index = cpu_index;
3681 else
3682 {
3683 size_t i;
3684 tune_index = -1;
3685 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3686 if (processor_target_table[i].processor == PROCESSOR_POWER8)
3687 {
3688 rs6000_tune_index = tune_index = i;
3689 break;
3690 }
3691 }
3692 }
3693 else
3694 {
3695 size_t i;
3696 enum processor_type tune_proc
3697 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3698
3699 tune_index = -1;
3700 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3701 if (processor_target_table[i].processor == tune_proc)
3702 {
3703 rs6000_tune_index = tune_index = i;
3704 break;
3705 }
3706 }
3707
3708 gcc_assert (tune_index >= 0);
3709 rs6000_cpu = processor_target_table[tune_index].processor;
3710
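/* The -mcpu precedence implemented above, restated as a compact sketch:
   the command line wins, then a target attribute/pragma, then the
   configure default, then the endian-derived fallback.  The parameters
   are hypothetical stand-ins for the real globals; not part of the
   build.  */
#if 0
static int
pick_cpu_index (int cmdline, int attribute, int configured, int fallback)
{
  if (cmdline >= 0)
    return cmdline;     /* -mcpu=<xxx> on the command line.  */
  if (attribute >= 0)
    return attribute;   /* cpu from a target attribute or pragma.  */
  if (configured >= 0)
    return configured;  /* --with-cpu=<xxx> at configure time.  */
  return fallback;      /* powerpc / powerpc64 / powerpc64le.  */
}
#endif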
3711 /* Pick defaults for the SPE-related control flags. Do this early to make
3712 sure that the TARGET_ macros reflect the final settings as soon as possible. */
3713 {
3714 int spe_capable_cpu =
3715 (rs6000_cpu == PROCESSOR_PPC8540
3716 || rs6000_cpu == PROCESSOR_PPC8548);
3717
3718 if (!global_options_set.x_rs6000_spe_abi)
3719 rs6000_spe_abi = spe_capable_cpu;
3720
3721 if (!global_options_set.x_rs6000_spe)
3722 rs6000_spe = spe_capable_cpu;
3723
3724 if (!global_options_set.x_rs6000_float_gprs)
3725 rs6000_float_gprs =
3726 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3727 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3728 : 0);
3729 }
3730
3731 if (global_options_set.x_rs6000_spe_abi
3732 && rs6000_spe_abi
3733 && !TARGET_SPE_ABI)
3734 error ("not configured for SPE ABI");
3735
3736 if (global_options_set.x_rs6000_spe
3737 && rs6000_spe
3738 && !TARGET_SPE)
3739 error ("not configured for SPE instruction set");
3740
3741 if (main_target_opt != NULL
3742 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3743 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3744 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3745 error ("target attribute or pragma changes SPE ABI");
3746
3747 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3748 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3749 || rs6000_cpu == PROCESSOR_PPCE5500)
3750 {
3751 if (TARGET_ALTIVEC)
3752 error ("AltiVec not supported in this target");
3753 if (TARGET_SPE)
3754 error ("SPE not supported in this target");
3755 }
3756 if (rs6000_cpu == PROCESSOR_PPCE6500)
3757 {
3758 if (TARGET_SPE)
3759 error ("SPE not supported in this target");
3760 }
3761
3762 /* Disable Cell microcode if we are optimizing for the Cell
3763 and not optimizing for size. */
3764 if (rs6000_gen_cell_microcode == -1)
3765 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3766 && !optimize_size);
3767
3768 /* If we are optimizing big endian systems for space and it's OK to
3769 use instructions that would be microcoded on the Cell, use the
3770 load/store multiple and string instructions. */
3771 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3772 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3773 | OPTION_MASK_STRING);
3774
3775 /* Don't allow -mmultiple or -mstring on little endian systems
3776 unless the cpu is a 750, because the hardware doesn't support the
3777 instructions used in little endian mode, and using them causes an
3778 alignment trap. The 750 does not cause an alignment trap (except
3779 when the target is unaligned). */
3780
3781 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3782 {
3783 if (TARGET_MULTIPLE)
3784 {
3785 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3786 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3787 warning (0, "-mmultiple is not supported on little endian systems");
3788 }
3789
3790 if (TARGET_STRING)
3791 {
3792 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3793 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3794 warning (0, "-mstring is not supported on little endian systems");
3795 }
3796 }
3797
3798 /* If little-endian, default to -mstrict-align on older processors.
3799 Testing for htm matches power8 and later. */
3800 if (!BYTES_BIG_ENDIAN
3801 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3802 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3803
3804 /* -maltivec={le,be} implies -maltivec. */
3805 if (rs6000_altivec_element_order != 0)
3806 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3807
3808 /* Disallow -maltivec=le in big endian mode for now. This is not
3809 known to be useful for anyone. */
3810 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3811 {
3812 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3813 rs6000_altivec_element_order = 0;
3814 }
3815
3816 /* Add some warnings for VSX. */
3817 if (TARGET_VSX)
3818 {
3819 const char *msg = NULL;
3820 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3821 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3822 {
3823 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3824 msg = N_("-mvsx requires hardware floating point");
3825 else
3826 {
3827 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3828 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3829 }
3830 }
3831 else if (TARGET_PAIRED_FLOAT)
3832 msg = N_("-mvsx and -mpaired are incompatible");
3833 else if (TARGET_AVOID_XFORM > 0)
3834 msg = N_("-mvsx needs indexed addressing");
3835 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3836 & OPTION_MASK_ALTIVEC))
3837 {
3838 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3839 msg = N_("-mvsx and -mno-altivec are incompatible");
3840 else
3841 msg = N_("-mno-altivec disables vsx");
3842 }
3843
3844 if (msg)
3845 {
3846 warning (0, msg);
3847 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3848 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3849 }
3850 }
3851
3852 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3853 the -mcpu setting to enable options that conflict. */
3854 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3855 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3856 | OPTION_MASK_ALTIVEC
3857 | OPTION_MASK_VSX)) != 0)
3858 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3859 | OPTION_MASK_DIRECT_MOVE)
3860 & ~rs6000_isa_flags_explicit);
3861
3862 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3863 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3864
3865 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3866 unless the user explicitly used the -mno-<option> to disable the code. */
3867 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM || TARGET_P9_MINMAX)
3868 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3869 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3870 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3871 else if (TARGET_VSX)
3872 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3873 else if (TARGET_POPCNTD)
3874 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3875 else if (TARGET_DFP)
3876 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3877 else if (TARGET_CMPB)
3878 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3879 else if (TARGET_FPRND)
3880 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3881 else if (TARGET_POPCNTB)
3882 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3883 else if (TARGET_ALTIVEC)
3884 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
3885
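/* The cascade above ORs in the full mask for the newest ISA level implied
   by an enabled switch, while the & ~rs6000_isa_flags_explicit term keeps
   every bit the user set or cleared by hand untouched.  A small sketch of
   that bit arithmetic with made-up values; not part of the build.  */
#if 0
#include <assert.h>

int
main (void)
{
  unsigned flags = 0x4;         /* e.g. only the VSX bit on.  */
  unsigned isa_masks = 0x7;     /* e.g. VSX implies two older bits.  */
  unsigned explicit_bits = 0x2; /* the user gave -mno-<one of them>.  */

  flags |= (isa_masks & ~explicit_bits);
  assert (flags == 0x5);        /* implied bits added, except the one the
                                   user explicitly controlled.  */
  return 0;
}
#endif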
3886 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3887 {
3888 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3889 error ("-mcrypto requires -maltivec");
3890 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3891 }
3892
3893 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3894 {
3895 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3896 error ("-mdirect-move requires -mvsx");
3897 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3898 }
3899
3900 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3901 {
3902 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3903 error ("-mpower8-vector requires -maltivec");
3904 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3905 }
3906
3907 if (TARGET_P8_VECTOR && !TARGET_VSX)
3908 {
3909 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3910 error ("-mpower8-vector requires -mvsx");
3911 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3912 }
3913
3914 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3915 {
3916 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3917 error ("-mvsx-timode requires -mvsx");
3918 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3919 }
3920
3921 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3922 {
3923 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3924 error ("-mhard-dfp requires -mhard-float");
3925 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3926 }
3927
3928 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
3929 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
3930 the individual option. */
3931 if (TARGET_UPPER_REGS > 0)
3932 {
3933 if (TARGET_VSX
3934 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3935 {
3936 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
3937 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3938 }
3939 if (TARGET_P8_VECTOR
3940 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3941 {
3942 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
3943 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3944 }
3945 }
3946 else if (TARGET_UPPER_REGS == 0)
3947 {
3948 if (TARGET_VSX
3949 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3950 {
3951 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3952 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3953 }
3954 if (TARGET_P8_VECTOR
3955 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3956 {
3957 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3958 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3959 }
3960 }
3961
3962 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
3963 {
3964 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
3965 error ("-mupper-regs-df requires -mvsx");
3966 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3967 }
3968
3969 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
3970 {
3971 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
3972 error ("-mupper-regs-sf requires -mpower8-vector");
3973 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3974 }
3975
3976 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3977 silently turn off quad memory mode. */
3978 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3979 {
3980 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3981 warning (0, N_("-mquad-memory requires 64-bit mode"));
3982
3983 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3984 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3985
3986 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3987 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3988 }
3989
3990 /* Non-atomic quad memory load/store are disabled for little endian, since
3991 the words are reversed, but atomic operations can still be done by
3992 swapping the words. */
3993 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3994 {
3995 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3996 warning (0, N_("-mquad-memory is not available in little endian mode"));
3997
3998 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3999 }
4000
4001 /* Assume that if the user asked for normal quad memory instructions, they
4002 want the atomic versions as well, unless they explicitly told us not to
4003 use quad word atomic instructions. */
4004 if (TARGET_QUAD_MEMORY
4005 && !TARGET_QUAD_MEMORY_ATOMIC
4006 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4007 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4008
4009 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4010 generating power8 instructions. */
4011 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4012 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4013 & OPTION_MASK_P8_FUSION);
4014
4015 /* Setting additional fusion flags turns on base fusion. */
4016 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4017 {
4018 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4019 {
4020 if (TARGET_P8_FUSION_SIGN)
4021 error ("-mpower8-fusion-sign requires -mpower8-fusion");
4022
4023 if (TARGET_TOC_FUSION)
4024 error ("-mtoc-fusion requires -mpower8-fusion");
4025
4026 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4027 }
4028 else
4029 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4030 }
4031
4032 /* Power9 fusion is a superset of power8 fusion. */
4033 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4034 {
4035 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4036 {
4037 error ("-mpower9-fusion requires -mpower8-fusion");
4038 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4039 }
4040 else
4041 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4042 }
4043
4044 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4045 generating power9 instructions. */
4046 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4047 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4048 & OPTION_MASK_P9_FUSION);
4049
4050 /* Power8 does not fuse sign extended loads with the addis. If we are
4051 optimizing at high levels for speed, convert a sign extended load into a
4052 zero extending load, and an explicit sign extension. */
4053 if (TARGET_P8_FUSION
4054 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4055 && optimize_function_for_speed_p (cfun)
4056 && optimize >= 3)
4057 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4058
4059 /* TOC fusion requires 64-bit and medium/large code model. */
4060 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4061 {
4062 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4063 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4064 warning (0, N_("-mtoc-fusion requires 64-bit"));
4065 }
4066
4067 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4068 {
4069 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4070 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4071 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4072 }
4073
4074 /* Turn on -mtoc-fusion by default if power8 fusion is enabled and we are
4075 using the 64-bit medium or large code model. */
4076 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4077 && (TARGET_CMODEL != CMODEL_SMALL)
4078 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4079 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4080
4081 /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */
4082 if (TARGET_P9_DFORM && !TARGET_P9_VECTOR)
4083 {
4084 if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4085 error ("-mpower9-dform requires -mpower9-vector");
4086 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
4087 }
4088
4089 if (TARGET_P9_DFORM && !TARGET_UPPER_REGS_DF)
4090 {
4091 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
4092 error ("-mpower9-dform requires -mupper-regs-df");
4093 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
4094 }
4095
4096 if (TARGET_P9_DFORM && !TARGET_UPPER_REGS_SF)
4097 {
4098 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
4099 error ("-mpower9-dform requires -mupper-regs-sf");
4100 rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM;
4101 }
4102
4103 /* ISA 3.0 vector instructions include ISA 2.07. */
4104 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4105 {
4106 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4107 error ("-mpower9-vector requires -mpower8-vector");
4108 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4109 }
4110
4111 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4112 support. If we only have ISA 2.06 support, and the user did not specify
4113 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4114 but we don't enable the full vectorization support. */
4115 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4116 TARGET_ALLOW_MOVMISALIGN = 1;
4117
4118 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4119 {
4120 if (TARGET_ALLOW_MOVMISALIGN > 0)
4121 error ("-mallow-movmisalign requires -mvsx");
4122
4123 TARGET_ALLOW_MOVMISALIGN = 0;
4124 }
4125
4126 /* Determine when unaligned vector accesses are permitted, and when
4127 they are preferred over masked Altivec loads. Note that if
4128 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4129 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4130 not true. */
4131 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4132 {
4133 if (!TARGET_VSX)
4134 {
4135 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4136 error ("-mefficient-unaligned-vsx requires -mvsx");
4137
4138 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4139 }
4140
4141 else if (!TARGET_ALLOW_MOVMISALIGN)
4142 {
4143 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4144 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
4145
4146 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4147 }
4148 }
4149
4150 /* __float128 requires VSX support. */
4151 if (TARGET_FLOAT128 && !TARGET_VSX)
4152 {
4153 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) != 0)
4154 error ("-mfloat128 requires VSX support");
4155
4156 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128 | OPTION_MASK_FLOAT128_HW);
4157 }
4158
4159 /* IEEE 128-bit floating point hardware instructions imply enabling
4160 __float128. */
4161 if (TARGET_FLOAT128_HW
4162 && (rs6000_isa_flags & (OPTION_MASK_P9_VECTOR
4163 | OPTION_MASK_DIRECT_MOVE
4164 | OPTION_MASK_UPPER_REGS_DF
4165 | OPTION_MASK_UPPER_REGS_SF)) == 0)
4166 {
4167 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4168 error ("-mfloat128-hardware requires full ISA 3.0 support");
4169
4170 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4171 }
4172
4173 else if (TARGET_P9_VECTOR && !TARGET_FLOAT128_HW
4174 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) == 0)
4175 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4176
4177 if (TARGET_FLOAT128_HW
4178 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128) == 0)
4179 rs6000_isa_flags |= OPTION_MASK_FLOAT128;
4180
4181 /* Print the options after updating the defaults. */
4182 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4183 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4184
4185 /* E500mc does "better" if we inline more aggressively. Respect the
4186 user's opinion, though. */
4187 if (rs6000_block_move_inline_limit == 0
4188 && (rs6000_cpu == PROCESSOR_PPCE500MC
4189 || rs6000_cpu == PROCESSOR_PPCE500MC64
4190 || rs6000_cpu == PROCESSOR_PPCE5500
4191 || rs6000_cpu == PROCESSOR_PPCE6500))
4192 rs6000_block_move_inline_limit = 128;
4193
4194 /* store_one_arg depends on expand_block_move to handle at least the
4195 size of reg_parm_stack_space. */
4196 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4197 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4198
4199 if (global_init_p)
4200 {
4201 /* If the appropriate debug option is enabled, replace the target hooks
4202 with debug versions that call the real version and then print
4203 debugging information. */
4204 if (TARGET_DEBUG_COST)
4205 {
4206 targetm.rtx_costs = rs6000_debug_rtx_costs;
4207 targetm.address_cost = rs6000_debug_address_cost;
4208 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4209 }
4210
4211 if (TARGET_DEBUG_ADDR)
4212 {
4213 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4214 targetm.legitimize_address = rs6000_debug_legitimize_address;
4215 rs6000_secondary_reload_class_ptr
4216 = rs6000_debug_secondary_reload_class;
4217 rs6000_secondary_memory_needed_ptr
4218 = rs6000_debug_secondary_memory_needed;
4219 rs6000_cannot_change_mode_class_ptr
4220 = rs6000_debug_cannot_change_mode_class;
4221 rs6000_preferred_reload_class_ptr
4222 = rs6000_debug_preferred_reload_class;
4223 rs6000_legitimize_reload_address_ptr
4224 = rs6000_debug_legitimize_reload_address;
4225 rs6000_mode_dependent_address_ptr
4226 = rs6000_debug_mode_dependent_address;
4227 }
4228
4229 if (rs6000_veclibabi_name)
4230 {
4231 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4232 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4233 else
4234 {
4235 error ("unknown vectorization library ABI type (%s) for "
4236 "-mveclibabi= switch", rs6000_veclibabi_name);
4237 ret = false;
4238 }
4239 }
4240 }
4241
4242 if (!global_options_set.x_rs6000_long_double_type_size)
4243 {
4244 if (main_target_opt != NULL
4245 && (main_target_opt->x_rs6000_long_double_type_size
4246 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4247 error ("target attribute or pragma changes long double size");
4248 else
4249 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4250 }
4251
4252 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
4253 if (!global_options_set.x_rs6000_ieeequad)
4254 rs6000_ieeequad = 1;
4255 #endif
4256
4257 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4258 target attribute or pragma which automatically enables both options,
4259 unless the altivec ABI was set. This is set by default for 64-bit, but
4260 not for 32-bit. */
4261 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4262 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4263 | OPTION_MASK_FLOAT128)
4264 & ~rs6000_isa_flags_explicit);
4265
4266 /* Enable Altivec ABI for AIX -maltivec. */
4267 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4268 {
4269 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4270 error ("target attribute or pragma changes AltiVec ABI");
4271 else
4272 rs6000_altivec_abi = 1;
4273 }
4274
4275 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4276 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4277 be explicitly overridden in either case. */
4278 if (TARGET_ELF)
4279 {
4280 if (!global_options_set.x_rs6000_altivec_abi
4281 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4282 {
4283 if (main_target_opt != NULL
4284 && !main_target_opt->x_rs6000_altivec_abi)
4285 error ("target attribute or pragma changes AltiVec ABI");
4286 else
4287 rs6000_altivec_abi = 1;
4288 }
4289 }
4290
4291 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4292 So far, the only darwin64 targets are also MACH-O. */
4293 if (TARGET_MACHO
4294 && DEFAULT_ABI == ABI_DARWIN
4295 && TARGET_64BIT)
4296 {
4297 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4298 error ("target attribute or pragma changes darwin64 ABI");
4299 else
4300 {
4301 rs6000_darwin64_abi = 1;
4302 /* Default to natural alignment, for better performance. */
4303 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4304 }
4305 }
4306
4307 /* Place FP constants in the constant pool instead of TOC
4308 if section anchors are enabled. */
4309 if (flag_section_anchors
4310 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4311 TARGET_NO_FP_IN_TOC = 1;
4312
4313 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4314 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4315
4316 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4317 SUBTARGET_OVERRIDE_OPTIONS;
4318 #endif
4319 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4320 SUBSUBTARGET_OVERRIDE_OPTIONS;
4321 #endif
4322 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4323 SUB3TARGET_OVERRIDE_OPTIONS;
4324 #endif
4325
4326 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4327 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4328
4329 /* For the E500 family of cores, reset the single/double FP flags to let us
4330 check that they remain constant across attributes or pragmas. Also,
4331 clear a possible request for string instructions, which are not supported
4332 and which we might have silently enabled above for -Os.
4333
4334 For other families, clear ISEL in case it was set implicitly.
4335 */
4336
4337 switch (rs6000_cpu)
4338 {
4339 case PROCESSOR_PPC8540:
4340 case PROCESSOR_PPC8548:
4341 case PROCESSOR_PPCE500MC:
4342 case PROCESSOR_PPCE500MC64:
4343 case PROCESSOR_PPCE5500:
4344 case PROCESSOR_PPCE6500:
4345
4346 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
4347 rs6000_double_float = TARGET_E500_DOUBLE;
4348
4349 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4350
4351 break;
4352
4353 default:
4354
4355 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
4356 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
4357
4358 break;
4359 }
4360
4361 if (main_target_opt)
4362 {
4363 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4364 error ("target attribute or pragma changes single precision floating "
4365 "point");
4366 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4367 error ("target attribute or pragma changes double precision floating "
4368 "point");
4369 }
4370
4371 /* Detect invalid option combinations with E500. */
4372 CHECK_E500_OPTIONS;
4373
4374 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
4375 && rs6000_cpu != PROCESSOR_POWER5
4376 && rs6000_cpu != PROCESSOR_POWER6
4377 && rs6000_cpu != PROCESSOR_POWER7
4378 && rs6000_cpu != PROCESSOR_POWER8
4379 && rs6000_cpu != PROCESSOR_POWER9
4380 && rs6000_cpu != PROCESSOR_PPCA2
4381 && rs6000_cpu != PROCESSOR_CELL
4382 && rs6000_cpu != PROCESSOR_PPC476);
4383 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
4384 || rs6000_cpu == PROCESSOR_POWER5
4385 || rs6000_cpu == PROCESSOR_POWER7
4386 || rs6000_cpu == PROCESSOR_POWER8
4387 || rs6000_cpu == PROCESSOR_POWER9);
4388 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
4389 || rs6000_cpu == PROCESSOR_POWER5
4390 || rs6000_cpu == PROCESSOR_POWER6
4391 || rs6000_cpu == PROCESSOR_POWER7
4392 || rs6000_cpu == PROCESSOR_POWER8
4393 || rs6000_cpu == PROCESSOR_POWER9
4394 || rs6000_cpu == PROCESSOR_PPCE500MC
4395 || rs6000_cpu == PROCESSOR_PPCE500MC64
4396 || rs6000_cpu == PROCESSOR_PPCE5500
4397 || rs6000_cpu == PROCESSOR_PPCE6500);
4398
4399 /* Allow debug switches to override the above settings. These are set to -1
4400 in rs6000.opt to indicate the user hasn't directly set the switch. */
4401 if (TARGET_ALWAYS_HINT >= 0)
4402 rs6000_always_hint = TARGET_ALWAYS_HINT;
4403
4404 if (TARGET_SCHED_GROUPS >= 0)
4405 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4406
4407 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4408 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4409
4410 rs6000_sched_restricted_insns_priority
4411 = (rs6000_sched_groups ? 1 : 0);
4412
4413 /* Handle -msched-costly-dep option. */
4414 rs6000_sched_costly_dep
4415 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4416
4417 if (rs6000_sched_costly_dep_str)
4418 {
4419 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4420 rs6000_sched_costly_dep = no_dep_costly;
4421 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4422 rs6000_sched_costly_dep = all_deps_costly;
4423 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4424 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4425 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4426 rs6000_sched_costly_dep = store_to_load_dep_costly;
4427 else
4428 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4429 atoi (rs6000_sched_costly_dep_str));
4430 }
4431
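/* A standalone sketch of the string-to-enum mapping above for
   -msched-costly-dep=, including the numeric fallback through atoi.
   The enum is a simplified stand-in; not part of the build.  */
#if 0
#include <stdlib.h>
#include <string.h>

enum dep_cost { NO_DEP, ALL_DEPS, TRUE_STORE_TO_LOAD, STORE_TO_LOAD };

static int
parse_costly_dep (const char *s)
{
  if (!strcmp (s, "no"))
    return NO_DEP;
  if (!strcmp (s, "all"))
    return ALL_DEPS;
  if (!strcmp (s, "true_store_to_load"))
    return TRUE_STORE_TO_LOAD;
  if (!strcmp (s, "store_to_load"))
    return STORE_TO_LOAD;
  return atoi (s);  /* A bare number gives a cost threshold.  */
}
#endif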
4432 /* Handle -minsert-sched-nops option. */
4433 rs6000_sched_insert_nops
4434 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4435
4436 if (rs6000_sched_insert_nops_str)
4437 {
4438 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4439 rs6000_sched_insert_nops = sched_finish_none;
4440 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4441 rs6000_sched_insert_nops = sched_finish_pad_groups;
4442 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4443 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4444 else
4445 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4446 atoi (rs6000_sched_insert_nops_str));
4447 }
4448
4449 if (global_init_p)
4450 {
4451 #ifdef TARGET_REGNAMES
4452 /* If the user desires alternate register names, copy in the
4453 alternate names now. */
4454 if (TARGET_REGNAMES)
4455 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4456 #endif
4457
4458 /* Set aix_struct_return last, after the ABI is determined.
4459 If -maix-struct-return or -msvr4-struct-return was explicitly
4460 used, don't override with the ABI default. */
4461 if (!global_options_set.x_aix_struct_return)
4462 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4463
4464 #if 0
4465 /* IBM XL compiler defaults to unsigned bitfields. */
4466 if (TARGET_XL_COMPAT)
4467 flag_signed_bitfields = 0;
4468 #endif
4469
4470 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4471 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4472
4473 if (TARGET_TOC)
4474 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4475
4476 /* We can only guarantee the availability of DI pseudo-ops when
4477 assembling for 64-bit targets. */
4478 if (!TARGET_64BIT)
4479 {
4480 targetm.asm_out.aligned_op.di = NULL;
4481 targetm.asm_out.unaligned_op.di = NULL;
4482 }
4483
4484
4485 /* Set branch target alignment, if not optimizing for size. */
4486 if (!optimize_size)
4487 {
4488 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4489 aligned 8-byte to avoid misprediction by the branch predictor. */
4490 if (rs6000_cpu == PROCESSOR_TITAN
4491 || rs6000_cpu == PROCESSOR_CELL)
4492 {
4493 if (align_functions <= 0)
4494 align_functions = 8;
4495 if (align_jumps <= 0)
4496 align_jumps = 8;
4497 if (align_loops <= 0)
4498 align_loops = 8;
4499 }
4500 if (rs6000_align_branch_targets)
4501 {
4502 if (align_functions <= 0)
4503 align_functions = 16;
4504 if (align_jumps <= 0)
4505 align_jumps = 16;
4506 if (align_loops <= 0)
4507 {
4508 can_override_loop_align = 1;
4509 align_loops = 16;
4510 }
4511 }
4512 if (align_jumps_max_skip <= 0)
4513 align_jumps_max_skip = 15;
4514 if (align_loops_max_skip <= 0)
4515 align_loops_max_skip = 15;
4516 }
4517
4518 /* Arrange to save and restore machine status around nested functions. */
4519 init_machine_status = rs6000_init_machine_status;
4520
4521 /* We should always be splitting complex arguments, but we can't break
4522 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4523 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4524 targetm.calls.split_complex_arg = NULL;
4525 }
4526
4527 /* Initialize rs6000_cost with the appropriate target costs. */
4528 if (optimize_size)
4529 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4530 else
4531 switch (rs6000_cpu)
4532 {
4533 case PROCESSOR_RS64A:
4534 rs6000_cost = &rs64a_cost;
4535 break;
4536
4537 case PROCESSOR_MPCCORE:
4538 rs6000_cost = &mpccore_cost;
4539 break;
4540
4541 case PROCESSOR_PPC403:
4542 rs6000_cost = &ppc403_cost;
4543 break;
4544
4545 case PROCESSOR_PPC405:
4546 rs6000_cost = &ppc405_cost;
4547 break;
4548
4549 case PROCESSOR_PPC440:
4550 rs6000_cost = &ppc440_cost;
4551 break;
4552
4553 case PROCESSOR_PPC476:
4554 rs6000_cost = &ppc476_cost;
4555 break;
4556
4557 case PROCESSOR_PPC601:
4558 rs6000_cost = &ppc601_cost;
4559 break;
4560
4561 case PROCESSOR_PPC603:
4562 rs6000_cost = &ppc603_cost;
4563 break;
4564
4565 case PROCESSOR_PPC604:
4566 rs6000_cost = &ppc604_cost;
4567 break;
4568
4569 case PROCESSOR_PPC604e:
4570 rs6000_cost = &ppc604e_cost;
4571 break;
4572
4573 case PROCESSOR_PPC620:
4574 rs6000_cost = &ppc620_cost;
4575 break;
4576
4577 case PROCESSOR_PPC630:
4578 rs6000_cost = &ppc630_cost;
4579 break;
4580
4581 case PROCESSOR_CELL:
4582 rs6000_cost = &ppccell_cost;
4583 break;
4584
4585 case PROCESSOR_PPC750:
4586 case PROCESSOR_PPC7400:
4587 rs6000_cost = &ppc750_cost;
4588 break;
4589
4590 case PROCESSOR_PPC7450:
4591 rs6000_cost = &ppc7450_cost;
4592 break;
4593
4594 case PROCESSOR_PPC8540:
4595 case PROCESSOR_PPC8548:
4596 rs6000_cost = &ppc8540_cost;
4597 break;
4598
4599 case PROCESSOR_PPCE300C2:
4600 case PROCESSOR_PPCE300C3:
4601 rs6000_cost = &ppce300c2c3_cost;
4602 break;
4603
4604 case PROCESSOR_PPCE500MC:
4605 rs6000_cost = &ppce500mc_cost;
4606 break;
4607
4608 case PROCESSOR_PPCE500MC64:
4609 rs6000_cost = &ppce500mc64_cost;
4610 break;
4611
4612 case PROCESSOR_PPCE5500:
4613 rs6000_cost = &ppce5500_cost;
4614 break;
4615
4616 case PROCESSOR_PPCE6500:
4617 rs6000_cost = &ppce6500_cost;
4618 break;
4619
4620 case PROCESSOR_TITAN:
4621 rs6000_cost = &titan_cost;
4622 break;
4623
4624 case PROCESSOR_POWER4:
4625 case PROCESSOR_POWER5:
4626 rs6000_cost = &power4_cost;
4627 break;
4628
4629 case PROCESSOR_POWER6:
4630 rs6000_cost = &power6_cost;
4631 break;
4632
4633 case PROCESSOR_POWER7:
4634 rs6000_cost = &power7_cost;
4635 break;
4636
4637 case PROCESSOR_POWER8:
4638 rs6000_cost = &power8_cost;
4639 break;
4640
4641 case PROCESSOR_POWER9:
4642 rs6000_cost = &power9_cost;
4643 break;
4644
4645 case PROCESSOR_PPCA2:
4646 rs6000_cost = &ppca2_cost;
4647 break;
4648
4649 default:
4650 gcc_unreachable ();
4651 }
4652
4653 if (global_init_p)
4654 {
4655 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4656 rs6000_cost->simultaneous_prefetches,
4657 global_options.x_param_values,
4658 global_options_set.x_param_values);
4659 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4660 global_options.x_param_values,
4661 global_options_set.x_param_values);
4662 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4663 rs6000_cost->cache_line_size,
4664 global_options.x_param_values,
4665 global_options_set.x_param_values);
4666 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4667 global_options.x_param_values,
4668 global_options_set.x_param_values);
4669
4670 /* Increase loop peeling limits based on performance analysis. */
4671 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4672 global_options.x_param_values,
4673 global_options_set.x_param_values);
4674 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4675 global_options.x_param_values,
4676 global_options_set.x_param_values);
4677
4678 /* If using typedef char *va_list, signal that
4679 __builtin_va_start (&ap, 0) can be optimized to
4680 ap = __builtin_next_arg (0). */
4681 if (DEFAULT_ABI != ABI_V4)
4682 targetm.expand_builtin_va_start = NULL;
4683 }
4684
4685 /* Set up single/double float flags.
4686 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4687 then set both flags. */
4688 if (TARGET_HARD_FLOAT && TARGET_FPRS
4689 && rs6000_single_float == 0 && rs6000_double_float == 0)
4690 rs6000_single_float = rs6000_double_float = 1;
4691
4692 /* If not explicitly specified via option, decide whether to generate indexed
4693 load/store instructions. */
4694 if (TARGET_AVOID_XFORM == -1)
4695 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4696 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4697 need indexed accesses and the type used is the scalar type of the element
4698 being loaded or stored. */
4699 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4700 && !TARGET_ALTIVEC);
4701
4702 /* Set the -mrecip options. */
4703 if (rs6000_recip_name)
4704 {
4705 char *p = ASTRDUP (rs6000_recip_name);
4706 char *q;
4707 unsigned int mask, i;
4708 bool invert;
4709
4710 while ((q = strtok (p, ",")) != NULL)
4711 {
4712 p = NULL;
4713 if (*q == '!')
4714 {
4715 invert = true;
4716 q++;
4717 }
4718 else
4719 invert = false;
4720
4721 if (!strcmp (q, "default"))
4722 mask = ((TARGET_RECIP_PRECISION)
4723 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4724 else
4725 {
4726 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4727 if (!strcmp (q, recip_options[i].string))
4728 {
4729 mask = recip_options[i].mask;
4730 break;
4731 }
4732
4733 if (i == ARRAY_SIZE (recip_options))
4734 {
4735 error ("unknown option for -mrecip=%s", q);
4736 invert = false;
4737 mask = 0;
4738 ret = false;
4739 }
4740 }
4741
4742 if (invert)
4743 rs6000_recip_control &= ~mask;
4744 else
4745 rs6000_recip_control |= mask;
4746 }
4747 }
4748
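/* A standalone sketch of the -mrecip= parser above: comma-separated
   options, each optionally prefixed with '!' to clear rather than set its
   mask bits.  The two option masks are made up for illustration; not part
   of the build.  */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  char buf[] = "div,!rsqrt";  /* as if the user wrote -mrecip=div,!rsqrt.  */
  unsigned control = 0;
  char *p = buf, *q;

  while ((q = strtok (p, ",")) != NULL)
    {
      int invert = (*q == '!');
      unsigned mask;

      p = NULL;  /* keep tokenizing the same buffer.  */
      if (invert)
	q++;
      mask = !strcmp (q, "div") ? 0x1 : !strcmp (q, "rsqrt") ? 0x2 : 0;
      if (invert)
	control &= ~mask;
      else
	control |= mask;
    }

  printf ("control = %#x\n", control);  /* prints control = 0x1.  */
  return 0;
}
#endif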
4749 /* Set the builtin mask of the various options used that could affect which
4750 builtins were used. In the past we used target_flags, but we've run out
4751 of bits, and some options like SPE and PAIRED are no longer in
4752 target_flags. */
4753 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4754 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4755 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4756 rs6000_builtin_mask);
4757
4758 /* Initialize all of the registers. */
4759 rs6000_init_hard_regno_mode_ok (global_init_p);
4760
4761 /* Save the initial options in case the user uses function-specific options. */
4762 if (global_init_p)
4763 target_option_default_node = target_option_current_node
4764 = build_target_option_node (&global_options);
4765
4766 /* If not explicitly specified via option, decide whether to generate the
4767 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4768 if (TARGET_LINK_STACK == -1)
4769 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4770
4771 return ret;
4772 }
4773
4774 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4775 define the target cpu type. */
4776
4777 static void
4778 rs6000_option_override (void)
4779 {
4780 (void) rs6000_option_override_internal (true);
4781
4782 /* Register machine-specific passes. This needs to be done at start-up.
4783 It's convenient to do it here (like i386 does). */
4784 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4785
4786 struct register_pass_info analyze_swaps_info
4787 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4788
4789 register_pass (&analyze_swaps_info);
4790 }
4791
4792 \f
4793 /* Implement targetm.vectorize.builtin_mask_for_load. */
4794 static tree
4795 rs6000_builtin_mask_for_load (void)
4796 {
4797 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4798 if ((TARGET_ALTIVEC && !TARGET_VSX)
4799 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4800 return altivec_builtin_mask_for_load;
4801 else
4802 return 0;
4803 }
4804
4805 /* Implement LOOP_ALIGN. */
4806 int
4807 rs6000_loop_align (rtx label)
4808 {
4809 basic_block bb;
4810 int ninsns;
4811
4812 /* Don't override loop alignment if -falign-loops was specified. */
4813 if (!can_override_loop_align)
4814 return align_loops_log;
4815
4816 bb = BLOCK_FOR_INSN (label);
4817 ninsns = num_loop_insns (bb->loop_father);
4818
4819 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4820 if (ninsns > 4 && ninsns <= 8
4821 && (rs6000_cpu == PROCESSOR_POWER4
4822 || rs6000_cpu == PROCESSOR_POWER5
4823 || rs6000_cpu == PROCESSOR_POWER6
4824 || rs6000_cpu == PROCESSOR_POWER7
4825 || rs6000_cpu == PROCESSOR_POWER8
4826 || rs6000_cpu == PROCESSOR_POWER9))
4827 return 5;
4828 else
4829 return align_loops_log;
4830 }
4831
4832 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4833 static int
4834 rs6000_loop_align_max_skip (rtx_insn *label)
4835 {
4836 return (1 << rs6000_loop_align (label)) - 1;
4837 }
4838
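/* A worked example of the pair above: when rs6000_loop_align returns 5,
   loops are aligned to 1 << 5 = 32 bytes (one icache sector on the listed
   POWER cores), and the max skip allows up to 31 bytes of padding.  Just
   the arithmetic; not part of the build.  */
#if 0
#include <assert.h>

int
main (void)
{
  int align_log = 5;                    /* return value for small loops.  */
  assert ((1 << align_log) == 32);      /* 32-byte alignment.  */
  assert ((1 << align_log) - 1 == 31);  /* bytes the aligner may skip.  */
  return 0;
}
#endif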
4839 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4840 after applying N iterations. This routine does not determine
4841 how many iterations are required to reach the desired alignment. */
4842
4843 static bool
4844 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4845 {
4846 if (is_packed)
4847 return false;
4848
4849 if (TARGET_32BIT)
4850 {
4851 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4852 return true;
4853
4854 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4855 return true;
4856
4857 return false;
4858 }
4859 else
4860 {
4861 if (TARGET_MACHO)
4862 return false;
4863
4864 /* Assume that all other types are naturally aligned. CHECKME! */
4865 return true;
4866 }
4867 }
4868
4869 /* Return true if the vector misalignment factor is supported by the
4870 target. */
4871 static bool
4872 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4873 const_tree type,
4874 int misalignment,
4875 bool is_packed)
4876 {
4877 if (TARGET_VSX)
4878 {
4879 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4880 return true;
4881
4882 /* Return false if the movmisalign pattern is not supported for this mode. */
4883 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4884 return false;
4885
4886 if (misalignment == -1)
4887 {
4888 /* Misalignment factor is unknown at compile time but we know
4889 it's word aligned. */
4890 if (rs6000_vector_alignment_reachable (type, is_packed))
4891 {
4892 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4893
4894 if (element_size == 64 || element_size == 32)
4895 return true;
4896 }
4897
4898 return false;
4899 }
4900
4901 /* VSX supports word-aligned vectors. */
4902 if (misalignment % 4 == 0)
4903 return true;
4904 }
4905 return false;
4906 }
4907
4908 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4909 static int
4910 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4911 tree vectype, int misalign)
4912 {
4913 unsigned elements;
4914 tree elem_type;
4915
4916 switch (type_of_cost)
4917 {
4918 case scalar_stmt:
4919 case scalar_load:
4920 case scalar_store:
4921 case vector_stmt:
4922 case vector_load:
4923 case vector_store:
4924 case vec_to_scalar:
4925 case scalar_to_vec:
4926 case cond_branch_not_taken:
4927 return 1;
4928
4929 case vec_perm:
4930 if (TARGET_VSX)
4931 return 3;
4932 else
4933 return 1;
4934
4935 case vec_promote_demote:
4936 if (TARGET_VSX)
4937 return 4;
4938 else
4939 return 1;
4940
4941 case cond_branch_taken:
4942 return 3;
4943
4944 case unaligned_load:
4945 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4946 return 1;
4947
4948 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4949 {
4950 elements = TYPE_VECTOR_SUBPARTS (vectype);
4951 if (elements == 2)
4952 /* Double word aligned. */
4953 return 2;
4954
4955 if (elements == 4)
4956 {
4957 switch (misalign)
4958 {
4959 case 8:
4960 /* Double word aligned. */
4961 return 2;
4962
4963 case -1:
4964 /* Unknown misalignment. */
4965 case 4:
4966 case 12:
4967 /* Word aligned. */
4968 return 22;
4969
4970 default:
4971 gcc_unreachable ();
4972 }
4973 }
4974 }
4975
4976 if (TARGET_ALTIVEC)
4977 /* Misaligned loads are not supported. */
4978 gcc_unreachable ();
4979
4980 return 2;
4981
4982 case unaligned_store:
4983 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4984 return 1;
4985
4986 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4987 {
4988 elements = TYPE_VECTOR_SUBPARTS (vectype);
4989 if (elements == 2)
4990 /* Double word aligned. */
4991 return 2;
4992
4993 if (elements == 4)
4994 {
4995 switch (misalign)
4996 {
4997 case 8:
4998 /* Double word aligned. */
4999 return 2;
5000
5001 case -1:
5002 /* Unknown misalignment. */
5003 case 4:
5004 case 12:
5005 /* Word aligned. */
5006 return 23;
5007
5008 default:
5009 gcc_unreachable ();
5010 }
5011 }
5012 }
5013
5014 if (TARGET_ALTIVEC)
5015 /* Misaligned stores are not supported. */
5016 gcc_unreachable ();
5017
5018 return 2;
5019
5020 case vec_construct:
5021 elements = TYPE_VECTOR_SUBPARTS (vectype);
5022 elem_type = TREE_TYPE (vectype);
5023 /* 32-bit vectors loaded into registers are stored as double
5024 precision, so we need n/2 converts in addition to the usual
5025 n/2 merges to construct a vector of short floats from them. */
5026 if (SCALAR_FLOAT_TYPE_P (elem_type)
5027 && TYPE_PRECISION (elem_type) == 32)
5028 return elements + 1;
5029 else
5030 return elements / 2 + 1;
5031
5032 default:
5033 gcc_unreachable ();
5034 }
5035 }
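
/* Worked examples for the cost table above: a vec_perm costs 3 on VSX but
   1 elsewhere; a misaligned V4SF load that is only word aligned (misalign
   4, 12 or unknown) costs 22 (23 for a store) unless the target handles
   unaligned VSX accesses efficiently, while a doubleword-aligned one
   (misalign 8) costs just 2; and a V4SF vec_construct costs
   elements + 1 == 5 to cover the extra double->single conversions.  */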
5036
5037 /* Implement targetm.vectorize.preferred_simd_mode. */
5038
5039 static machine_mode
5040 rs6000_preferred_simd_mode (machine_mode mode)
5041 {
5042 if (TARGET_VSX)
5043 switch (mode)
5044 {
5045 case DFmode:
5046 return V2DFmode;
5047 default:;
5048 }
5049 if (TARGET_ALTIVEC || TARGET_VSX)
5050 switch (mode)
5051 {
5052 case SFmode:
5053 return V4SFmode;
5054 case TImode:
5055 return V1TImode;
5056 case DImode:
5057 return V2DImode;
5058 case SImode:
5059 return V4SImode;
5060 case HImode:
5061 return V8HImode;
5062 case QImode:
5063 return V16QImode;
5064 default:;
5065 }
5066 if (TARGET_SPE)
5067 switch (mode)
5068 {
5069 case SFmode:
5070 return V2SFmode;
5071 case SImode:
5072 return V2SImode;
5073 default:;
5074 }
5075 if (TARGET_PAIRED_FLOAT
5076 && mode == SFmode)
5077 return V2SFmode;
5078 return word_mode;
5079 }
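
/* For example, DFmode loops prefer V2DFmode when VSX is available, and
   SFmode loops prefer V4SFmode on any AltiVec/VSX target (V2SFmode on
   SPE or paired-float targets); returning word_mode tells the
   vectorizer that no wide vector mode is preferred.  */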
5080
5081 typedef struct _rs6000_cost_data
5082 {
5083 struct loop *loop_info;
5084 unsigned cost[3];
5085 } rs6000_cost_data;
5086
5087 /* Test for likely overcommitment of vector hardware resources. If a
5088 loop iteration is relatively large, and too large a percentage of
5089 instructions in the loop are vectorized, the cost model may not
5090 adequately reflect delays from unavailable vector resources.
5091 Penalize the loop body cost for this case. */
5092
5093 static void
5094 rs6000_density_test (rs6000_cost_data *data)
5095 {
5096 const int DENSITY_PCT_THRESHOLD = 85;
5097 const int DENSITY_SIZE_THRESHOLD = 70;
5098 const int DENSITY_PENALTY = 10;
5099 struct loop *loop = data->loop_info;
5100 basic_block *bbs = get_loop_body (loop);
5101 int nbbs = loop->num_nodes;
5102 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5103 int i, density_pct;
5104
5105 for (i = 0; i < nbbs; i++)
5106 {
5107 basic_block bb = bbs[i];
5108 gimple_stmt_iterator gsi;
5109
5110 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5111 {
5112 gimple *stmt = gsi_stmt (gsi);
5113 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5114
5115 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5116 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5117 not_vec_cost++;
5118 }
5119 }
5120
5121 free (bbs);
5122 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5123
5124 if (density_pct > DENSITY_PCT_THRESHOLD
5125 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5126 {
5127 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5128 if (dump_enabled_p ())
5129 dump_printf_loc (MSG_NOTE, vect_location,
5130 "density %d%%, cost %d exceeds threshold, penalizing "
5131 "loop body cost by %d%%", density_pct,
5132 vec_cost + not_vec_cost, DENSITY_PENALTY);
5133 }
5134 }
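
/* Worked example: with vec_cost == 90 and not_vec_cost == 10, density_pct
   is (90 * 100) / 100 == 90 > 85 and the loop size 100 > 70, so both
   thresholds are exceeded and the body cost is raised from 90 to
   90 * (100 + 10) / 100 == 99.  */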
5135
5136 /* Implement targetm.vectorize.init_cost. */
5137
5138 static void *
5139 rs6000_init_cost (struct loop *loop_info)
5140 {
5141 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5142 data->loop_info = loop_info;
5143 data->cost[vect_prologue] = 0;
5144 data->cost[vect_body] = 0;
5145 data->cost[vect_epilogue] = 0;
5146 return data;
5147 }
5148
5149 /* Implement targetm.vectorize.add_stmt_cost. */
5150
5151 static unsigned
5152 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5153 struct _stmt_vec_info *stmt_info, int misalign,
5154 enum vect_cost_model_location where)
5155 {
5156 rs6000_cost_data *cost_data = (rs6000_cost_data *) data;
5157 unsigned retval = 0;
5158
5159 if (flag_vect_cost_model)
5160 {
5161 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5162 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5163 misalign);
5164 /* Statements in an inner loop relative to the loop being
5165 vectorized are weighted more heavily. The value here is
5166 arbitrary and could potentially be improved with analysis. */
5167 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5168 count *= 50; /* FIXME. */
5169
5170 retval = (unsigned) (count * stmt_cost);
5171 cost_data->cost[where] += retval;
5172 }
5173
5174 return retval;
5175 }
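
/* For example, a single vec_perm statement (cost 3 on VSX) appearing in
   a loop nested inside the loop being vectorized contributes
   1 * 50 * 3 == 150 units to the vect_body bucket, reflecting that
   inner loop statements execute many more times.  */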
5176
5177 /* Implement targetm.vectorize.finish_cost. */
5178
5179 static void
5180 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5181 unsigned *body_cost, unsigned *epilogue_cost)
5182 {
5183 rs6000_cost_data *cost_data = (rs6000_cost_data *) data;
5184
5185 if (cost_data->loop_info)
5186 rs6000_density_test (cost_data);
5187
5188 *prologue_cost = cost_data->cost[vect_prologue];
5189 *body_cost = cost_data->cost[vect_body];
5190 *epilogue_cost = cost_data->cost[vect_epilogue];
5191 }
5192
5193 /* Implement targetm.vectorize.destroy_cost_data. */
5194
5195 static void
5196 rs6000_destroy_cost_data (void *data)
5197 {
5198 free (data);
5199 }
5200
5201 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5202 library with vectorized intrinsics. */
5203
5204 static tree
5205 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5206 tree type_in)
5207 {
5208 char name[32];
5209 const char *suffix = NULL;
5210 tree fntype, new_fndecl, bdecl = NULL_TREE;
5211 int n_args = 1;
5212 const char *bname;
5213 machine_mode el_mode, in_mode;
5214 int n, in_n;
5215
5216 /* Libmass is suitable for unsafe math only as it does not correctly support
5217 parts of IEEE with the required precision such as denormals. Only support
5218 it if we have VSX to use the simd d2 or f4 functions.
5219 XXX: Add variable length support. */
5220 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5221 return NULL_TREE;
5222
5223 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5224 n = TYPE_VECTOR_SUBPARTS (type_out);
5225 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5226 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5227 if (el_mode != in_mode
5228 || n != in_n)
5229 return NULL_TREE;
5230
5231 switch (fn)
5232 {
5233 CASE_CFN_ATAN2:
5234 CASE_CFN_HYPOT:
5235 CASE_CFN_POW:
5236 n_args = 2;
5237 /* fall through */
5238
5239 CASE_CFN_ACOS:
5240 CASE_CFN_ACOSH:
5241 CASE_CFN_ASIN:
5242 CASE_CFN_ASINH:
5243 CASE_CFN_ATAN:
5244 CASE_CFN_ATANH:
5245 CASE_CFN_CBRT:
5246 CASE_CFN_COS:
5247 CASE_CFN_COSH:
5248 CASE_CFN_ERF:
5249 CASE_CFN_ERFC:
5250 CASE_CFN_EXP2:
5251 CASE_CFN_EXP:
5252 CASE_CFN_EXPM1:
5253 CASE_CFN_LGAMMA:
5254 CASE_CFN_LOG10:
5255 CASE_CFN_LOG1P:
5256 CASE_CFN_LOG2:
5257 CASE_CFN_LOG:
5258 CASE_CFN_SIN:
5259 CASE_CFN_SINH:
5260 CASE_CFN_SQRT:
5261 CASE_CFN_TAN:
5262 CASE_CFN_TANH:
5263 if (el_mode == DFmode && n == 2)
5264 {
5265 bdecl = mathfn_built_in (double_type_node, fn);
5266 suffix = "d2"; /* pow -> powd2 */
5267 }
5268 else if (el_mode == SFmode && n == 4)
5269 {
5270 bdecl = mathfn_built_in (float_type_node, fn);
5271 suffix = "4"; /* powf -> powf4 */
5272 }
5273 else
5274 return NULL_TREE;
5275 if (!bdecl)
5276 return NULL_TREE;
5277 break;
5278
5279 default:
5280 return NULL_TREE;
5281 }
5282
5283 gcc_assert (suffix != NULL);
5284 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5285 if (!bname)
5286 return NULL_TREE;
5287
5288 strcpy (name, bname + sizeof ("__builtin_") - 1);
5289 strcat (name, suffix);
5290
5291 if (n_args == 1)
5292 fntype = build_function_type_list (type_out, type_in, NULL);
5293 else if (n_args == 2)
5294 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5295 else
5296 gcc_unreachable ();
5297
5298 /* Build a function declaration for the vectorized function. */
5299 new_fndecl = build_decl (BUILTINS_LOCATION,
5300 FUNCTION_DECL, get_identifier (name), fntype);
5301 TREE_PUBLIC (new_fndecl) = 1;
5302 DECL_EXTERNAL (new_fndecl) = 1;
5303 DECL_IS_NOVOPS (new_fndecl) = 1;
5304 TREE_READONLY (new_fndecl) = 1;
5305
5306 return new_fndecl;
5307 }
5308
5309 /* Returns a function decl for a vectorized version of the builtin function
5310 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5311 if it is not available. */
5312
5313 static tree
5314 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5315 tree type_in)
5316 {
5317 machine_mode in_mode, out_mode;
5318 int in_n, out_n;
5319
5320 if (TARGET_DEBUG_BUILTIN)
5321 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5322 combined_fn_name (combined_fn (fn)),
5323 GET_MODE_NAME (TYPE_MODE (type_out)),
5324 GET_MODE_NAME (TYPE_MODE (type_in)));
5325
5326 if (TREE_CODE (type_out) != VECTOR_TYPE
5327 || TREE_CODE (type_in) != VECTOR_TYPE
5328 || !TARGET_VECTORIZE_BUILTINS)
5329 return NULL_TREE;
5330
5331 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5332 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5333 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5334 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5335
5336 switch (fn)
5337 {
5338 CASE_CFN_COPYSIGN:
5339 if (VECTOR_UNIT_VSX_P (V2DFmode)
5340 && out_mode == DFmode && out_n == 2
5341 && in_mode == DFmode && in_n == 2)
5342 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5343 if (VECTOR_UNIT_VSX_P (V4SFmode)
5344 && out_mode == SFmode && out_n == 4
5345 && in_mode == SFmode && in_n == 4)
5346 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5347 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5348 && out_mode == SFmode && out_n == 4
5349 && in_mode == SFmode && in_n == 4)
5350 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5351 break;
5352 CASE_CFN_CEIL:
5353 if (VECTOR_UNIT_VSX_P (V2DFmode)
5354 && out_mode == DFmode && out_n == 2
5355 && in_mode == DFmode && in_n == 2)
5356 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5357 if (VECTOR_UNIT_VSX_P (V4SFmode)
5358 && out_mode == SFmode && out_n == 4
5359 && in_mode == SFmode && in_n == 4)
5360 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5361 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5362 && out_mode == SFmode && out_n == 4
5363 && in_mode == SFmode && in_n == 4)
5364 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5365 break;
5366 CASE_CFN_FLOOR:
5367 if (VECTOR_UNIT_VSX_P (V2DFmode)
5368 && out_mode == DFmode && out_n == 2
5369 && in_mode == DFmode && in_n == 2)
5370 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5371 if (VECTOR_UNIT_VSX_P (V4SFmode)
5372 && out_mode == SFmode && out_n == 4
5373 && in_mode == SFmode && in_n == 4)
5374 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5375 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5376 && out_mode == SFmode && out_n == 4
5377 && in_mode == SFmode && in_n == 4)
5378 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5379 break;
5380 CASE_CFN_FMA:
5381 if (VECTOR_UNIT_VSX_P (V2DFmode)
5382 && out_mode == DFmode && out_n == 2
5383 && in_mode == DFmode && in_n == 2)
5384 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5385 if (VECTOR_UNIT_VSX_P (V4SFmode)
5386 && out_mode == SFmode && out_n == 4
5387 && in_mode == SFmode && in_n == 4)
5388 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5389 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5390 && out_mode == SFmode && out_n == 4
5391 && in_mode == SFmode && in_n == 4)
5392 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5393 break;
5394 CASE_CFN_TRUNC:
5395 if (VECTOR_UNIT_VSX_P (V2DFmode)
5396 && out_mode == DFmode && out_n == 2
5397 && in_mode == DFmode && in_n == 2)
5398 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5399 if (VECTOR_UNIT_VSX_P (V4SFmode)
5400 && out_mode == SFmode && out_n == 4
5401 && in_mode == SFmode && in_n == 4)
5402 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5403 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5404 && out_mode == SFmode && out_n == 4
5405 && in_mode == SFmode && in_n == 4)
5406 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5407 break;
5408 CASE_CFN_NEARBYINT:
5409 if (VECTOR_UNIT_VSX_P (V2DFmode)
5410 && flag_unsafe_math_optimizations
5411 && out_mode == DFmode && out_n == 2
5412 && in_mode == DFmode && in_n == 2)
5413 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5414 if (VECTOR_UNIT_VSX_P (V4SFmode)
5415 && flag_unsafe_math_optimizations
5416 && out_mode == SFmode && out_n == 4
5417 && in_mode == SFmode && in_n == 4)
5418 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5419 break;
5420 CASE_CFN_RINT:
5421 if (VECTOR_UNIT_VSX_P (V2DFmode)
5422 && !flag_trapping_math
5423 && out_mode == DFmode && out_n == 2
5424 && in_mode == DFmode && in_n == 2)
5425 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5426 if (VECTOR_UNIT_VSX_P (V4SFmode)
5427 && !flag_trapping_math
5428 && out_mode == SFmode && out_n == 4
5429 && in_mode == SFmode && in_n == 4)
5430 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5431 break;
5432 default:
5433 break;
5434 }
5435
5436 /* Generate calls to libmass if appropriate. */
5437 if (rs6000_veclib_handler)
5438 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5439
5440 return NULL_TREE;
5441 }
5442
5443 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5444
5445 static tree
5446 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5447 tree type_in)
5448 {
5449 machine_mode in_mode, out_mode;
5450 int in_n, out_n;
5451
5452 if (TARGET_DEBUG_BUILTIN)
5453 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5454 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5455 GET_MODE_NAME (TYPE_MODE (type_out)),
5456 GET_MODE_NAME (TYPE_MODE (type_in)));
5457
5458 if (TREE_CODE (type_out) != VECTOR_TYPE
5459 || TREE_CODE (type_in) != VECTOR_TYPE
5460 || !TARGET_VECTORIZE_BUILTINS)
5461 return NULL_TREE;
5462
5463 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5464 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5465 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5466 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5467
5468 enum rs6000_builtins fn
5469 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5470 switch (fn)
5471 {
5472 case RS6000_BUILTIN_RSQRTF:
5473 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5474 && out_mode == SFmode && out_n == 4
5475 && in_mode == SFmode && in_n == 4)
5476 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5477 break;
5478 case RS6000_BUILTIN_RSQRT:
5479 if (VECTOR_UNIT_VSX_P (V2DFmode)
5480 && out_mode == DFmode && out_n == 2
5481 && in_mode == DFmode && in_n == 2)
5482 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5483 break;
5484 case RS6000_BUILTIN_RECIPF:
5485 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5486 && out_mode == SFmode && out_n == 4
5487 && in_mode == SFmode && in_n == 4)
5488 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5489 break;
5490 case RS6000_BUILTIN_RECIP:
5491 if (VECTOR_UNIT_VSX_P (V2DFmode)
5492 && out_mode == DFmode && out_n == 2
5493 && in_mode == DFmode && in_n == 2)
5494 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5495 break;
5496 default:
5497 break;
5498 }
5499 return NULL_TREE;
5500 }
5501 \f
5502 /* Default CPU string for rs6000*_file_start functions. */
5503 static const char *rs6000_default_cpu;
5504
5505 /* Do anything needed at the start of the asm file. */
5506
5507 static void
5508 rs6000_file_start (void)
5509 {
5510 char buffer[80];
5511 const char *start = buffer;
5512 FILE *file = asm_out_file;
5513
5514 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5515
5516 default_file_start ();
5517
5518 if (flag_verbose_asm)
5519 {
5520 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5521
5522 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5523 {
5524 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5525 start = "";
5526 }
5527
5528 if (global_options_set.x_rs6000_cpu_index)
5529 {
5530 fprintf (file, "%s -mcpu=%s", start,
5531 processor_target_table[rs6000_cpu_index].name);
5532 start = "";
5533 }
5534
5535 if (global_options_set.x_rs6000_tune_index)
5536 {
5537 fprintf (file, "%s -mtune=%s", start,
5538 processor_target_table[rs6000_tune_index].name);
5539 start = "";
5540 }
5541
5542 if (PPC405_ERRATUM77)
5543 {
5544 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5545 start = "";
5546 }
5547
5548 #ifdef USING_ELFOS_H
5549 switch (rs6000_sdata)
5550 {
5551 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5552 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5553 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5554 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5555 }
5556
5557 if (rs6000_sdata && g_switch_value)
5558 {
5559 fprintf (file, "%s -G %d", start,
5560 g_switch_value);
5561 start = "";
5562 }
5563 #endif
5564
5565 if (*start == '\0')
5566 putc ('\n', file);
5567 }
5568
5569 #ifdef USING_ELFOS_H
5570 if (rs6000_default_cpu == 0 || rs6000_default_cpu[0] == '\0'
5571 || !global_options_set.x_rs6000_cpu_index)
5572 {
5573 fputs ("\t.machine ", asm_out_file);
5574 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
5575 fputs ("power9\n", asm_out_file);
5576 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5577 fputs ("power8\n", asm_out_file);
5578 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5579 fputs ("power7\n", asm_out_file);
5580 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5581 fputs ("power6\n", asm_out_file);
5582 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5583 fputs ("power5\n", asm_out_file);
5584 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5585 fputs ("power4\n", asm_out_file);
5586 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5587 fputs ("ppc64\n", asm_out_file);
5588 else
5589 fputs ("ppc\n", asm_out_file);
5590 }
5591 #endif
5592
5593 if (DEFAULT_ABI == ABI_ELFv2)
5594 fprintf (file, "\t.abiversion 2\n");
5595
5596 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
5597 || (TARGET_ELF && flag_pic == 2))
5598 {
5599 switch_to_section (toc_section);
5600 switch_to_section (text_section);
5601 }
5602 }
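
/* For example, when neither --with-cpu nor -mcpu pins down a processor,
   the strongest ISA flag picks the ".machine" directive above:
   direct-move implies ".machine power8", popcntd implies ".machine
   power7", and so on down to plain ".machine ppc" for the base 32-bit
   ISA.  */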
5603
5604 \f
5605 /* Return nonzero if this function is known to have a null epilogue. */
5606
5607 int
5608 direct_return (void)
5609 {
5610 if (reload_completed)
5611 {
5612 rs6000_stack_t *info = rs6000_stack_info ();
5613
5614 if (info->first_gp_reg_save == 32
5615 && info->first_fp_reg_save == 64
5616 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5617 && ! info->lr_save_p
5618 && ! info->cr_save_p
5619 && info->vrsave_size == 0
5620 && ! info->push_p)
5621 return 1;
5622 }
5623
5624 return 0;
5625 }
5626
5627 /* Return the number of instructions it takes to form a constant in an
5628 integer register. */
5629
5630 int
5631 num_insns_constant_wide (HOST_WIDE_INT value)
5632 {
5633 /* signed constant loadable with addi */
5634 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5635 return 1;
5636
5637 /* constant loadable with addis */
5638 else if ((value & 0xffff) == 0
5639 && (value >> 31 == -1 || value >> 31 == 0))
5640 return 1;
5641
5642 else if (TARGET_POWERPC64)
5643 {
5644 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5645 HOST_WIDE_INT high = value >> 31;
5646
5647 if (high == 0 || high == -1)
5648 return 2;
5649
5650 high >>= 1;
5651
5652 if (low == 0)
5653 return num_insns_constant_wide (high) + 1;
5654 else if (high == 0)
5655 return num_insns_constant_wide (low) + 1;
5656 else
5657 return (num_insns_constant_wide (high)
5658 + num_insns_constant_wide (low) + 1);
5659 }
5660
5661 else
5662 return 2;
5663 }
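
/* Worked examples: 100 fits in a signed 16-bit immediate, so a single
   addi (li) suffices; 0x12340000 has a zero low halfword and loads with
   one addis (lis); 0x12345678 needs lis followed by ori, i.e. 2 insns;
   and a full 64-bit constant such as 0x123456789abcdef0 splits into two
   32-bit halves joined by a shift, the classic 5 insn sequence
   (lis, ori, sldi, oris, ori).  */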
5664
5665 int
5666 num_insns_constant (rtx op, machine_mode mode)
5667 {
5668 HOST_WIDE_INT low, high;
5669
5670 switch (GET_CODE (op))
5671 {
5672 case CONST_INT:
5673 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5674 && rs6000_is_valid_and_mask (op, mode))
5675 return 2;
5676 else
5677 return num_insns_constant_wide (INTVAL (op));
5678
5679 case CONST_WIDE_INT:
5680 {
5681 int i;
5682 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5683 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5684 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5685 return ins;
5686 }
5687
5688 case CONST_DOUBLE:
5689 if (mode == SFmode || mode == SDmode)
5690 {
5691 long l;
5692
5693 if (DECIMAL_FLOAT_MODE_P (mode))
5694 REAL_VALUE_TO_TARGET_DECIMAL32
5695 (*CONST_DOUBLE_REAL_VALUE (op), l);
5696 else
5697 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5698 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5699 }
5700
5701 long l[2];
5702 if (DECIMAL_FLOAT_MODE_P (mode))
5703 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
5704 else
5705 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
5706 high = l[WORDS_BIG_ENDIAN == 0];
5707 low = l[WORDS_BIG_ENDIAN != 0];
5708
5709 if (TARGET_32BIT)
5710 return (num_insns_constant_wide (low)
5711 + num_insns_constant_wide (high));
5712 else
5713 {
5714 if ((high == 0 && low >= 0)
5715 || (high == -1 && low < 0))
5716 return num_insns_constant_wide (low);
5717
5718 else if (rs6000_is_valid_and_mask (op, mode))
5719 return 2;
5720
5721 else if (low == 0)
5722 return num_insns_constant_wide (high) + 1;
5723
5724 else
5725 return (num_insns_constant_wide (high)
5726 + num_insns_constant_wide (low) + 1);
5727 }
5728
5729 default:
5730 gcc_unreachable ();
5731 }
5732 }
5733
5734 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5735 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5736 corresponding element of the vector, but for V4SFmode and V2SFmode,
5737 the corresponding "float" is interpreted as an SImode integer. */
5738
5739 HOST_WIDE_INT
5740 const_vector_elt_as_int (rtx op, unsigned int elt)
5741 {
5742 rtx tmp;
5743
5744 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5745 gcc_assert (GET_MODE (op) != V2DImode
5746 && GET_MODE (op) != V2DFmode);
5747
5748 tmp = CONST_VECTOR_ELT (op, elt);
5749 if (GET_MODE (op) == V4SFmode
5750 || GET_MODE (op) == V2SFmode)
5751 tmp = gen_lowpart (SImode, tmp);
5752 return INTVAL (tmp);
5753 }
5754
5755 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5756 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5757 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5758 all items are set to the same value and contain COPIES replicas of the
5759 vsplt's operand; if STEP > 1, one in every STEP elements is set to the
5760 vsplt's operand and the others are set to the value of the operand's msb. */
5761
5762 static bool
5763 vspltis_constant (rtx op, unsigned step, unsigned copies)
5764 {
5765 machine_mode mode = GET_MODE (op);
5766 machine_mode inner = GET_MODE_INNER (mode);
5767
5768 unsigned i;
5769 unsigned nunits;
5770 unsigned bitsize;
5771 unsigned mask;
5772
5773 HOST_WIDE_INT val;
5774 HOST_WIDE_INT splat_val;
5775 HOST_WIDE_INT msb_val;
5776
5777 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5778 return false;
5779
5780 nunits = GET_MODE_NUNITS (mode);
5781 bitsize = GET_MODE_BITSIZE (inner);
5782 mask = GET_MODE_MASK (inner);
5783
5784 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5785 splat_val = val;
5786 msb_val = val >= 0 ? 0 : -1;
5787
5788 /* Construct the value to be splatted, if possible. If not, return 0. */
5789 for (i = 2; i <= copies; i *= 2)
5790 {
5791 HOST_WIDE_INT small_val;
5792 bitsize /= 2;
5793 small_val = splat_val >> bitsize;
5794 mask >>= bitsize;
5795 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5796 return false;
5797 splat_val = small_val;
5798 }
5799
5800 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5801 if (EASY_VECTOR_15 (splat_val))
5802 ;
5803
5804 /* Also check if we can splat, and then add the result to itself. Do so if
5805 the value is positive, or if the splat instruction is using OP's mode;
5806 for splat_val < 0, the splat and the add should use the same mode. */
5807 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5808 && (splat_val >= 0 || (step == 1 && copies == 1)))
5809 ;
5810
5811 /* Also check if we are loading up the most significant bit, which can be
5812 done by loading up -1 and shifting the value left by -1. */
5813 else if (EASY_VECTOR_MSB (splat_val, inner))
5814 ;
5815
5816 else
5817 return false;
5818
5819 /* Check if VAL is present in every STEP-th element, and the
5820 other elements are filled with its most significant bit. */
5821 for (i = 1; i < nunits; ++i)
5822 {
5823 HOST_WIDE_INT desired_val;
5824 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5825 if ((i & (step - 1)) == 0)
5826 desired_val = val;
5827 else
5828 desired_val = msb_val;
5829
5830 if (desired_val != const_vector_elt_as_int (op, elt))
5831 return false;
5832 }
5833
5834 return true;
5835 }
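
/* For example, the V4SImode constant with every element 0x00050005 is
   accepted with STEP 1 and COPIES 2: each word holds two replicas of 5,
   so "vspltish %0,5" generates it when the result is reinterpreted as
   V4SI.  Conversely, a V4SI vector of plain 5s needs COPIES 1 and is a
   direct vspltisw.  */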
5836
5837 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5838 instruction, filling in the bottom elements with 0 or -1.
5839
5840 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5841 for the number of zeroes to shift in, or negative for the number of 0xff
5842 bytes to shift in.
5843
5844 OP is a CONST_VECTOR. */
5845
5846 int
5847 vspltis_shifted (rtx op)
5848 {
5849 machine_mode mode = GET_MODE (op);
5850 machine_mode inner = GET_MODE_INNER (mode);
5851
5852 unsigned i, j;
5853 unsigned nunits;
5854 unsigned mask;
5855
5856 HOST_WIDE_INT val;
5857
5858 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5859 return 0;
5860
5861 /* We need to create pseudo registers to do the shift, so don't recognize
5862 shift vector constants after reload. */
5863 if (!can_create_pseudo_p ())
5864 return 0;
5865
5866 nunits = GET_MODE_NUNITS (mode);
5867 mask = GET_MODE_MASK (inner);
5868
5869 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5870
5871 /* Check if the value can really be the operand of a vspltis[bhw]. */
5872 if (EASY_VECTOR_15 (val))
5873 ;
5874
5875 /* Also check if we are loading up the most significant bit which can be done
5876 by loading up -1 and shifting the value left by -1. */
5877 else if (EASY_VECTOR_MSB (val, inner))
5878 ;
5879
5880 else
5881 return 0;
5882
5883 /* Check if VAL is present in every STEP-th element until we find elements
5884 that are 0 or all 1 bits. */
5885 for (i = 1; i < nunits; ++i)
5886 {
5887 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5888 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5889
5890 /* If the value isn't the splat value, check for the remaining elements
5891 being 0/-1. */
5892 if (val != elt_val)
5893 {
5894 if (elt_val == 0)
5895 {
5896 for (j = i+1; j < nunits; ++j)
5897 {
5898 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5899 if (const_vector_elt_as_int (op, elt2) != 0)
5900 return 0;
5901 }
5902
5903 return (nunits - i) * GET_MODE_SIZE (inner);
5904 }
5905
5906 else if ((elt_val & mask) == mask)
5907 {
5908 for (j = i+1; j < nunits; ++j)
5909 {
5910 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5911 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5912 return 0;
5913 }
5914
5915 return -((nunits - i) * GET_MODE_SIZE (inner));
5916 }
5917
5918 else
5919 return 0;
5920 }
5921 }
5922
5923 /* If all elements are equal, we don't need to do VSLDOI. */
5924 return 0;
5925 }
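
/* For example (big endian), the V4SImode constant { 12, 0, 0, 0 } starts
   with a value vspltisw can build and ends with three zero words, so the
   return value is (4 - 1) * 4 == 12: splat 12, then VSLDOI shifts 12
   zero bytes into the bottom of the vector.  A trailing run of all-ones
   elements would instead give -12.  */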
5926
5927
5928 /* Return true if OP is of the given MODE and can be synthesized
5929 with a vspltisb, vspltish or vspltisw. */
5930
5931 bool
5932 easy_altivec_constant (rtx op, machine_mode mode)
5933 {
5934 unsigned step, copies;
5935
5936 if (mode == VOIDmode)
5937 mode = GET_MODE (op);
5938 else if (mode != GET_MODE (op))
5939 return false;
5940
5941 /* V2DImode/V2DFmode were added with VSX. Only allow 0 and all 1's as easy
5942 constants. */
5943 if (mode == V2DFmode)
5944 return zero_constant (op, mode);
5945
5946 else if (mode == V2DImode)
5947 {
5948 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5949 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5950 return false;
5951
5952 if (zero_constant (op, mode))
5953 return true;
5954
5955 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5956 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5957 return true;
5958
5959 return false;
5960 }
5961
5962 /* V1TImode is a special container for TImode. Ignore for now. */
5963 else if (mode == V1TImode)
5964 return false;
5965
5966 /* Start with a vspltisw. */
5967 step = GET_MODE_NUNITS (mode) / 4;
5968 copies = 1;
5969
5970 if (vspltis_constant (op, step, copies))
5971 return true;
5972
5973 /* Then try with a vspltish. */
5974 if (step == 1)
5975 copies <<= 1;
5976 else
5977 step >>= 1;
5978
5979 if (vspltis_constant (op, step, copies))
5980 return true;
5981
5982 /* And finally a vspltisb. */
5983 if (step == 1)
5984 copies <<= 1;
5985 else
5986 step >>= 1;
5987
5988 if (vspltis_constant (op, step, copies))
5989 return true;
5990
5991 if (vspltis_shifted (op) != 0)
5992 return true;
5993
5994 return false;
5995 }
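
/* For example, a V16QImode vector with every byte equal to 3 fails the
   vspltisw (STEP 4) and vspltish (STEP 2) attempts, since those require
   the in-between elements to be 0, but succeeds as a plain vspltisb on
   the final STEP 1 attempt.  */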
5996
5997 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5998 result is OP. Abort if it is not possible. */
5999
6000 rtx
6001 gen_easy_altivec_constant (rtx op)
6002 {
6003 machine_mode mode = GET_MODE (op);
6004 int nunits = GET_MODE_NUNITS (mode);
6005 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6006 unsigned step = nunits / 4;
6007 unsigned copies = 1;
6008
6009 /* Start with a vspltisw. */
6010 if (vspltis_constant (op, step, copies))
6011 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6012
6013 /* Then try with a vspltish. */
6014 if (step == 1)
6015 copies <<= 1;
6016 else
6017 step >>= 1;
6018
6019 if (vspltis_constant (op, step, copies))
6020 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6021
6022 /* And finally a vspltisb. */
6023 if (step == 1)
6024 copies <<= 1;
6025 else
6026 step >>= 1;
6027
6028 if (vspltis_constant (op, step, copies))
6029 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6030
6031 gcc_unreachable ();
6032 }
6033
6034 const char *
6035 output_vec_const_move (rtx *operands)
6036 {
6037 int cst, cst2, shift;
6038 machine_mode mode;
6039 rtx dest, vec;
6040
6041 dest = operands[0];
6042 vec = operands[1];
6043 mode = GET_MODE (dest);
6044
6045 if (TARGET_VSX)
6046 {
6047 if (zero_constant (vec, mode))
6048 return "xxlxor %x0,%x0,%x0";
6049
6050 if (TARGET_P8_VECTOR && vec == CONSTM1_RTX (mode))
6051 return "xxlorc %x0,%x0,%x0";
6052
6053 if ((mode == V2DImode || mode == V1TImode)
6054 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
6055 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
6056 return (TARGET_P8_VECTOR) ? "xxlorc %x0,%x0,%x0" : "vspltisw %0,-1";
6057 }
6058
6059 if (TARGET_ALTIVEC)
6060 {
6061 rtx splat_vec;
6062 if (zero_constant (vec, mode))
6063 return "vxor %0,%0,%0";
6064
6065 /* Do we need to construct a value using VSLDOI? */
6066 shift = vspltis_shifted (vec);
6067 if (shift != 0)
6068 return "#";
6069
6070 splat_vec = gen_easy_altivec_constant (vec);
6071 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6072 operands[1] = XEXP (splat_vec, 0);
6073 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6074 return "#";
6075
6076 switch (GET_MODE (splat_vec))
6077 {
6078 case V4SImode:
6079 return "vspltisw %0,%1";
6080
6081 case V8HImode:
6082 return "vspltish %0,%1";
6083
6084 case V16QImode:
6085 return "vspltisb %0,%1";
6086
6087 default:
6088 gcc_unreachable ();
6089 }
6090 }
6091
6092 gcc_assert (TARGET_SPE);
6093
6094 /* Vector constant 0 is handled as a splitter of V2SI, and in the
6095 pattern of V1DI, V4HI, and V2SF.
6096
6097 FIXME: We should probably return # and add post reload
6098 splitters for these, but this way is so easy ;-). */
6099 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
6100 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
6101 operands[1] = CONST_VECTOR_ELT (vec, 0);
6102 operands[2] = CONST_VECTOR_ELT (vec, 1);
6103 if (cst == cst2)
6104 return "li %0,%1\n\tevmergelo %0,%0,%0";
6105 else if (WORDS_BIG_ENDIAN)
6106 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
6107 else
6108 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
6109 }
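
/* For example, a zero vector is emitted as "xxlxor %x0,%x0,%x0" under
   VSX or "vxor %0,%0,%0" with plain AltiVec; all-ones becomes "xxlorc"
   on Power8 or "vspltisw %0,-1" otherwise; splattable constants use one
   of the vspltis[bhw] templates; and "#" defers the VSLDOI cases to a
   splitter.  */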
6110
6111 /* Initialize the paired-float vector TARGET to VALS. */
6112
6113 void
6114 paired_expand_vector_init (rtx target, rtx vals)
6115 {
6116 machine_mode mode = GET_MODE (target);
6117 int n_elts = GET_MODE_NUNITS (mode);
6118 int n_var = 0;
6119 rtx x, new_rtx, tmp, constant_op, op1, op2;
6120 int i;
6121
6122 for (i = 0; i < n_elts; ++i)
6123 {
6124 x = XVECEXP (vals, 0, i);
6125 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6126 ++n_var;
6127 }
6128 if (n_var == 0)
6129 {
6130 /* Load from constant pool. */
6131 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6132 return;
6133 }
6134
6135 if (n_var == 2)
6136 {
6137 /* The vector is initialized only with non-constants. */
6138 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6139 XVECEXP (vals, 0, 1));
6140
6141 emit_move_insn (target, new_rtx);
6142 return;
6143 }
6144
6145 /* One field is non-constant and the other one is a constant. Load the
6146 constant from the constant pool and use the ps_merge instruction to
6147 construct the whole vector. */
6148 op1 = XVECEXP (vals, 0, 0);
6149 op2 = XVECEXP (vals, 0, 1);
6150
6151 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6152
6153 tmp = gen_reg_rtx (GET_MODE (constant_op));
6154 emit_move_insn (tmp, constant_op);
6155
6156 if (CONSTANT_P (op1))
6157 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6158 else
6159 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6160
6161 emit_move_insn (target, new_rtx);
6162 }
6163
6164 void
6165 paired_expand_vector_move (rtx operands[])
6166 {
6167 rtx op0 = operands[0], op1 = operands[1];
6168
6169 emit_move_insn (op0, op1);
6170 }
6171
6172 /* Emit vector compare for code RCODE. DEST is the destination, OP0 and
6173 OP1 are the two VEC_COND_EXPR operands, and CC_OP0 and CC_OP1 are the
6174 two operands of the comparison RCODE. This is a recursive
6175 function. */
6176
6177 static void
6178 paired_emit_vector_compare (enum rtx_code rcode,
6179 rtx dest, rtx op0, rtx op1,
6180 rtx cc_op0, rtx cc_op1)
6181 {
6182 rtx tmp = gen_reg_rtx (V2SFmode);
6183 rtx tmp1, max, min;
6184
6185 gcc_assert (TARGET_PAIRED_FLOAT);
6186 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6187
6188 switch (rcode)
6189 {
6190 case LT:
6191 case LTU:
6192 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6193 return;
6194 case GE:
6195 case GEU:
6196 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6197 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6198 return;
6199 case LE:
6200 case LEU:
6201 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6202 return;
6203 case GT:
6204 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6205 return;
6206 case EQ:
6207 tmp1 = gen_reg_rtx (V2SFmode);
6208 max = gen_reg_rtx (V2SFmode);
6209 min = gen_reg_rtx (V2SFmode);
6211
6212 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6213 emit_insn (gen_selv2sf4
6214 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6215 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6216 emit_insn (gen_selv2sf4
6217 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6218 emit_insn (gen_subv2sf3 (tmp1, min, max));
6219 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6220 return;
6221 case NE:
6222 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6223 return;
6224 case UNLE:
6225 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6226 return;
6227 case UNLT:
6228 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6229 return;
6230 case UNGE:
6231 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6232 return;
6233 case UNGT:
6234 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6235 return;
6236 default:
6237 gcc_unreachable ();
6238 }
6239
6240 return;
6241 }
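
/* For example, an LT comparison is handled by the first case above as GE
   with the value operands swapped, and EQ is built from two GE selects:
   min (a, b) - max (a, b) is >= 0 (namely 0) exactly when a == b.  */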
6242
6243 /* Emit vector conditional expression.
6244 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6245 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6246
6247 int
6248 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6249 rtx cond, rtx cc_op0, rtx cc_op1)
6250 {
6251 enum rtx_code rcode = GET_CODE (cond);
6252
6253 if (!TARGET_PAIRED_FLOAT)
6254 return 0;
6255
6256 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6257
6258 return 1;
6259 }
6260
6261 /* Initialize vector TARGET to VALS. */
6262
6263 void
6264 rs6000_expand_vector_init (rtx target, rtx vals)
6265 {
6266 machine_mode mode = GET_MODE (target);
6267 machine_mode inner_mode = GET_MODE_INNER (mode);
6268 int n_elts = GET_MODE_NUNITS (mode);
6269 int n_var = 0, one_var = -1;
6270 bool all_same = true, all_const_zero = true;
6271 rtx x, mem;
6272 int i;
6273
6274 for (i = 0; i < n_elts; ++i)
6275 {
6276 x = XVECEXP (vals, 0, i);
6277 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6278 ++n_var, one_var = i;
6279 else if (x != CONST0_RTX (inner_mode))
6280 all_const_zero = false;
6281
6282 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6283 all_same = false;
6284 }
6285
6286 if (n_var == 0)
6287 {
6288 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6289 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6290 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6291 {
6292 /* Zero register. */
6293 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (mode, target, target)));
6294 return;
6295 }
6296 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6297 {
6298 /* Splat immediate. */
6299 emit_insn (gen_rtx_SET (target, const_vec));
6300 return;
6301 }
6302 else
6303 {
6304 /* Load from constant pool. */
6305 emit_move_insn (target, const_vec);
6306 return;
6307 }
6308 }
6309
6310 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6311 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6312 {
6313 rtx op0 = XVECEXP (vals, 0, 0);
6314 rtx op1 = XVECEXP (vals, 0, 1);
6315 if (all_same)
6316 {
6317 if (!MEM_P (op0) && !REG_P (op0))
6318 op0 = force_reg (inner_mode, op0);
6319 if (mode == V2DFmode)
6320 emit_insn (gen_vsx_splat_v2df (target, op0));
6321 else
6322 emit_insn (gen_vsx_splat_v2di (target, op0));
6323 }
6324 else
6325 {
6326 op0 = force_reg (inner_mode, op0);
6327 op1 = force_reg (inner_mode, op1);
6328 if (mode == V2DFmode)
6329 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
6330 else
6331 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
6332 }
6333 return;
6334 }
6335
6336 /* With single-precision floating point on VSX, we know that internally single
6337 precision is actually represented as a double, so either make 2 V2DF
6338 vectors and convert those vectors to single precision, or do one
6339 conversion and splat the result to the other elements. */
6340 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
6341 {
6342 if (all_same)
6343 {
6344 rtx freg = gen_reg_rtx (V4SFmode);
6345 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
6346 rtx cvt = ((TARGET_XSCVDPSPN)
6347 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6348 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6349
6350 emit_insn (cvt);
6351 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
6352 }
6353 else
6354 {
6355 rtx dbl_even = gen_reg_rtx (V2DFmode);
6356 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6357 rtx flt_even = gen_reg_rtx (V4SFmode);
6358 rtx flt_odd = gen_reg_rtx (V4SFmode);
6359 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6360 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6361 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6362 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6363
6364 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6365 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6366 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6367 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6368 rs6000_expand_extract_even (target, flt_even, flt_odd);
6369 }
6370 return;
6371 }
6372
6373 /* Store value to stack temp. Load vector element. Splat. However, splat
6374 of 64-bit items is not supported on Altivec. */
6375 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6376 {
6377 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6378 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6379 XVECEXP (vals, 0, 0));
6380 x = gen_rtx_UNSPEC (VOIDmode,
6381 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6382 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6383 gen_rtvec (2,
6384 gen_rtx_SET (target, mem),
6385 x)));
6386 x = gen_rtx_VEC_SELECT (inner_mode, target,
6387 gen_rtx_PARALLEL (VOIDmode,
6388 gen_rtvec (1, const0_rtx)));
6389 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6390 return;
6391 }
6392
6393 /* One field is non-constant. Load constant then overwrite
6394 varying field. */
6395 if (n_var == 1)
6396 {
6397 rtx copy = copy_rtx (vals);
6398
6399 /* Load constant part of vector, substitute neighboring value for
6400 varying element. */
6401 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6402 rs6000_expand_vector_init (target, copy);
6403
6404 /* Insert variable. */
6405 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6406 return;
6407 }
6408
6409 /* Construct the vector in memory one field at a time
6410 and load the whole vector. */
6411 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6412 for (i = 0; i < n_elts; i++)
6413 emit_move_insn (adjust_address_nv (mem, inner_mode,
6414 i * GET_MODE_SIZE (inner_mode)),
6415 XVECEXP (vals, 0, i));
6416 emit_move_insn (target, mem);
6417 }
6418
6419 /* Set field ELT of TARGET to VAL. */
6420
6421 void
6422 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6423 {
6424 machine_mode mode = GET_MODE (target);
6425 machine_mode inner_mode = GET_MODE_INNER (mode);
6426 rtx reg = gen_reg_rtx (mode);
6427 rtx mask, mem, x;
6428 int width = GET_MODE_SIZE (inner_mode);
6429 int i;
6430
6431 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6432 {
6433 rtx (*set_func) (rtx, rtx, rtx, rtx)
6434 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
6435 emit_insn (set_func (target, target, val, GEN_INT (elt)));
6436 return;
6437 }
6438
6439 /* Simplify setting single element vectors like V1TImode. */
6440 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6441 {
6442 emit_move_insn (target, gen_lowpart (mode, val));
6443 return;
6444 }
6445
6446 /* Load single variable value. */
6447 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6448 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6449 x = gen_rtx_UNSPEC (VOIDmode,
6450 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6451 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6452 gen_rtvec (2,
6453 gen_rtx_SET (reg, mem),
6454 x)));
6455
6456 /* Linear sequence. */
6457 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6458 for (i = 0; i < 16; ++i)
6459 XVECEXP (mask, 0, i) = GEN_INT (i);
6460
6461 /* Set permute mask to insert element into target. */
6462 for (i = 0; i < width; ++i)
6463 XVECEXP (mask, 0, elt*width + i)
6464 = GEN_INT (i + 0x10);
6465 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6466
6467 if (BYTES_BIG_ENDIAN)
6468 x = gen_rtx_UNSPEC (mode,
6469 gen_rtvec (3, target, reg,
6470 force_reg (V16QImode, x)),
6471 UNSPEC_VPERM);
6472 else
6473 {
6474 /* Invert selector. We prefer to generate VNAND on P8 so
6475 that future fusion opportunities can kick in, but must
6476 generate VNOR elsewhere. */
6477 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6478 rtx iorx = (TARGET_P8_VECTOR
6479 ? gen_rtx_IOR (V16QImode, notx, notx)
6480 : gen_rtx_AND (V16QImode, notx, notx));
6481 rtx tmp = gen_reg_rtx (V16QImode);
6482 emit_insn (gen_rtx_SET (tmp, iorx));
6483
6484 /* Permute with operands reversed and adjusted selector. */
6485 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6486 UNSPEC_VPERM);
6487 }
6488
6489 emit_insn (gen_rtx_SET (target, x));
6490 }
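
/* For example, setting element 1 of a V4SImode TARGET builds the selector
   { 0,1,2,3, 16,17,18,19, 8,9,10,11, 12,13,14,15 }: selector bytes below
   16 keep TARGET's bytes while 16..19 pick up the new value from REG, so
   the vperm rewrites only the second word (on little endian the selector
   is complemented and the vperm operands are swapped).  */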
6491
6492 /* Extract field ELT from VEC into TARGET. */
6493
6494 void
6495 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
6496 {
6497 machine_mode mode = GET_MODE (vec);
6498 machine_mode inner_mode = GET_MODE_INNER (mode);
6499 rtx mem;
6500
6501 if (VECTOR_MEM_VSX_P (mode))
6502 {
6503 switch (mode)
6504 {
6505 default:
6506 break;
6507 case V1TImode:
6508 gcc_assert (elt == 0 && inner_mode == TImode);
6509 emit_move_insn (target, gen_lowpart (TImode, vec));
6510 break;
6511 case V2DFmode:
6512 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
6513 return;
6514 case V2DImode:
6515 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
6516 return;
6517 case V4SFmode:
6518 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
6519 return;
6520 }
6521 }
6522
6523 /* Allocate mode-sized buffer. */
6524 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6525
6526 emit_move_insn (mem, vec);
6527
6528 /* Add offset to field within buffer matching vector element. */
6529 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
6530
6531 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6532 }
6533
6534 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
6535
6536 bool
6537 invalid_e500_subreg (rtx op, machine_mode mode)
6538 {
6539 if (TARGET_E500_DOUBLE)
6540 {
6541 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
6542 subreg:TI and reg:TF. Decimal float modes are like integer
6543 modes (only low part of each register used) for this
6544 purpose. */
6545 if (GET_CODE (op) == SUBREG
6546 && (mode == SImode || mode == DImode || mode == TImode
6547 || mode == DDmode || mode == TDmode || mode == PTImode)
6548 && REG_P (SUBREG_REG (op))
6549 && (GET_MODE (SUBREG_REG (op)) == DFmode
6550 || GET_MODE (SUBREG_REG (op)) == TFmode
6551 || GET_MODE (SUBREG_REG (op)) == IFmode
6552 || GET_MODE (SUBREG_REG (op)) == KFmode))
6553 return true;
6554
6555 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
6556 reg:TI. */
6557 if (GET_CODE (op) == SUBREG
6558 && (mode == DFmode || mode == TFmode || mode == IFmode
6559 || mode == KFmode)
6560 && REG_P (SUBREG_REG (op))
6561 && (GET_MODE (SUBREG_REG (op)) == DImode
6562 || GET_MODE (SUBREG_REG (op)) == TImode
6563 || GET_MODE (SUBREG_REG (op)) == PTImode
6564 || GET_MODE (SUBREG_REG (op)) == DDmode
6565 || GET_MODE (SUBREG_REG (op)) == TDmode))
6566 return true;
6567 }
6568
6569 if (TARGET_SPE
6570 && GET_CODE (op) == SUBREG
6571 && mode == SImode
6572 && REG_P (SUBREG_REG (op))
6573 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
6574 return true;
6575
6576 return false;
6577 }
6578
6579 /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW
6580 selects whether the alignment is ABI-mandated, optional, or
6581 both ABI-mandated and optional alignment. */
6582
6583 unsigned int
6584 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
6585 {
6586 if (how != align_opt)
6587 {
6588 if (TREE_CODE (type) == VECTOR_TYPE)
6589 {
6590 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
6591 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
6592 {
6593 if (align < 64)
6594 align = 64;
6595 }
6596 else if (align < 128)
6597 align = 128;
6598 }
6599 else if (TARGET_E500_DOUBLE
6600 && TREE_CODE (type) == REAL_TYPE
6601 && TYPE_MODE (type) == DFmode)
6602 {
6603 if (align < 64)
6604 align = 64;
6605 }
6606 }
6607
6608 if (how != align_abi)
6609 {
6610 if (TREE_CODE (type) == ARRAY_TYPE
6611 && TYPE_MODE (TREE_TYPE (type)) == QImode)
6612 {
6613 if (align < BITS_PER_WORD)
6614 align = BITS_PER_WORD;
6615 }
6616 }
6617
6618 return align;
6619 }
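
/* For example, an AltiVec vector type is raised to 128-bit alignment (64
   bits for SPE and paired-float vectors) as an ABI requirement, while a
   char array is raised to word alignment purely as an optimization, so
   the latter is skipped when only ABI alignment (align_abi) is asked
   for.  */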
6620
6621 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
6622
6623 bool
6624 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
6625 {
6626 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6627 {
6628 if (computed != 128)
6629 {
6630 static bool warned;
6631 if (!warned && warn_psabi)
6632 {
6633 warned = true;
6634 inform (input_location,
6635 "the layout of aggregates containing vectors with"
6636 " %d-byte alignment has changed in GCC 5",
6637 computed / BITS_PER_UNIT);
6638 }
6639 }
6640 /* In current GCC there is no special case. */
6641 return false;
6642 }
6643
6644 return false;
6645 }
6646
6647 /* AIX increases natural record alignment to doubleword if the first
6648 field is an FP double while the FP fields remain word aligned. */
6649
6650 unsigned int
6651 rs6000_special_round_type_align (tree type, unsigned int computed,
6652 unsigned int specified)
6653 {
6654 unsigned int align = MAX (computed, specified);
6655 tree field = TYPE_FIELDS (type);
6656
6657 /* Skip all non-field decls. */
6658 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6659 field = DECL_CHAIN (field);
6660
6661 if (field != NULL && field != type)
6662 {
6663 type = TREE_TYPE (field);
6664 while (TREE_CODE (type) == ARRAY_TYPE)
6665 type = TREE_TYPE (type);
6666
6667 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
6668 align = MAX (align, 64);
6669 }
6670
6671 return align;
6672 }
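
/* For example, struct { double d; int i; } is rounded up to doubleword
   (64-bit) alignment here because its first field has DFmode; a struct
   whose first member is an array of doubles gets the same treatment via
   the ARRAY_TYPE walk above.  */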
6673
6674 /* Darwin increases record alignment to the natural alignment of
6675 the first field. */
6676
6677 unsigned int
6678 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6679 unsigned int specified)
6680 {
6681 unsigned int align = MAX (computed, specified);
6682
6683 if (TYPE_PACKED (type))
6684 return align;
6685
6686 /* Find the first field, looking down into aggregates. */
6687 do {
6688 tree field = TYPE_FIELDS (type);
6689 /* Skip all non-field decls. */
6690 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6691 field = DECL_CHAIN (field);
6692 if (! field)
6693 break;
6694 /* A packed field does not contribute any extra alignment. */
6695 if (DECL_PACKED (field))
6696 return align;
6697 type = TREE_TYPE (field);
6698 while (TREE_CODE (type) == ARRAY_TYPE)
6699 type = TREE_TYPE (type);
6700 } while (AGGREGATE_TYPE_P (type));
6701
6702 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6703 align = MAX (align, TYPE_ALIGN (type));
6704
6705 return align;
6706 }
6707
6708 /* Return 1 for an operand in small memory on V.4/eabi. */
6709
6710 int
6711 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6712 machine_mode mode ATTRIBUTE_UNUSED)
6713 {
6714 #if TARGET_ELF
6715 rtx sym_ref;
6716
6717 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6718 return 0;
6719
6720 if (DEFAULT_ABI != ABI_V4)
6721 return 0;
6722
6723 /* Vector and float memory instructions have a limited offset on the
6724 SPE, so using a vector or float variable directly as an operand is
6725 not useful. */
6726 if (TARGET_SPE
6727 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6728 return 0;
6729
6730 if (GET_CODE (op) == SYMBOL_REF)
6731 sym_ref = op;
6732
6733 else if (GET_CODE (op) != CONST
6734 || GET_CODE (XEXP (op, 0)) != PLUS
6735 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6736 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6737 return 0;
6738
6739 else
6740 {
6741 rtx sum = XEXP (op, 0);
6742 HOST_WIDE_INT summand;
6743
6744 /* We have to be careful here, because it is the referenced address
6745 that must be 32k from _SDA_BASE_, not just the symbol. */
6746 summand = INTVAL (XEXP (sum, 1));
6747 if (summand < 0 || summand > g_switch_value)
6748 return 0;
6749
6750 sym_ref = XEXP (sum, 0);
6751 }
6752
6753 return SYMBOL_REF_SMALL_P (sym_ref);
6754 #else
6755 return 0;
6756 #endif
6757 }
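
/* For example, with -msdata=eabi and -G 8, the operand
   (const (plus (symbol_ref "x") (const_int 4))) is accepted only when
   "x" is a small-data symbol and the offset lies in [0, 8], since the
   referenced address itself must stay within reach of _SDA_BASE_.  */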
6758
6759 /* Return true if either operand is a general purpose register. */
6760
6761 bool
6762 gpr_or_gpr_p (rtx op0, rtx op1)
6763 {
6764 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6765 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6766 }
6767
6768 /* Return true if this is a move direct operation between GPR registers and
6769 floating point/VSX registers. */
6770
6771 bool
6772 direct_move_p (rtx op0, rtx op1)
6773 {
6774 int regno0, regno1;
6775
6776 if (!REG_P (op0) || !REG_P (op1))
6777 return false;
6778
6779 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6780 return false;
6781
6782 regno0 = REGNO (op0);
6783 regno1 = REGNO (op1);
6784 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6785 return false;
6786
6787 if (INT_REGNO_P (regno0))
6788 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6789
6790 else if (INT_REGNO_P (regno1))
6791 {
6792 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6793 return true;
6794
6795 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6796 return true;
6797 }
6798
6799 return false;
6800 }
6801
6802 /* Return true if this is a load or store quad operation. This function does
6803 not handle the atomic quad memory instructions. */
6804
6805 bool
6806 quad_load_store_p (rtx op0, rtx op1)
6807 {
6808 bool ret;
6809
6810 if (!TARGET_QUAD_MEMORY)
6811 ret = false;
6812
6813 else if (REG_P (op0) && MEM_P (op1))
6814 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6815 && quad_memory_operand (op1, GET_MODE (op1))
6816 && !reg_overlap_mentioned_p (op0, op1));
6817
6818 else if (MEM_P (op0) && REG_P (op1))
6819 ret = (quad_memory_operand (op0, GET_MODE (op0))
6820 && quad_int_reg_operand (op1, GET_MODE (op1)));
6821
6822 else
6823 ret = false;
6824
6825 if (TARGET_DEBUG_ADDR)
6826 {
6827 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6828 ret ? "true" : "false");
6829 debug_rtx (gen_rtx_SET (op0, op1));
6830 }
6831
6832 return ret;
6833 }
6834
6835 /* Given an address, return a constant offset term if one exists. */
6836
6837 static rtx
6838 address_offset (rtx op)
6839 {
6840 if (GET_CODE (op) == PRE_INC
6841 || GET_CODE (op) == PRE_DEC)
6842 op = XEXP (op, 0);
6843 else if (GET_CODE (op) == PRE_MODIFY
6844 || GET_CODE (op) == LO_SUM)
6845 op = XEXP (op, 1);
6846
6847 if (GET_CODE (op) == CONST)
6848 op = XEXP (op, 0);
6849
6850 if (GET_CODE (op) == PLUS)
6851 op = XEXP (op, 1);
6852
6853 if (CONST_INT_P (op))
6854 return op;
6855
6856 return NULL_RTX;
6857 }
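
/* For example, address_offset returns (const_int 16) for the address
   (plus (reg) (const_int 16)), digs through (lo_sum (reg) (const (plus
   (symbol_ref) (const_int 16)))) to the same result, and returns
   NULL_RTX for a plain (reg).  */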
6858
6859 /* Return true if the MEM operand is a memory operand suitable for use
6860 with a (full width, possibly multiple) gpr load/store. On
6861 powerpc64 this means the offset must be divisible by 4.
6862 Implements 'Y' constraint.
6863
6864 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6865 a constraint function we know the operand has satisfied a suitable
6866 memory predicate. Also accept some odd rtl generated by reload
6867 (see rs6000_legitimize_reload_address for various forms). It is
6868 important that reload rtl be accepted by appropriate constraints
6869 but not by the operand predicate.
6870
6871 Offsetting a lo_sum should not be allowed, except where we know by
6872 alignment that a 32k boundary is not crossed, but see the ???
6873 comment in rs6000_legitimize_reload_address. Note that by
6874 "offsetting" here we mean a further offset to access parts of the
6875 MEM. It's fine to have a lo_sum where the inner address is offset
6876 from a sym, since the same sym+offset will appear in the high part
6877 of the address calculation. */
6878
6879 bool
6880 mem_operand_gpr (rtx op, machine_mode mode)
6881 {
6882 unsigned HOST_WIDE_INT offset;
6883 int extra;
6884 rtx addr = XEXP (op, 0);
6885
6886 op = address_offset (addr);
6887 if (op == NULL_RTX)
6888 return true;
6889
6890 offset = INTVAL (op);
6891 if (TARGET_POWERPC64 && (offset & 3) != 0)
6892 return false;
6893
6894 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
6895 if (extra < 0)
6896 extra = 0;
6897
6898 if (GET_CODE (addr) == LO_SUM)
6899 /* For lo_sum addresses, we must allow any offset except one that
6900 causes a wrap, so test only the low 16 bits. */
6901 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
6902
6903 return offset + 0x8000 < 0x10000u - extra;
6904 }
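
/* Editorial sketch (not part of the original source; kept in #if 0 so it
   is never compiled): the expression ((offset & 0xffff) ^ 0x8000) - 0x8000
   above sign-extends the low 16 bits of OFFSET.  E.g. 0x12348000 becomes
   -0x8000 and 0x12347fff becomes 0x7fff, so only a wrap past a 32k
   boundary can make the final range check fail.  */
#if 0
static HOST_WIDE_INT
sign_extend_low16 (unsigned HOST_WIDE_INT offset)
{
  return ((offset & 0xffff) ^ 0x8000) - 0x8000;
}
#endif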
6905 \f
6906 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
6907
6908 static bool
6909 reg_offset_addressing_ok_p (machine_mode mode)
6910 {
6911 switch (mode)
6912 {
6913 case V16QImode:
6914 case V8HImode:
6915 case V4SFmode:
6916 case V4SImode:
6917 case V2DFmode:
6918 case V2DImode:
6919 case V1TImode:
6920 case TImode:
6921 case TFmode:
6922 case KFmode:
6923 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6924 TImode is not a vector mode, if we want to use the VSX registers to
6925 move it around, we need to restrict ourselves to reg+reg addressing.
6926 Similarly for IEEE 128-bit floating point that is passed in a single
6927 vector register. */
6928 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6929 return false;
6930 break;
6931
6932 case V4HImode:
6933 case V2SImode:
6934 case V1DImode:
6935 case V2SFmode:
6936 /* Paired vector modes. Only reg+reg addressing is valid. */
6937 if (TARGET_PAIRED_FLOAT)
6938 return false;
6939 break;
6940
6941 case SDmode:
6942 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6943 addressing for the LFIWZX and STFIWX instructions. */
6944 if (TARGET_NO_SDMODE_STACK)
6945 return false;
6946 break;
6947
6948 default:
6949 break;
6950 }
6951
6952 return true;
6953 }
6954
6955 static bool
6956 virtual_stack_registers_memory_p (rtx op)
6957 {
6958 int regnum;
6959
6960 if (GET_CODE (op) == REG)
6961 regnum = REGNO (op);
6962
6963 else if (GET_CODE (op) == PLUS
6964 && GET_CODE (XEXP (op, 0)) == REG
6965 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6966 regnum = REGNO (XEXP (op, 0));
6967
6968 else
6969 return false;
6970
6971 return (regnum >= FIRST_VIRTUAL_REGISTER
6972 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
6973 }
6974
6975 /* Return true if a MODE-sized memory access to OP plus OFFSET
6976 is known not to straddle a 32k boundary. This function is used
6977 to determine whether -mcmodel=medium code can use TOC pointer
6978 relative addressing for OP. This means the alignment of the TOC
6979 pointer must also be taken into account, and unfortunately that is
6980 only 8 bytes. */
6981
6982 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
6983 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
6984 #endif
6985
6986 static bool
6987 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
6988 machine_mode mode)
6989 {
6990 tree decl;
6991 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
6992
6993 if (GET_CODE (op) != SYMBOL_REF)
6994 return false;
6995
6996 dsize = GET_MODE_SIZE (mode);
6997 decl = SYMBOL_REF_DECL (op);
6998 if (!decl)
6999 {
7000 if (dsize == 0)
7001 return false;
7002
7003 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
7004 replacing memory addresses with an anchor plus offset. We
7005 could find the decl by rummaging around in the block->objects
7006 VEC for the given offset but that seems like too much work. */
7007 dalign = BITS_PER_UNIT;
7008 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
7009 && SYMBOL_REF_ANCHOR_P (op)
7010 && SYMBOL_REF_BLOCK (op) != NULL)
7011 {
7012 struct object_block *block = SYMBOL_REF_BLOCK (op);
7013
7014 dalign = block->alignment;
7015 offset += SYMBOL_REF_BLOCK_OFFSET (op);
7016 }
7017 else if (CONSTANT_POOL_ADDRESS_P (op))
7018 {
7019 /* It would be nice to have get_pool_align()... */
7020 machine_mode cmode = get_pool_mode (op);
7021
7022 dalign = GET_MODE_ALIGNMENT (cmode);
7023 }
7024 }
7025 else if (DECL_P (decl))
7026 {
7027 dalign = DECL_ALIGN (decl);
7028
7029 if (dsize == 0)
7030 {
7031 /* Allow BLKmode when the entire object is known not to
7032 cross a 32k boundary. */
7033 if (!DECL_SIZE_UNIT (decl))
7034 return false;
7035
7036 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
7037 return false;
7038
7039 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
7040 if (dsize > 32768)
7041 return false;
7042
7043 dalign /= BITS_PER_UNIT;
7044 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7045 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7046 return dalign >= dsize;
7047 }
7048 }
7049 else
7050 gcc_unreachable ();
7051
7052 /* Find how many bits of the alignment we know for this access. */
7053 dalign /= BITS_PER_UNIT;
7054 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
7055 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
7056 mask = dalign - 1;
7057 lsb = offset & -offset;
7058 mask &= lsb - 1;
7059 dalign = mask + 1;
7060
7061 return dalign >= dsize;
7062 }
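
/* Editorial sketch (not part of the original source; kept in #if 0 so it
   is never compiled): the lsb/mask computation above derives the
   alignment that SYM+OFFSET is known to have.  E.g. with dalign = 8 and
   offset = 20, lsb = 20 & -20 = 4 and mask = 7 & 3 = 3, so only 4-byte
   alignment is guaranteed; with offset = 0, lsb - 1 is all ones and the
   full 8-byte alignment survives.  */
#if 0
static unsigned HOST_WIDE_INT
known_alignment (unsigned HOST_WIDE_INT dalign, /* power of 2, in bytes */
		 unsigned HOST_WIDE_INT offset)
{
  unsigned HOST_WIDE_INT mask = dalign - 1;
  unsigned HOST_WIDE_INT lsb = offset & -offset;  /* lowest set bit */
  mask &= lsb - 1;
  return mask + 1;
}
#endif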
7063
7064 static bool
7065 constant_pool_expr_p (rtx op)
7066 {
7067 rtx base, offset;
7068
7069 split_const (op, &base, &offset);
7070 return (GET_CODE (base) == SYMBOL_REF
7071 && CONSTANT_POOL_ADDRESS_P (base)
7072 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
7073 }
7074
7075 static const_rtx tocrel_base, tocrel_offset;
7076
7077 /* Return true if OP is a toc pointer relative address (the output
7078 of create_TOC_reference). If STRICT, do not match high part or
7079 non-split -mcmodel=large/medium toc pointer relative addresses. */
7080
7081 bool
7082 toc_relative_expr_p (const_rtx op, bool strict)
7083 {
7084 if (!TARGET_TOC)
7085 return false;
7086
7087 if (TARGET_CMODEL != CMODEL_SMALL)
7088 {
7089 /* Only match the low part. */
7090 if (GET_CODE (op) == LO_SUM
7091 && REG_P (XEXP (op, 0))
7092 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
7093 op = XEXP (op, 1);
7094 else if (strict)
7095 return false;
7096 }
7097
7098 tocrel_base = op;
7099 tocrel_offset = const0_rtx;
7100 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
7101 {
7102 tocrel_base = XEXP (op, 0);
7103 tocrel_offset = XEXP (op, 1);
7104 }
7105
7106 return (GET_CODE (tocrel_base) == UNSPEC
7107 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
7108 }
7109
7110 /* Return true if X is a constant pool address, and also for cmodel=medium
7111 if X is a toc-relative address known to be offsettable within MODE. */
7112
7113 bool
7114 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
7115 bool strict)
7116 {
7117 return (toc_relative_expr_p (x, strict)
7118 && (TARGET_CMODEL != CMODEL_MEDIUM
7119 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
7120 || mode == QImode
7121 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
7122 INTVAL (tocrel_offset), mode)));
7123 }
7124
7125 static bool
7126 legitimate_small_data_p (machine_mode mode, rtx x)
7127 {
7128 return (DEFAULT_ABI == ABI_V4
7129 && !flag_pic && !TARGET_TOC
7130 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
7131 && small_data_operand (x, mode));
7132 }
7133
7134 /* SPE offset addressing is limited to 5 bits' worth of doublewords. */
7135 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
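
/* Editorial sketch (not part of the original source; kept in #if 0 so it
   is never compiled): an equivalent spelling of the macro above.  The
   mask ~0xf8 accepts exactly the offsets 0, 8, 16, ..., 248, i.e. a
   5-bit doubleword count scaled by 8; offsets such as 4 or 256 are
   rejected.  */
#if 0
static bool
spe_const_offset_ok (HOST_WIDE_INT x)
{
  return x >= 0 && x <= 248 && (x & 7) == 0;
}
#endif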
7136
7137 bool
7138 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
7139 bool strict, bool worst_case)
7140 {
7141 unsigned HOST_WIDE_INT offset;
7142 unsigned int extra;
7143
7144 if (GET_CODE (x) != PLUS)
7145 return false;
7146 if (!REG_P (XEXP (x, 0)))
7147 return false;
7148 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7149 return false;
7150 if (!reg_offset_addressing_ok_p (mode))
7151 return virtual_stack_registers_memory_p (x);
7152 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
7153 return true;
7154 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7155 return false;
7156
7157 offset = INTVAL (XEXP (x, 1));
7158 extra = 0;
7159 switch (mode)
7160 {
7161 case V4HImode:
7162 case V2SImode:
7163 case V1DImode:
7164 case V2SFmode:
7165 /* SPE vector modes. */
7166 return SPE_CONST_OFFSET_OK (offset);
7167
7168 case DFmode:
7169 case DDmode:
7170 case DImode:
7171 /* On e500v2, we may have:
7172
7173 (subreg:DF (mem:DI (plus (reg) (const_int))) 0),
7174
7175 which gets addressed with evldd instructions. */
7176 if (TARGET_E500_DOUBLE)
7177 return SPE_CONST_OFFSET_OK (offset);
7178
7179 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
7180 addressing. */
7181 if (VECTOR_MEM_VSX_P (mode))
7182 return false;
7183
7184 if (!worst_case)
7185 break;
7186 if (!TARGET_POWERPC64)
7187 extra = 4;
7188 else if (offset & 3)
7189 return false;
7190 break;
7191
7192 case TFmode:
7193 case IFmode:
7194 case KFmode:
7195 if (TARGET_E500_DOUBLE)
7196 return (SPE_CONST_OFFSET_OK (offset)
7197 && SPE_CONST_OFFSET_OK (offset + 8));
7198 /* fall through */
7199
7200 case TDmode:
7201 case TImode:
7202 case PTImode:
7203 extra = 8;
7204 if (!worst_case)
7205 break;
7206 if (!TARGET_POWERPC64)
7207 extra = 12;
7208 else if (offset & 3)
7209 return false;
7210 break;
7211
7212 default:
7213 break;
7214 }
7215
7216 offset += 0x8000;
7217 return offset < 0x10000 - extra;
7218 }
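
/* Editorial sketch (not part of the original source; kept in #if 0 so it
   is never compiled): the biased comparison above folds the two-sided
   signed test into a single unsigned one.  OFFSET is accepted iff
   -0x8000 <= OFFSET && OFFSET < 0x8000 - EXTRA, i.e. the last word of
   the access still fits the 16-bit D field.  E.g. OFFSET = 0x7ff8 with
   EXTRA = 8 yields 0xfff8, which is not below 0x10000 - 8.  */
#if 0
static bool
d_form_offset_ok (unsigned HOST_WIDE_INT offset, unsigned int extra)
{
  return offset + 0x8000 < 0x10000 - extra;
}
#endif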
7219
7220 bool
7221 legitimate_indexed_address_p (rtx x, int strict)
7222 {
7223 rtx op0, op1;
7224
7225 if (GET_CODE (x) != PLUS)
7226 return false;
7227
7228 op0 = XEXP (x, 0);
7229 op1 = XEXP (x, 1);
7230
7231 /* Recognize the rtl generated by reload which we know will later be
7232 replaced with proper base and index regs. */
7233 if (!strict
7234 && reload_in_progress
7235 && (REG_P (op0) || GET_CODE (op0) == PLUS)
7236 && REG_P (op1))
7237 return true;
7238
7239 return (REG_P (op0) && REG_P (op1)
7240 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
7241 && INT_REG_OK_FOR_INDEX_P (op1, strict))
7242 || (INT_REG_OK_FOR_BASE_P (op1, strict)
7243 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
7244 }
7245
7246 bool
7247 avoiding_indexed_address_p (machine_mode mode)
7248 {
7249 /* Avoid indexed addressing for modes that have non-indexed
7250 load/store instruction forms. */
7251 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
7252 }
7253
7254 bool
7255 legitimate_indirect_address_p (rtx x, int strict)
7256 {
7257 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
7258 }
7259
7260 bool
7261 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
7262 {
7263 if (!TARGET_MACHO || !flag_pic
7264 || mode != SImode || GET_CODE (x) != MEM)
7265 return false;
7266 x = XEXP (x, 0);
7267
7268 if (GET_CODE (x) != LO_SUM)
7269 return false;
7270 if (GET_CODE (XEXP (x, 0)) != REG)
7271 return false;
7272 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
7273 return false;
7274 x = XEXP (x, 1);
7275
7276 return CONSTANT_P (x);
7277 }
7278
7279 static bool
7280 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
7281 {
7282 if (GET_CODE (x) != LO_SUM)
7283 return false;
7284 if (GET_CODE (XEXP (x, 0)) != REG)
7285 return false;
7286 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
7287 return false;
7288 /* Restrict addressing for DI because of our SUBREG hackery. */
7289 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7290 return false;
7291 x = XEXP (x, 1);
7292
7293 if (TARGET_ELF || TARGET_MACHO)
7294 {
7295 bool large_toc_ok;
7296
7297 if (DEFAULT_ABI == ABI_V4 && flag_pic)
7298 return false;
7299 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, because that macro
7300 usually calls push_reload from reload pass code.
7301 LEGITIMIZE_RELOAD_ADDRESS recognizes some LO_SUM addresses as
7302 valid even though this function says the opposite. In most
7303 cases LRA can generate correct code for address reloads through
7304 its own transformations, but it cannot manage a few LO_SUM
7305 cases. So add code here, analogous to the LO_SUM handling in
7306 rs6000_legitimize_reload_address, saying those addresses are valid. */
7307 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
7308 && small_toc_ref (x, VOIDmode));
7309 if (TARGET_TOC && ! large_toc_ok)
7310 return false;
7311 if (GET_MODE_NUNITS (mode) != 1)
7312 return false;
7313 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7314 && !(/* ??? Assume floating point reg based on mode? */
7315 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
7316 && (mode == DFmode || mode == DDmode)))
7317 return false;
7318
7319 return CONSTANT_P (x) || large_toc_ok;
7320 }
7321
7322 return false;
7323 }
7324
7325
7326 /* Try machine-dependent ways of modifying an illegitimate address
7327 to be legitimate. If we find one, return the new, valid address.
7328 This is used from only one place: `memory_address' in explow.c.
7329
7330 OLDX is the address as it was before break_out_memory_refs was
7331 called. In some cases it is useful to look at this to decide what
7332 needs to be done.
7333
7334 It is always safe for this function to do nothing. It exists to
7335 recognize opportunities to optimize the output.
7336
7337 On RS/6000, first check for the sum of a register with a constant
7338 integer that is out of range. If so, generate code to add the
7339 constant with the low-order 16 bits masked to the register and force
7340 this result into another register (this can be done with `cau').
7341 Then generate an address of REG+(CONST&0xffff), allowing for the
7342 possibility of bit 16 being a one.
7343
7344 Then check for the sum of a register and something that is not a
7345 constant; try to load the non-constant part into a register and return the sum. */
7346
7347 static rtx
7348 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
7349 machine_mode mode)
7350 {
7351 unsigned int extra;
7352
7353 if (!reg_offset_addressing_ok_p (mode))
7354 {
7355 if (virtual_stack_registers_memory_p (x))
7356 return x;
7357
7358 /* In theory we should not be seeing addresses of the form reg+0,
7359 but just in case it is generated, optimize it away. */
7360 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
7361 return force_reg (Pmode, XEXP (x, 0));
7362
7363 /* For TImode with load/store quad, restrict addresses to just a single
7364 pointer, so it works with both GPRs and VSX registers. */
7365 /* Make sure both operands are registers. */
7366 else if (GET_CODE (x) == PLUS
7367 && (mode != TImode || !TARGET_QUAD_MEMORY))
7368 return gen_rtx_PLUS (Pmode,
7369 force_reg (Pmode, XEXP (x, 0)),
7370 force_reg (Pmode, XEXP (x, 1)));
7371 else
7372 return force_reg (Pmode, x);
7373 }
7374 if (GET_CODE (x) == SYMBOL_REF)
7375 {
7376 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
7377 if (model != 0)
7378 return rs6000_legitimize_tls_address (x, model);
7379 }
7380
7381 extra = 0;
7382 switch (mode)
7383 {
7384 case TFmode:
7385 case TDmode:
7386 case TImode:
7387 case PTImode:
7388 case IFmode:
7389 case KFmode:
7390 /* As in rs6000_legitimate_offset_address_p we do not assume the
7391 worst case. The mode here is just a hint as to the registers
7392 used. A TImode is usually in gprs, but may actually be in
7393 fprs. Leave the worst-case scenario for reload to handle via
7394 insn constraints. PTImode goes only in GPRs. */
7395 extra = 8;
7396 break;
7397 default:
7398 break;
7399 }
7400
7401 if (GET_CODE (x) == PLUS
7402 && GET_CODE (XEXP (x, 0)) == REG
7403 && GET_CODE (XEXP (x, 1)) == CONST_INT
7404 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
7405 >= 0x10000 - extra)
7406 && !(SPE_VECTOR_MODE (mode)
7407 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
7408 {
7409 HOST_WIDE_INT high_int, low_int;
7410 rtx sum;
7411 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
7412 if (low_int >= 0x8000 - extra)
7413 low_int = 0;
7414 high_int = INTVAL (XEXP (x, 1)) - low_int;
7415 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
7416 GEN_INT (high_int)), 0);
7417 return plus_constant (Pmode, sum, low_int);
7418 }
7419 else if (GET_CODE (x) == PLUS
7420 && GET_CODE (XEXP (x, 0)) == REG
7421 && GET_CODE (XEXP (x, 1)) != CONST_INT
7422 && GET_MODE_NUNITS (mode) == 1
7423 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7424 || (/* ??? Assume floating point reg based on mode? */
7425 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7426 && (mode == DFmode || mode == DDmode)))
7427 && !avoiding_indexed_address_p (mode))
7428 {
7429 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
7430 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
7431 }
7432 else if (SPE_VECTOR_MODE (mode)
7433 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
7434 {
7435 if (mode == DImode)
7436 return x;
7437 /* We accept [reg + reg] and [reg + OFFSET]. */
7438
7439 if (GET_CODE (x) == PLUS)
7440 {
7441 rtx op1 = XEXP (x, 0);
7442 rtx op2 = XEXP (x, 1);
7443 rtx y;
7444
7445 op1 = force_reg (Pmode, op1);
7446
7447 if (GET_CODE (op2) != REG
7448 && (GET_CODE (op2) != CONST_INT
7449 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
7450 || (GET_MODE_SIZE (mode) > 8
7451 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
7452 op2 = force_reg (Pmode, op2);
7453
7454 /* We can't always do [reg + reg] for these, because [reg +
7455 reg + offset] is not a legitimate addressing mode. */
7456 y = gen_rtx_PLUS (Pmode, op1, op2);
7457
7458 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
7459 return force_reg (Pmode, y);
7460 else
7461 return y;
7462 }
7463
7464 return force_reg (Pmode, x);
7465 }
7466 else if ((TARGET_ELF
7467 #if TARGET_MACHO
7468 || !MACHO_DYNAMIC_NO_PIC_P
7469 #endif
7470 )
7471 && TARGET_32BIT
7472 && TARGET_NO_TOC
7473 && ! flag_pic
7474 && GET_CODE (x) != CONST_INT
7475 && GET_CODE (x) != CONST_WIDE_INT
7476 && GET_CODE (x) != CONST_DOUBLE
7477 && CONSTANT_P (x)
7478 && GET_MODE_NUNITS (mode) == 1
7479 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7480 || (/* ??? Assume floating point reg based on mode? */
7481 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7482 && (mode == DFmode || mode == DDmode))))
7483 {
7484 rtx reg = gen_reg_rtx (Pmode);
7485 if (TARGET_ELF)
7486 emit_insn (gen_elf_high (reg, x));
7487 else
7488 emit_insn (gen_macho_high (reg, x));
7489 return gen_rtx_LO_SUM (Pmode, reg, x);
7490 }
7491 else if (TARGET_TOC
7492 && GET_CODE (x) == SYMBOL_REF
7493 && constant_pool_expr_p (x)
7494 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
7495 return create_TOC_reference (x, NULL_RTX);
7496 else
7497 return x;
7498 }
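
/* Editorial sketch (not part of the original source; kept in #if 0 so it
   is never compiled): the high/low split used above.  LOW is the
   sign-extended low 16 bits and HIGH the remainder, so HIGH can be added
   with addis and LOW becomes the D field.  E.g. VAL = 0x12345 splits
   into HIGH = 0x10000, LOW = 0x2345; VAL = 0x18000 into HIGH = 0x20000,
   LOW = -0x8000.  The function additionally zeroes LOW when it would
   collide with the EXTRA headroom.  */
#if 0
static void
split_displacement (HOST_WIDE_INT val, HOST_WIDE_INT *high, HOST_WIDE_INT *low)
{
  *low = ((val & 0xffff) ^ 0x8000) - 0x8000;
  *high = val - *low;
}
#endif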
7499
7500 /* Debug version of rs6000_legitimize_address. */
7501 static rtx
7502 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
7503 {
7504 rtx ret;
7505 rtx_insn *insns;
7506
7507 start_sequence ();
7508 ret = rs6000_legitimize_address (x, oldx, mode);
7509 insns = get_insns ();
7510 end_sequence ();
7511
7512 if (ret != x)
7513 {
7514 fprintf (stderr,
7515 "\nrs6000_legitimize_address: mode %s, old code %s, "
7516 "new code %s, modified\n",
7517 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
7518 GET_RTX_NAME (GET_CODE (ret)));
7519
7520 fprintf (stderr, "Original address:\n");
7521 debug_rtx (x);
7522
7523 fprintf (stderr, "oldx:\n");
7524 debug_rtx (oldx);
7525
7526 fprintf (stderr, "New address:\n");
7527 debug_rtx (ret);
7528
7529 if (insns)
7530 {
7531 fprintf (stderr, "Insns added:\n");
7532 debug_rtx_list (insns, 20);
7533 }
7534 }
7535 else
7536 {
7537 fprintf (stderr,
7538 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
7539 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
7540
7541 debug_rtx (x);
7542 }
7543
7544 if (insns)
7545 emit_insn (insns);
7546
7547 return ret;
7548 }
7549
7550 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7551 We need to emit DTP-relative relocations. */
7552
7553 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7554 static void
7555 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
7556 {
7557 switch (size)
7558 {
7559 case 4:
7560 fputs ("\t.long\t", file);
7561 break;
7562 case 8:
7563 fputs (DOUBLE_INT_ASM_OP, file);
7564 break;
7565 default:
7566 gcc_unreachable ();
7567 }
7568 output_addr_const (file, x);
7569 if (TARGET_ELF)
7570 fputs ("@dtprel+0x8000", file);
7571 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
7572 {
7573 switch (SYMBOL_REF_TLS_MODEL (x))
7574 {
7575 case 0:
7576 break;
7577 case TLS_MODEL_LOCAL_EXEC:
7578 fputs ("@le", file);
7579 break;
7580 case TLS_MODEL_INITIAL_EXEC:
7581 fputs ("@ie", file);
7582 break;
7583 case TLS_MODEL_GLOBAL_DYNAMIC:
7584 case TLS_MODEL_LOCAL_DYNAMIC:
7585 fputs ("@m", file);
7586 break;
7587 default:
7588 gcc_unreachable ();
7589 }
7590 }
7591 }
7592
7593 /* Return true if X is a symbol that refers to real (rather than emulated)
7594 TLS. */
7595
7596 static bool
7597 rs6000_real_tls_symbol_ref_p (rtx x)
7598 {
7599 return (GET_CODE (x) == SYMBOL_REF
7600 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
7601 }
7602
7603 /* In the name of slightly smaller debug output, and to cater to
7604 general assembler lossage, recognize various UNSPEC sequences
7605 and turn them back into a direct symbol reference. */
7606
7607 static rtx
7608 rs6000_delegitimize_address (rtx orig_x)
7609 {
7610 rtx x, y, offset;
7611
7612 orig_x = delegitimize_mem_from_attrs (orig_x);
7613 x = orig_x;
7614 if (MEM_P (x))
7615 x = XEXP (x, 0);
7616
7617 y = x;
7618 if (TARGET_CMODEL != CMODEL_SMALL
7619 && GET_CODE (y) == LO_SUM)
7620 y = XEXP (y, 1);
7621
7622 offset = NULL_RTX;
7623 if (GET_CODE (y) == PLUS
7624 && GET_MODE (y) == Pmode
7625 && CONST_INT_P (XEXP (y, 1)))
7626 {
7627 offset = XEXP (y, 1);
7628 y = XEXP (y, 0);
7629 }
7630
7631 if (GET_CODE (y) == UNSPEC
7632 && XINT (y, 1) == UNSPEC_TOCREL)
7633 {
7634 y = XVECEXP (y, 0, 0);
7635
7636 #ifdef HAVE_AS_TLS
7637 /* Do not associate thread-local symbols with the original
7638 constant pool symbol. */
7639 if (TARGET_XCOFF
7640 && GET_CODE (y) == SYMBOL_REF
7641 && CONSTANT_POOL_ADDRESS_P (y)
7642 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
7643 return orig_x;
7644 #endif
7645
7646 if (offset != NULL_RTX)
7647 y = gen_rtx_PLUS (Pmode, y, offset);
7648 if (!MEM_P (orig_x))
7649 return y;
7650 else
7651 return replace_equiv_address_nv (orig_x, y);
7652 }
7653
7654 if (TARGET_MACHO
7655 && GET_CODE (orig_x) == LO_SUM
7656 && GET_CODE (XEXP (orig_x, 1)) == CONST)
7657 {
7658 y = XEXP (XEXP (orig_x, 1), 0);
7659 if (GET_CODE (y) == UNSPEC
7660 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
7661 return XVECEXP (y, 0, 0);
7662 }
7663
7664 return orig_x;
7665 }
7666
7667 /* Return true if X shouldn't be emitted into the debug info.
7668 The linker doesn't like .toc section references from
7669 .debug_* sections, so reject .toc section symbols. */
7670
7671 static bool
7672 rs6000_const_not_ok_for_debug_p (rtx x)
7673 {
7674 if (GET_CODE (x) == SYMBOL_REF
7675 && CONSTANT_POOL_ADDRESS_P (x))
7676 {
7677 rtx c = get_pool_constant (x);
7678 machine_mode cmode = get_pool_mode (x);
7679 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7680 return true;
7681 }
7682
7683 return false;
7684 }
7685
7686 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7687
7688 static GTY(()) rtx rs6000_tls_symbol;
7689 static rtx
7690 rs6000_tls_get_addr (void)
7691 {
7692 if (!rs6000_tls_symbol)
7693 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7694
7695 return rs6000_tls_symbol;
7696 }
7697
7698 /* Construct the SYMBOL_REF for TLS GOT references. */
7699
7700 static GTY(()) rtx rs6000_got_symbol;
7701 static rtx
7702 rs6000_got_sym (void)
7703 {
7704 if (!rs6000_got_symbol)
7705 {
7706 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7707 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7708 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7709 }
7710
7711 return rs6000_got_symbol;
7712 }
7713
7714 /* AIX Thread-Local Address support. */
7715
7716 static rtx
7717 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7718 {
7719 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7720 const char *name;
7721 char *tlsname;
7722
7723 name = XSTR (addr, 0);
7724 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
7725 or the symbol will be in the TLS private data section. */
7726 if (name[strlen (name) - 1] != ']'
7727 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7728 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7729 {
7730 tlsname = XALLOCAVEC (char, strlen (name) + 4);
7731 strcpy (tlsname, name);
7732 strcat (tlsname,
7733 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7734 tlsaddr = copy_rtx (addr);
7735 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7736 }
7737 else
7738 tlsaddr = addr;
7739
7740 /* Place addr into TOC constant pool. */
7741 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7742
7743 /* Output the TOC entry and create the MEM referencing the value. */
7744 if (constant_pool_expr_p (XEXP (sym, 0))
7745 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7746 {
7747 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7748 mem = gen_const_mem (Pmode, tocref);
7749 set_mem_alias_set (mem, get_TOC_alias_set ());
7750 }
7751 else
7752 return sym;
7753
7754 /* Use global-dynamic for local-dynamic. */
7755 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7756 || model == TLS_MODEL_LOCAL_DYNAMIC)
7757 {
7758 /* Create new TOC reference for @m symbol. */
7759 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7760 tlsname = XALLOCAVEC (char, strlen (name) + 1);
7761 strcpy (tlsname, "*LCM");
7762 strcat (tlsname, name + 3);
7763 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7764 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7765 tocref = create_TOC_reference (modaddr, NULL_RTX);
7766 rtx modmem = gen_const_mem (Pmode, tocref);
7767 set_mem_alias_set (modmem, get_TOC_alias_set ());
7768
7769 rtx modreg = gen_reg_rtx (Pmode);
7770 emit_insn (gen_rtx_SET (modreg, modmem));
7771
7772 tmpreg = gen_reg_rtx (Pmode);
7773 emit_insn (gen_rtx_SET (tmpreg, mem));
7774
7775 dest = gen_reg_rtx (Pmode);
7776 if (TARGET_32BIT)
7777 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7778 else
7779 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7780 return dest;
7781 }
7782 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
7783 else if (TARGET_32BIT)
7784 {
7785 tlsreg = gen_reg_rtx (SImode);
7786 emit_insn (gen_tls_get_tpointer (tlsreg));
7787 }
7788 else
7789 tlsreg = gen_rtx_REG (DImode, 13);
7790
7791 /* Load the TOC value into a temporary register. */
7792 tmpreg = gen_reg_rtx (Pmode);
7793 emit_insn (gen_rtx_SET (tmpreg, mem));
7794 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7795 gen_rtx_MINUS (Pmode, addr, tlsreg));
7796
7797 /* Add TOC symbol value to TLS pointer. */
7798 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7799
7800 return dest;
7801 }
7802
7803 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7804 this (thread-local) address. */
7805
7806 static rtx
7807 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7808 {
7809 rtx dest, insn;
7810
7811 if (TARGET_XCOFF)
7812 return rs6000_legitimize_tls_address_aix (addr, model);
7813
7814 dest = gen_reg_rtx (Pmode);
7815 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7816 {
7817 rtx tlsreg;
7818
7819 if (TARGET_64BIT)
7820 {
7821 tlsreg = gen_rtx_REG (Pmode, 13);
7822 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7823 }
7824 else
7825 {
7826 tlsreg = gen_rtx_REG (Pmode, 2);
7827 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7828 }
7829 emit_insn (insn);
7830 }
7831 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7832 {
7833 rtx tlsreg, tmp;
7834
7835 tmp = gen_reg_rtx (Pmode);
7836 if (TARGET_64BIT)
7837 {
7838 tlsreg = gen_rtx_REG (Pmode, 13);
7839 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7840 }
7841 else
7842 {
7843 tlsreg = gen_rtx_REG (Pmode, 2);
7844 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7845 }
7846 emit_insn (insn);
7847 if (TARGET_64BIT)
7848 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7849 else
7850 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7851 emit_insn (insn);
7852 }
7853 else
7854 {
7855 rtx r3, got, tga, tmp1, tmp2, call_insn;
7856
7857 /* We currently use relocations like @got@tlsgd for tls, which
7858 means the linker will handle allocation of tls entries, placing
7859 them in the .got section. So use a pointer to the .got section,
7860 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7861 or to secondary GOT sections used by 32-bit -fPIC. */
7862 if (TARGET_64BIT)
7863 got = gen_rtx_REG (Pmode, 2);
7864 else
7865 {
7866 if (flag_pic == 1)
7867 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7868 else
7869 {
7870 rtx gsym = rs6000_got_sym ();
7871 got = gen_reg_rtx (Pmode);
7872 if (flag_pic == 0)
7873 rs6000_emit_move (got, gsym, Pmode);
7874 else
7875 {
7876 rtx mem, lab, last;
7877
7878 tmp1 = gen_reg_rtx (Pmode);
7879 tmp2 = gen_reg_rtx (Pmode);
7880 mem = gen_const_mem (Pmode, tmp1);
7881 lab = gen_label_rtx ();
7882 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7883 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7884 if (TARGET_LINK_STACK)
7885 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7886 emit_move_insn (tmp2, mem);
7887 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7888 set_unique_reg_note (last, REG_EQUAL, gsym);
7889 }
7890 }
7891 }
7892
7893 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7894 {
7895 tga = rs6000_tls_get_addr ();
7896 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7897 1, const0_rtx, Pmode);
7898
7899 r3 = gen_rtx_REG (Pmode, 3);
7900 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7901 {
7902 if (TARGET_64BIT)
7903 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7904 else
7905 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7906 }
7907 else if (DEFAULT_ABI == ABI_V4)
7908 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7909 else
7910 gcc_unreachable ();
7911 call_insn = last_call_insn ();
7912 PATTERN (call_insn) = insn;
7913 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7914 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7915 pic_offset_table_rtx);
7916 }
7917 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7918 {
7919 tga = rs6000_tls_get_addr ();
7920 tmp1 = gen_reg_rtx (Pmode);
7921 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7922 1, const0_rtx, Pmode);
7923
7924 r3 = gen_rtx_REG (Pmode, 3);
7925 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7926 {
7927 if (TARGET_64BIT)
7928 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7929 else
7930 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7931 }
7932 else if (DEFAULT_ABI == ABI_V4)
7933 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7934 else
7935 gcc_unreachable ();
7936 call_insn = last_call_insn ();
7937 PATTERN (call_insn) = insn;
7938 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7939 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7940 pic_offset_table_rtx);
7941
7942 if (rs6000_tls_size == 16)
7943 {
7944 if (TARGET_64BIT)
7945 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7946 else
7947 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7948 }
7949 else if (rs6000_tls_size == 32)
7950 {
7951 tmp2 = gen_reg_rtx (Pmode);
7952 if (TARGET_64BIT)
7953 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7954 else
7955 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7956 emit_insn (insn);
7957 if (TARGET_64BIT)
7958 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7959 else
7960 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7961 }
7962 else
7963 {
7964 tmp2 = gen_reg_rtx (Pmode);
7965 if (TARGET_64BIT)
7966 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7967 else
7968 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7969 emit_insn (insn);
7970 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
7971 }
7972 emit_insn (insn);
7973 }
7974 else
7975 {
7976 /* IE, or 64-bit offset LE. */
7977 tmp2 = gen_reg_rtx (Pmode);
7978 if (TARGET_64BIT)
7979 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7980 else
7981 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7982 emit_insn (insn);
7983 if (TARGET_64BIT)
7984 insn = gen_tls_tls_64 (dest, tmp2, addr);
7985 else
7986 insn = gen_tls_tls_32 (dest, tmp2, addr);
7987 emit_insn (insn);
7988 }
7989 }
7990
7991 return dest;
7992 }
7993
7994 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7995
7996 static bool
7997 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7998 {
7999 if (GET_CODE (x) == HIGH
8000 && GET_CODE (XEXP (x, 0)) == UNSPEC)
8001 return true;
8002
8003 /* A TLS symbol in the TOC cannot contain a sum. */
8004 if (GET_CODE (x) == CONST
8005 && GET_CODE (XEXP (x, 0)) == PLUS
8006 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8007 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
8008 return true;
8009
8010 /* Do not place an ELF TLS symbol in the constant pool. */
8011 return TARGET_ELF && tls_referenced_p (x);
8012 }
8013
8014 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
8015 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
8016 can be addressed relative to the toc pointer. */
8017
8018 static bool
8019 use_toc_relative_ref (rtx sym, machine_mode mode)
8020 {
8021 return ((constant_pool_expr_p (sym)
8022 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
8023 get_pool_mode (sym)))
8024 || (TARGET_CMODEL == CMODEL_MEDIUM
8025 && SYMBOL_REF_LOCAL_P (sym)
8026 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
8027 }
8028
8029 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
8030 replace the input X, or the original X if no replacement is called for.
8031 The output parameter *WIN is 1 if the calling macro should goto WIN,
8032 0 if it should not.
8033
8034 For RS/6000, we wish to handle large displacements off a base
8035 register by splitting the addend across an addis and the mem insn.
8036 This cuts the number of extra insns needed from 3 to 1.
8037
8038 On Darwin, we use this to generate code for floating point constants.
8039 A movsf_low is generated so we wind up with 2 instructions rather than 3.
8040 The Darwin code is inside #if TARGET_MACHO because only then are the
8041 machopic_* functions defined. */
8042 static rtx
8043 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
8044 int opnum, int type,
8045 int ind_levels ATTRIBUTE_UNUSED, int *win)
8046 {
8047 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8048
8049 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
8050 DFmode/DImode MEM. */
8051 if (reg_offset_p
8052 && opnum == 1
8053 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
8054 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
8055 reg_offset_p = false;
8056
8057 /* We must recognize output that we have already generated ourselves. */
8058 if (GET_CODE (x) == PLUS
8059 && GET_CODE (XEXP (x, 0)) == PLUS
8060 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
8061 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
8062 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8063 {
8064 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8065 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
8066 opnum, (enum reload_type) type);
8067 *win = 1;
8068 return x;
8069 }
8070
8071 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
8072 if (GET_CODE (x) == LO_SUM
8073 && GET_CODE (XEXP (x, 0)) == HIGH)
8074 {
8075 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8076 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8077 opnum, (enum reload_type) type);
8078 *win = 1;
8079 return x;
8080 }
8081
8082 #if TARGET_MACHO
8083 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
8084 && GET_CODE (x) == LO_SUM
8085 && GET_CODE (XEXP (x, 0)) == PLUS
8086 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
8087 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
8088 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
8089 && machopic_operand_p (XEXP (x, 1)))
8090 {
8091 /* Result of previous invocation of this function on Darwin
8092 floating point constant. */
8093 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8094 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8095 opnum, (enum reload_type) type);
8096 *win = 1;
8097 return x;
8098 }
8099 #endif
8100
8101 if (TARGET_CMODEL != CMODEL_SMALL
8102 && reg_offset_p
8103 && small_toc_ref (x, VOIDmode))
8104 {
8105 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
8106 x = gen_rtx_LO_SUM (Pmode, hi, x);
8107 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8108 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8109 opnum, (enum reload_type) type);
8110 *win = 1;
8111 return x;
8112 }
8113
8114 if (GET_CODE (x) == PLUS
8115 && GET_CODE (XEXP (x, 0)) == REG
8116 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
8117 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
8118 && GET_CODE (XEXP (x, 1)) == CONST_INT
8119 && reg_offset_p
8120 && !SPE_VECTOR_MODE (mode)
8121 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
8122 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
8123 {
8124 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
8125 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
8126 HOST_WIDE_INT high
8127 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8128
8129 /* Check for 32-bit overflow. */
8130 if (high + low != val)
8131 {
8132 *win = 0;
8133 return x;
8134 }
8135
8136 /* Reload the high part into a base reg; leave the low part
8137 in the mem directly. */
8138
8139 x = gen_rtx_PLUS (GET_MODE (x),
8140 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
8141 GEN_INT (high)),
8142 GEN_INT (low));
8143
8144 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8145 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
8146 opnum, (enum reload_type) type);
8147 *win = 1;
8148 return x;
8149 }
8150
8151 if (GET_CODE (x) == SYMBOL_REF
8152 && reg_offset_p
8153 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
8154 && !SPE_VECTOR_MODE (mode)
8155 #if TARGET_MACHO
8156 && DEFAULT_ABI == ABI_DARWIN
8157 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
8158 && machopic_symbol_defined_p (x)
8159 #else
8160 && DEFAULT_ABI == ABI_V4
8161 && !flag_pic
8162 #endif
8163 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
8164 The same goes for DImode without 64-bit gprs and DFmode and DDmode
8165 without fprs.
8166 ??? Assume floating point reg based on mode? This assumption is
8167 violated by e.g. a powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
8168 where reload ends up doing a DFmode load of a constant from
8169 mem using two gprs. Unfortunately, at this point reload
8170 hasn't yet selected regs so poking around in reload data
8171 won't help and even if we could figure out the regs reliably,
8172 we'd still want to allow this transformation when the mem is
8173 naturally aligned. Since we say the address is good here, we
8174 can't disable offsets from LO_SUMs in mem_operand_gpr.
8175 FIXME: Allow offset from lo_sum for other modes too, when
8176 mem is sufficiently aligned.
8177
8178 Also disallow this if the type can go in VMX/Altivec registers, since
8179 those registers do not have d-form (reg+offset) address modes. */
8180 && !reg_addr[mode].scalar_in_vmx_p
8181 && mode != TFmode
8182 && mode != TDmode
8183 && mode != IFmode
8184 && mode != KFmode
8185 && (mode != TImode || !TARGET_VSX_TIMODE)
8186 && mode != PTImode
8187 && (mode != DImode || TARGET_POWERPC64)
8188 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
8189 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
8190 {
8191 #if TARGET_MACHO
8192 if (flag_pic)
8193 {
8194 rtx offset = machopic_gen_offset (x);
8195 x = gen_rtx_LO_SUM (GET_MODE (x),
8196 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
8197 gen_rtx_HIGH (Pmode, offset)), offset);
8198 }
8199 else
8200 #endif
8201 x = gen_rtx_LO_SUM (GET_MODE (x),
8202 gen_rtx_HIGH (Pmode, x), x);
8203
8204 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8205 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8206 opnum, (enum reload_type) type);
8207 *win = 1;
8208 return x;
8209 }
8210
8211 /* Reload an offset address wrapped by an AND that represents the
8212 masking of the lower bits. Strip the outer AND and let reload
8213 convert the offset address into an indirect address. For VSX,
8214 force reload to create the address with an AND in a separate
8215 register, because we can't guarantee an altivec register will
8216 be used. */
8217 if (VECTOR_MEM_ALTIVEC_P (mode)
8218 && GET_CODE (x) == AND
8219 && GET_CODE (XEXP (x, 0)) == PLUS
8220 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
8221 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
8222 && GET_CODE (XEXP (x, 1)) == CONST_INT
8223 && INTVAL (XEXP (x, 1)) == -16)
8224 {
8225 x = XEXP (x, 0);
8226 *win = 1;
8227 return x;
8228 }
8229
8230 if (TARGET_TOC
8231 && reg_offset_p
8232 && GET_CODE (x) == SYMBOL_REF
8233 && use_toc_relative_ref (x, mode))
8234 {
8235 x = create_TOC_reference (x, NULL_RTX);
8236 if (TARGET_CMODEL != CMODEL_SMALL)
8237 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
8238 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
8239 opnum, (enum reload_type) type);
8240 *win = 1;
8241 return x;
8242 }
8243 *win = 0;
8244 return x;
8245 }
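
/* Editorial sketch (not part of the original source; kept in #if 0 so it
   is never compiled): the 32-bit overflow check above.  HIGH is VAL
   minus LOW, reduced to a signed 32-bit value, so HIGH + LOW == VAL
   exactly when the displacement fits in an addis high part plus a
   16-bit low part.  E.g. 64-bit VAL = 0x80000000 gives LOW = 0 and
   HIGH = -0x80000000, so the transformation is refused.  */
#if 0
static bool
displacement_splittable_p (HOST_WIDE_INT val)
{
  HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
  HOST_WIDE_INT high
    = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
  return high + low == val;
}
#endif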
8246
8247 /* Debug version of rs6000_legitimize_reload_address. */
8248 static rtx
8249 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
8250 int opnum, int type,
8251 int ind_levels, int *win)
8252 {
8253 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
8254 ind_levels, win);
8255 fprintf (stderr,
8256 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
8257 "type = %d, ind_levels = %d, win = %d, original addr:\n",
8258 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
8259 debug_rtx (x);
8260
8261 if (x == ret)
8262 fprintf (stderr, "Same address returned\n");
8263 else if (!ret)
8264 fprintf (stderr, "NULL returned\n");
8265 else
8266 {
8267 fprintf (stderr, "New address:\n");
8268 debug_rtx (ret);
8269 }
8270
8271 return ret;
8272 }
8273
8274 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
8275 that is a valid memory address for an instruction.
8276 The MODE argument is the machine mode for the MEM expression
8277 that wants to use this address.
8278
8279 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
8280 refers to a constant pool entry of an address (or the sum of it
8281 plus a constant), a short (16-bit signed) constant plus a register,
8282 the sum of two registers, or a register indirect, possibly with an
8283 auto-increment. For DFmode, DDmode and DImode with a constant plus
8284 register, we must ensure that both words are addressable, or on
8285 PowerPC64 that the offset is word aligned.
8286
8287 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
8288 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
8289 because adjacent memory cells are accessed by adding word-sized offsets
8290 during assembly output. */
8291 static bool
8292 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
8293 {
8294 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
8295
8296 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
8297 if (VECTOR_MEM_ALTIVEC_P (mode)
8298 && GET_CODE (x) == AND
8299 && GET_CODE (XEXP (x, 1)) == CONST_INT
8300 && INTVAL (XEXP (x, 1)) == -16)
8301 x = XEXP (x, 0);
8302
8303 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
8304 return 0;
8305 if (legitimate_indirect_address_p (x, reg_ok_strict))
8306 return 1;
8307 if (TARGET_UPDATE
8308 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
8309 && mode_supports_pre_incdec_p (mode)
8310 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
8311 return 1;
8312 if (virtual_stack_registers_memory_p (x))
8313 return 1;
8314 if (reg_offset_p && legitimate_small_data_p (mode, x))
8315 return 1;
8316 if (reg_offset_p
8317 && legitimate_constant_pool_address_p (x, mode,
8318 reg_ok_strict || lra_in_progress))
8319 return 1;
8320 if (reg_offset_p && reg_addr[mode].fused_toc && toc_fusion_mem_wrapped (x, mode))
8321 return 1;
8322 /* For TImode, if we have load/store quad and TImode in VSX registers, only
8323 allow register indirect addresses. This will allow the values to go in
8324 either GPRs or VSX registers without reloading. The vector types would
8325 tend to go into VSX registers, so we allow REG+REG, while TImode seems
8326 somewhat split, in that some uses are GPR based, and some VSX based. */
8327 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
8328 return 0;
8329 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
8330 if (! reg_ok_strict
8331 && reg_offset_p
8332 && GET_CODE (x) == PLUS
8333 && GET_CODE (XEXP (x, 0)) == REG
8334 && (XEXP (x, 0) == virtual_stack_vars_rtx
8335 || XEXP (x, 0) == arg_pointer_rtx)
8336 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8337 return 1;
8338 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
8339 return 1;
8340 if (!FLOAT128_2REG_P (mode)
8341 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
8342 || TARGET_POWERPC64
8343 || (mode != DFmode && mode != DDmode)
8344 || (TARGET_E500_DOUBLE && mode != DDmode))
8345 && (TARGET_POWERPC64 || mode != DImode)
8346 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
8347 && mode != PTImode
8348 && !avoiding_indexed_address_p (mode)
8349 && legitimate_indexed_address_p (x, reg_ok_strict))
8350 return 1;
8351 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
8352 && mode_supports_pre_modify_p (mode)
8353 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
8354 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
8355 reg_ok_strict, false)
8356 || (!avoiding_indexed_address_p (mode)
8357 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
8358 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8359 return 1;
8360 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
8361 return 1;
8362 return 0;
8363 }
8364
8365 /* Debug version of rs6000_legitimate_address_p. */
8366 static bool
8367 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
8368 bool reg_ok_strict)
8369 {
8370 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
8371 fprintf (stderr,
8372 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
8373 "strict = %d, reload = %s, code = %s\n",
8374 ret ? "true" : "false",
8375 GET_MODE_NAME (mode),
8376 reg_ok_strict,
8377 (reload_completed
8378 ? "after"
8379 : (reload_in_progress ? "progress" : "before")),
8380 GET_RTX_NAME (GET_CODE (x)));
8381 debug_rtx (x);
8382
8383 return ret;
8384 }
8385
8386 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8387
8388 static bool
8389 rs6000_mode_dependent_address_p (const_rtx addr,
8390 addr_space_t as ATTRIBUTE_UNUSED)
8391 {
8392 return rs6000_mode_dependent_address_ptr (addr);
8393 }
8394
8395 /* Return true if ADDR (a legitimate address expression)
8396 has an effect that depends on the machine mode it is used for.
8397
8398 On the RS/6000 this is true of all integral offsets (since AltiVec
8399 and VSX modes don't allow them) and of pre-increment and decrement addresses.
8400
8401 ??? Except that due to conceptual problems in offsettable_address_p
8402 we can't really report the problems of integral offsets. So leave
8403 this assuming that the adjustable offset must be valid for the
8404 sub-words of a TFmode operand, which is what we had before. */
8405
8406 static bool
8407 rs6000_mode_dependent_address (const_rtx addr)
8408 {
8409 switch (GET_CODE (addr))
8410 {
8411 case PLUS:
8412 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8413 is considered a legitimate address before reload, so there
8414 are no offset restrictions in that case. Note that this
8415 condition is safe in strict mode because any address involving
8416 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8417 been rejected as illegitimate. */
8418 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8419 && XEXP (addr, 0) != arg_pointer_rtx
8420 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
8421 {
8422 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8423 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
8424 }
8425 break;
8426
8427 case LO_SUM:
8428 /* Anything in the constant pool is sufficiently aligned that
8429 all bytes have the same high part address. */
8430 return !legitimate_constant_pool_address_p (addr, QImode, false);
8431
8432 /* Auto-increment cases are now treated generically in recog.c. */
8433 case PRE_MODIFY:
8434 return TARGET_UPDATE;
8435
8436 /* AND is only allowed in Altivec loads. */
8437 case AND:
8438 return true;
8439
8440 default:
8441 break;
8442 }
8443
8444 return false;
8445 }
8446
8447 /* Debug version of rs6000_mode_dependent_address. */
8448 static bool
8449 rs6000_debug_mode_dependent_address (const_rtx addr)
8450 {
8451 bool ret = rs6000_mode_dependent_address (addr);
8452
8453 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8454 ret ? "true" : "false");
8455 debug_rtx (addr);
8456
8457 return ret;
8458 }
8459
8460 /* Implement FIND_BASE_TERM. */
8461
8462 rtx
8463 rs6000_find_base_term (rtx op)
8464 {
8465 rtx base;
8466
8467 base = op;
8468 if (GET_CODE (base) == CONST)
8469 base = XEXP (base, 0);
8470 if (GET_CODE (base) == PLUS)
8471 base = XEXP (base, 0);
8472 if (GET_CODE (base) == UNSPEC)
8473 switch (XINT (base, 1))
8474 {
8475 case UNSPEC_TOCREL:
8476 case UNSPEC_MACHOPIC_OFFSET:
8477 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8478 for aliasing purposes. */
8479 return XVECEXP (base, 0, 0);
8480 }
8481
8482 return op;
8483 }
8484
8485 /* More elaborate version of recog's offsettable_memref_p predicate
8486 that works around the ??? note of rs6000_mode_dependent_address.
8487 In particular it accepts
8488
8489 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8490
8491 in 32-bit mode, which the recog predicate rejects. */
8492
8493 static bool
8494 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
8495 {
8496 bool worst_case;
8497
8498 if (!MEM_P (op))
8499 return false;
8500
8501 /* First mimic offsettable_memref_p. */
8502 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
8503 return true;
8504
8505 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
8506 the latter predicate knows nothing about the mode of the memory
8507 reference and, therefore, assumes that it is the largest supported
8508 mode (TFmode). As a consequence, legitimate offsettable memory
8509 references are rejected. rs6000_legitimate_offset_address_p contains
8510 the correct logic for the PLUS case of rs6000_mode_dependent_address,
8511 at least with a little bit of help here given that we know the
8512 actual registers used. */
8513 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
8514 || GET_MODE_SIZE (reg_mode) == 4);
8515 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
8516 true, worst_case);
8517 }
8518
8519 /* Change register usage conditional on target flags. */
8520 static void
8521 rs6000_conditional_register_usage (void)
8522 {
8523 int i;
8524
8525 if (TARGET_DEBUG_TARGET)
8526 fprintf (stderr, "rs6000_conditional_register_usage called\n");
8527
8528 /* Set MQ register fixed (already call_used) so that it will not be
8529 allocated. */
8530 fixed_regs[64] = 1;
8531
8532 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
8533 if (TARGET_64BIT)
8534 fixed_regs[13] = call_used_regs[13]
8535 = call_really_used_regs[13] = 1;
8536
8537 /* Conditionally disable FPRs. */
8538 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
8539 for (i = 32; i < 64; i++)
8540 fixed_regs[i] = call_used_regs[i]
8541 = call_really_used_regs[i] = 1;
8542
8543 /* The TOC register is not killed across calls in a way that is
8544 visible to the compiler. */
8545 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8546 call_really_used_regs[2] = 0;
8547
8548 if (DEFAULT_ABI == ABI_V4
8549 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8550 && flag_pic == 2)
8551 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8552
8553 if (DEFAULT_ABI == ABI_V4
8554 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8555 && flag_pic == 1)
8556 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8557 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8558 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8559
8560 if (DEFAULT_ABI == ABI_DARWIN
8561 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
8562 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8563 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8564 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8565
8566 if (TARGET_TOC && TARGET_MINIMAL_TOC)
8567 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8568 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8569
8570 if (TARGET_SPE)
8571 {
8572 global_regs[SPEFSCR_REGNO] = 1;
8573 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
8574 registers in prologues and epilogues. We no longer use r14
8575 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
8576 pool for link-compatibility with older versions of GCC. Once
8577 "old" code has died out, we can return r14 to the allocation
8578 pool. */
8579 fixed_regs[14]
8580 = call_used_regs[14]
8581 = call_really_used_regs[14] = 1;
8582 }
8583
8584 if (!TARGET_ALTIVEC && !TARGET_VSX)
8585 {
8586 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
8587 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8588 call_really_used_regs[VRSAVE_REGNO] = 1;
8589 }
8590
8591 if (TARGET_ALTIVEC || TARGET_VSX)
8592 global_regs[VSCR_REGNO] = 1;
8593
8594 if (TARGET_ALTIVEC_ABI)
8595 {
8596 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
8597 call_used_regs[i] = call_really_used_regs[i] = 1;
8598
8599 /* AIX reserves VR20:31 in non-extended ABI mode. */
8600 if (TARGET_XCOFF)
8601 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8602 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8603 }
8604 }
8605
8606 \f
8607 /* Output insns to set DEST equal to the constant SOURCE as a series of
8608 lis, ori and shl instructions and return TRUE. */
8609
8610 bool
8611 rs6000_emit_set_const (rtx dest, rtx source)
8612 {
8613 machine_mode mode = GET_MODE (dest);
8614 rtx temp, set;
8615 rtx_insn *insn;
8616 HOST_WIDE_INT c;
8617
8618 gcc_checking_assert (CONST_INT_P (source));
8619 c = INTVAL (source);
8620 switch (mode)
8621 {
8622 case QImode:
8623 case HImode:
8624 emit_insn (gen_rtx_SET (dest, source));
8625 return true;
8626
8627 case SImode:
8628 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8629
8630 emit_insn (gen_rtx_SET (copy_rtx (temp),
8631 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8632 emit_insn (gen_rtx_SET (dest,
8633 gen_rtx_IOR (SImode, copy_rtx (temp),
8634 GEN_INT (c & 0xffff))));
8635 break;
8636
8637 case DImode:
8638 if (!TARGET_POWERPC64)
8639 {
8640 rtx hi, lo;
8641
8642 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8643 DImode);
8644 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8645 DImode);
8646 emit_move_insn (hi, GEN_INT (c >> 32));
8647 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8648 emit_move_insn (lo, GEN_INT (c));
8649 }
8650 else
8651 rs6000_emit_set_long_const (dest, c);
8652 break;
8653
8654 default:
8655 gcc_unreachable ();
8656 }
8657
8658 insn = get_last_insn ();
8659 set = single_set (insn);
8660 if (! CONSTANT_P (SET_SRC (set)))
8661 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8662
8663 return true;
8664 }
8665
8666 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8667 Output insns to set DEST equal to the constant C as a series of
8668 lis, ori and shl instructions. */
8669
8670 static void
8671 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8672 {
8673 rtx temp;
8674 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8675
8676 ud1 = c & 0xffff;
8677 c = c >> 16;
8678 ud2 = c & 0xffff;
8679 c = c >> 16;
8680 ud3 = c & 0xffff;
8681 c = c >> 16;
8682 ud4 = c & 0xffff;
8683
8684 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8685 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8686 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8687
8688 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8689 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8690 {
8691 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8692
8693 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8694 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8695 if (ud1 != 0)
8696 emit_move_insn (dest,
8697 gen_rtx_IOR (DImode, copy_rtx (temp),
8698 GEN_INT (ud1)));
8699 }
8700 else if (ud3 == 0 && ud4 == 0)
8701 {
8702 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8703
8704 gcc_assert (ud2 & 0x8000);
8705 emit_move_insn (copy_rtx (temp),
8706 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8707 if (ud1 != 0)
8708 emit_move_insn (copy_rtx (temp),
8709 gen_rtx_IOR (DImode, copy_rtx (temp),
8710 GEN_INT (ud1)));
8711 emit_move_insn (dest,
8712 gen_rtx_ZERO_EXTEND (DImode,
8713 gen_lowpart (SImode,
8714 copy_rtx (temp))));
8715 }
8716 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8717 || (ud4 == 0 && ! (ud3 & 0x8000)))
8718 {
8719 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8720
8721 emit_move_insn (copy_rtx (temp),
8722 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8723 if (ud2 != 0)
8724 emit_move_insn (copy_rtx (temp),
8725 gen_rtx_IOR (DImode, copy_rtx (temp),
8726 GEN_INT (ud2)));
8727 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8728 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8729 GEN_INT (16)));
8730 if (ud1 != 0)
8731 emit_move_insn (dest,
8732 gen_rtx_IOR (DImode, copy_rtx (temp),
8733 GEN_INT (ud1)));
8734 }
8735 else
8736 {
8737 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8738
8739 emit_move_insn (copy_rtx (temp),
8740 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8741 if (ud3 != 0)
8742 emit_move_insn (copy_rtx (temp),
8743 gen_rtx_IOR (DImode, copy_rtx (temp),
8744 GEN_INT (ud3)));
8745
8746 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8747 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8748 GEN_INT (32)));
8749 if (ud2 != 0)
8750 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8751 gen_rtx_IOR (DImode, copy_rtx (temp),
8752 GEN_INT (ud2 << 16)));
8753 if (ud1 != 0)
8754 emit_move_insn (dest,
8755 gen_rtx_IOR (DImode, copy_rtx (temp),
8756 GEN_INT (ud1)));
8757 }
8758 }
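/* Worked example for the final arm above (illustrative; rT and rD
   are arbitrary register names): for c = 0x123456789abcdef0 the
   decomposition gives ud4 = 0x1234, ud3 = 0x5678, ud2 = 0x9abc and
   ud1 = 0xdef0, and the emitted RTL corresponds roughly to:

       lis   rT, 0x1234        # rT = 0x0000000012340000
       ori   rT, rT, 0x5678    # rT = 0x0000000012345678
       sldi  rT, rT, 32        # rT = 0x1234567800000000
       oris  rT, rT, 0x9abc    # rT = 0x123456789abc0000
       ori   rD, rT, 0xdef0    # rD = 0x123456789abcdef0

   The earlier arms catch constants that need fewer insns, e.g.
   sign-extended 16-bit and 32-bit values.  */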
8759
8760 /* Helper for the move expanders below. Get rid of [r+r] memory refs
8761 in cases where they won't work (TImode, TFmode, TDmode, PTImode). */
8762
8763 static void
8764 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8765 {
8766 if (reload_in_progress)
8767 return;
8768
8769 if (GET_CODE (operands[0]) == MEM
8770 && GET_CODE (XEXP (operands[0], 0)) != REG
8771 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8772 GET_MODE (operands[0]), false))
8773 operands[0]
8774 = replace_equiv_address (operands[0],
8775 copy_addr_to_reg (XEXP (operands[0], 0)));
8776
8777 if (GET_CODE (operands[1]) == MEM
8778 && GET_CODE (XEXP (operands[1], 0)) != REG
8779 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8780 GET_MODE (operands[1]), false))
8781 operands[1]
8782 = replace_equiv_address (operands[1],
8783 copy_addr_to_reg (XEXP (operands[1], 0)));
8784 }
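/* For example (illustrative): a TImode operand of the form
   (mem:TI (plus:DI (reg r3) (reg r4))) is rewritten as
   (mem:TI (reg rT)) after emitting rT = r3 + r4, since the
   quad-word move patterns cannot handle an [r+r] address.  */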
8785
8786 /* Generate a vector of constants to permute MODE for a little-endian
8787 storage operation by swapping the two halves of a vector. */
8788 static rtvec
8789 rs6000_const_vec (machine_mode mode)
8790 {
8791 int i, subparts;
8792 rtvec v;
8793
8794 switch (mode)
8795 {
8796 case V1TImode:
8797 subparts = 1;
8798 break;
8799 case V2DFmode:
8800 case V2DImode:
8801 subparts = 2;
8802 break;
8803 case V4SFmode:
8804 case V4SImode:
8805 subparts = 4;
8806 break;
8807 case V8HImode:
8808 subparts = 8;
8809 break;
8810 case V16QImode:
8811 subparts = 16;
8812 break;
8813 default:
8814 gcc_unreachable ();
8815 }
8816
8817 v = rtvec_alloc (subparts);
8818
8819 for (i = 0; i < subparts / 2; ++i)
8820 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8821 for (i = subparts / 2; i < subparts; ++i)
8822 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8823
8824 return v;
8825 }
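/* For example (illustrative): for V4SImode the two loops above
   produce the selector { 2, 3, 0, 1 }, and for V2DImode they
   produce { 1, 0 } -- in both cases the two doubleword halves of
   the vector are exchanged.  */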
8826
8827 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8828 for a VSX load or store operation. */
8829 rtx
8830 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8831 {
8832 /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
8833 128-bit integers if they are allowed in VSX registers. */
8834 if (FLOAT128_VECTOR_P (mode) || mode == TImode)
8835 return gen_rtx_ROTATE (mode, source, GEN_INT (64));
8836 else
8837 {
8838 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8839 return gen_rtx_VEC_SELECT (mode, source, par);
8840 }
8841 }
8842
8843 /* Emit a little-endian load from vector memory location SOURCE to VSX
8844 register DEST in mode MODE. The load is done with two permuting
8845 insns that represent an lxvd2x and an xxpermdi. */
8846 void
8847 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8848 {
8849 rtx tmp, permute_mem, permute_reg;
8850
8851 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8852 V1TImode). */
8853 if (mode == TImode || mode == V1TImode)
8854 {
8855 mode = V2DImode;
8856 dest = gen_lowpart (V2DImode, dest);
8857 source = adjust_address (source, V2DImode, 0);
8858 }
8859
8860 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8861 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8862 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8863 emit_insn (gen_rtx_SET (tmp, permute_mem));
8864 emit_insn (gen_rtx_SET (dest, permute_reg));
8865 }
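/* Illustrative expansion for a V2DFmode load (vsT/vsD are arbitrary
   VSX register names): both SETs use the doubleword-swapping
   selector from rs6000_const_vec, so the pair corresponds roughly to

       lxvd2x   vsT, 0, rADDR      # load with doublewords swapped
       xxpermdi vsD, vsT, vsT, 2   # swap the doublewords back

   and when an adjacent store permutes the same register, the
   redundant permute pair can later be optimized away.  */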
8866
8867 /* Emit a little-endian store to vector memory location DEST from VSX
8868 register SOURCE in mode MODE. The store is done with two permuting
8869 insns that represent an xxpermdi and an stxvd2x. */
8870 void
8871 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8872 {
8873 rtx tmp, permute_src, permute_tmp;
8874
8875 /* This should never be called during or after reload, because it does
8876 not re-permute the source register. It is intended only for use
8877 during expand. */
8878 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
8879
8880 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8881 V1TImode). */
8882 if (mode == TImode || mode == V1TImode)
8883 {
8884 mode = V2DImode;
8885 dest = adjust_address (dest, V2DImode, 0);
8886 source = gen_lowpart (V2DImode, source);
8887 }
8888
8889 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8890 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8891 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8892 emit_insn (gen_rtx_SET (tmp, permute_src));
8893 emit_insn (gen_rtx_SET (dest, permute_tmp));
8894 }
8895
8896 /* Emit a sequence representing a little-endian VSX load or store,
8897 moving data from SOURCE to DEST in mode MODE. This is done
8898 separately from rs6000_emit_move to ensure it is called only
8899 during expand. LE VSX loads and stores introduced later are
8900 handled with a split. The expand-time RTL generation allows
8901 us to optimize away redundant pairs of register-permutes. */
8902 void
8903 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
8904 {
8905 gcc_assert (!BYTES_BIG_ENDIAN
8906 && VECTOR_MEM_VSX_P (mode)
8907 && !gpr_or_gpr_p (dest, source)
8908 && (MEM_P (source) ^ MEM_P (dest)));
8909
8910 if (MEM_P (source))
8911 {
8912 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8913 rs6000_emit_le_vsx_load (dest, source, mode);
8914 }
8915 else
8916 {
8917 if (!REG_P (source))
8918 source = force_reg (mode, source);
8919 rs6000_emit_le_vsx_store (dest, source, mode);
8920 }
8921 }
8922
8923 /* Emit a move from SOURCE to DEST in mode MODE. */
8924 void
8925 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
8926 {
8927 rtx operands[2];
8928 operands[0] = dest;
8929 operands[1] = source;
8930
8931 if (TARGET_DEBUG_ADDR)
8932 {
8933 fprintf (stderr,
8934 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8935 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8936 GET_MODE_NAME (mode),
8937 reload_in_progress,
8938 reload_completed,
8939 can_create_pseudo_p ());
8940 debug_rtx (dest);
8941 fprintf (stderr, "source:\n");
8942 debug_rtx (source);
8943 }
8944
8945 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
8946 if (CONST_WIDE_INT_P (operands[1])
8947 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8948 {
8949 /* This should be fixed with the introduction of CONST_WIDE_INT. */
8950 gcc_unreachable ();
8951 }
8952
8953 /* Check if GCC is setting up a block move that will end up using FP
8954 registers as temporaries. We must make sure this is acceptable. */
8955 if (GET_CODE (operands[0]) == MEM
8956 && GET_CODE (operands[1]) == MEM
8957 && mode == DImode
8958 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8959 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8960 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8961 ? 32 : MEM_ALIGN (operands[0])))
8962 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8963 ? 32
8964 : MEM_ALIGN (operands[1]))))
8965 && ! MEM_VOLATILE_P (operands [0])
8966 && ! MEM_VOLATILE_P (operands [1]))
8967 {
8968 emit_move_insn (adjust_address (operands[0], SImode, 0),
8969 adjust_address (operands[1], SImode, 0));
8970 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8971 adjust_address (copy_rtx (operands[1]), SImode, 4));
8972 return;
8973 }
8974
8975 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8976 && !gpc_reg_operand (operands[1], mode))
8977 operands[1] = force_reg (mode, operands[1]);
8978
8979 /* Recognize the case where operand[1] is a reference to thread-local
8980 data and load its address to a register. */
8981 if (tls_referenced_p (operands[1]))
8982 {
8983 enum tls_model model;
8984 rtx tmp = operands[1];
8985 rtx addend = NULL;
8986
8987 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8988 {
8989 addend = XEXP (XEXP (tmp, 0), 1);
8990 tmp = XEXP (XEXP (tmp, 0), 0);
8991 }
8992
8993 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8994 model = SYMBOL_REF_TLS_MODEL (tmp);
8995 gcc_assert (model != 0);
8996
8997 tmp = rs6000_legitimize_tls_address (tmp, model);
8998 if (addend)
8999 {
9000 tmp = gen_rtx_PLUS (mode, tmp, addend);
9001 tmp = force_operand (tmp, operands[0]);
9002 }
9003 operands[1] = tmp;
9004 }
9005
9006 /* Handle the case where reload calls us with an invalid address. */
9007 if (reload_in_progress && mode == Pmode
9008 && (! general_operand (operands[1], mode)
9009 || ! nonimmediate_operand (operands[0], mode)))
9010 goto emit_set;
9011
9012 /* 128-bit constant floating-point values on Darwin should really be loaded
9013 as two parts. However, this premature splitting is a problem when DFmode
9014 values can go into Altivec registers. */
9015 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
9016 && GET_CODE (operands[1]) == CONST_DOUBLE)
9017 {
9018 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
9019 simplify_gen_subreg (DFmode, operands[1], mode, 0),
9020 DFmode);
9021 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
9022 GET_MODE_SIZE (DFmode)),
9023 simplify_gen_subreg (DFmode, operands[1], mode,
9024 GET_MODE_SIZE (DFmode)),
9025 DFmode);
9026 return;
9027 }
9028
9029 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
9030 cfun->machine->sdmode_stack_slot =
9031 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
9032
9033
9034 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
9035 p1:SD) if p1 is not of floating-point class and p0 is spilled,
9036 since we have no analogous movsd_store for this case. */
9037 if (lra_in_progress && mode == DDmode
9038 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
9039 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9040 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
9041 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
9042 {
9043 enum reg_class cl;
9044 int regno = REGNO (SUBREG_REG (operands[1]));
9045
9046 if (regno >= FIRST_PSEUDO_REGISTER)
9047 {
9048 cl = reg_preferred_class (regno);
9049 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
9050 }
9051 if (regno >= 0 && ! FP_REGNO_P (regno))
9052 {
9053 mode = SDmode;
9054 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
9055 operands[1] = SUBREG_REG (operands[1]);
9056 }
9057 }
9058 if (lra_in_progress
9059 && mode == SDmode
9060 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
9061 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
9062 && (REG_P (operands[1])
9063 || (GET_CODE (operands[1]) == SUBREG
9064 && REG_P (SUBREG_REG (operands[1])))))
9065 {
9066 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
9067 ? SUBREG_REG (operands[1]) : operands[1]);
9068 enum reg_class cl;
9069
9070 if (regno >= FIRST_PSEUDO_REGISTER)
9071 {
9072 cl = reg_preferred_class (regno);
9073 gcc_assert (cl != NO_REGS);
9074 regno = ira_class_hard_regs[cl][0];
9075 }
9076 if (FP_REGNO_P (regno))
9077 {
9078 if (GET_MODE (operands[0]) != DDmode)
9079 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
9080 emit_insn (gen_movsd_store (operands[0], operands[1]));
9081 }
9082 else if (INT_REGNO_P (regno))
9083 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9084 else
9085 gcc_unreachable ();
9086 return;
9087 }
9088 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
9089 p1:DD)) if p0 is not of floating-point class and p1 is spilled,
9090 since we have no analogous movsd_load for this case. */
9091 if (lra_in_progress && mode == DDmode
9092 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
9093 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
9094 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
9095 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9096 {
9097 enum reg_class cl;
9098 int regno = REGNO (SUBREG_REG (operands[0]));
9099
9100 if (regno >= FIRST_PSEUDO_REGISTER)
9101 {
9102 cl = reg_preferred_class (regno);
9103 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
9104 }
9105 if (regno >= 0 && ! FP_REGNO_P (regno))
9106 {
9107 mode = SDmode;
9108 operands[0] = SUBREG_REG (operands[0]);
9109 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
9110 }
9111 }
9112 if (lra_in_progress
9113 && mode == SDmode
9114 && (REG_P (operands[0])
9115 || (GET_CODE (operands[0]) == SUBREG
9116 && REG_P (SUBREG_REG (operands[0]))))
9117 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
9118 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
9119 {
9120 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
9121 ? SUBREG_REG (operands[0]) : operands[0]);
9122 enum reg_class cl;
9123
9124 if (regno >= FIRST_PSEUDO_REGISTER)
9125 {
9126 cl = reg_preferred_class (regno);
9127 gcc_assert (cl != NO_REGS);
9128 regno = ira_class_hard_regs[cl][0];
9129 }
9130 if (FP_REGNO_P (regno))
9131 {
9132 if (GET_MODE (operands[1]) != DDmode)
9133 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
9134 emit_insn (gen_movsd_load (operands[0], operands[1]));
9135 }
9136 else if (INT_REGNO_P (regno))
9137 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
9138 else
9139 gcc_unreachable ();
9140 return;
9141 }
9142
9143 if (reload_in_progress
9144 && mode == SDmode
9145 && cfun->machine->sdmode_stack_slot != NULL_RTX
9146 && MEM_P (operands[0])
9147 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
9148 && REG_P (operands[1]))
9149 {
9150 if (FP_REGNO_P (REGNO (operands[1])))
9151 {
9152 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
9153 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9154 emit_insn (gen_movsd_store (mem, operands[1]));
9155 }
9156 else if (INT_REGNO_P (REGNO (operands[1])))
9157 {
9158 rtx mem = operands[0];
9159 if (BYTES_BIG_ENDIAN)
9160 mem = adjust_address_nv (mem, mode, 4);
9161 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9162 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
9163 }
9164 else
9165 gcc_unreachable ();
9166 return;
9167 }
9168 if (reload_in_progress
9169 && mode == SDmode
9170 && REG_P (operands[0])
9171 && MEM_P (operands[1])
9172 && cfun->machine->sdmode_stack_slot != NULL_RTX
9173 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
9174 {
9175 if (FP_REGNO_P (REGNO (operands[0])))
9176 {
9177 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
9178 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9179 emit_insn (gen_movsd_load (operands[0], mem));
9180 }
9181 else if (INT_REGNO_P (REGNO (operands[0])))
9182 {
9183 rtx mem = operands[1];
9184 if (BYTES_BIG_ENDIAN)
9185 mem = adjust_address_nv (mem, mode, 4);
9186 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
9187 emit_insn (gen_movsd_hardfloat (operands[0], mem));
9188 }
9189 else
9190 gcc_unreachable ();
9191 return;
9192 }
9193
9194 /* FIXME: In the long term, this switch statement should go away
9195 and be replaced by a sequence of tests based on things like
9196 mode == Pmode. */
9197 switch (mode)
9198 {
9199 case HImode:
9200 case QImode:
9201 if (CONSTANT_P (operands[1])
9202 && GET_CODE (operands[1]) != CONST_INT)
9203 operands[1] = force_const_mem (mode, operands[1]);
9204 break;
9205
9206 case TFmode:
9207 case TDmode:
9208 case IFmode:
9209 case KFmode:
9210 if (FLOAT128_2REG_P (mode))
9211 rs6000_eliminate_indexed_memrefs (operands);
9212 /* fall through */
9213
9214 case DFmode:
9215 case DDmode:
9216 case SFmode:
9217 case SDmode:
9218 if (CONSTANT_P (operands[1])
9219 && ! easy_fp_constant (operands[1], mode))
9220 operands[1] = force_const_mem (mode, operands[1]);
9221 break;
9222
9223 case V16QImode:
9224 case V8HImode:
9225 case V4SFmode:
9226 case V4SImode:
9227 case V4HImode:
9228 case V2SFmode:
9229 case V2SImode:
9230 case V1DImode:
9231 case V2DFmode:
9232 case V2DImode:
9233 case V1TImode:
9234 if (CONSTANT_P (operands[1])
9235 && !easy_vector_constant (operands[1], mode))
9236 operands[1] = force_const_mem (mode, operands[1]);
9237 break;
9238
9239 case SImode:
9240 case DImode:
9241 /* Use the default pattern for the address of ELF small data. */
9242 if (TARGET_ELF
9243 && mode == Pmode
9244 && DEFAULT_ABI == ABI_V4
9245 && (GET_CODE (operands[1]) == SYMBOL_REF
9246 || GET_CODE (operands[1]) == CONST)
9247 && small_data_operand (operands[1], mode))
9248 {
9249 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9250 return;
9251 }
9252
9253 if (DEFAULT_ABI == ABI_V4
9254 && mode == Pmode && mode == SImode
9255 && flag_pic == 1 && got_operand (operands[1], mode))
9256 {
9257 emit_insn (gen_movsi_got (operands[0], operands[1]));
9258 return;
9259 }
9260
9261 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
9262 && TARGET_NO_TOC
9263 && ! flag_pic
9264 && mode == Pmode
9265 && CONSTANT_P (operands[1])
9266 && GET_CODE (operands[1]) != HIGH
9267 && GET_CODE (operands[1]) != CONST_INT)
9268 {
9269 rtx target = (!can_create_pseudo_p ()
9270 ? operands[0]
9271 : gen_reg_rtx (mode));
9272
9273 /* If this is a function address on -mcall-aixdesc,
9274 convert it to the address of the descriptor. */
9275 if (DEFAULT_ABI == ABI_AIX
9276 && GET_CODE (operands[1]) == SYMBOL_REF
9277 && XSTR (operands[1], 0)[0] == '.')
9278 {
9279 const char *name = XSTR (operands[1], 0);
9280 rtx new_ref;
9281 while (*name == '.')
9282 name++;
9283 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
9284 CONSTANT_POOL_ADDRESS_P (new_ref)
9285 = CONSTANT_POOL_ADDRESS_P (operands[1]);
9286 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
9287 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
9288 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
9289 operands[1] = new_ref;
9290 }
9291
9292 if (DEFAULT_ABI == ABI_DARWIN)
9293 {
9294 #if TARGET_MACHO
9295 if (MACHO_DYNAMIC_NO_PIC_P)
9296 {
9297 /* Take care of any required data indirection. */
9298 operands[1] = rs6000_machopic_legitimize_pic_address (
9299 operands[1], mode, operands[0]);
9300 if (operands[0] != operands[1])
9301 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9302 return;
9303 }
9304 #endif
9305 emit_insn (gen_macho_high (target, operands[1]));
9306 emit_insn (gen_macho_low (operands[0], target, operands[1]));
9307 return;
9308 }
9309
9310 emit_insn (gen_elf_high (target, operands[1]));
9311 emit_insn (gen_elf_low (operands[0], target, operands[1]));
9312 return;
9313 }
9314
9315 /* If this is a SYMBOL_REF that refers to a constant pool entry,
9316 and we have put it in the TOC, we just need to make a TOC-relative
9317 reference to it. */
9318 if (TARGET_TOC
9319 && GET_CODE (operands[1]) == SYMBOL_REF
9320 && use_toc_relative_ref (operands[1], mode))
9321 operands[1] = create_TOC_reference (operands[1], operands[0]);
9322 else if (mode == Pmode
9323 && CONSTANT_P (operands[1])
9324 && GET_CODE (operands[1]) != HIGH
9325 && ((GET_CODE (operands[1]) != CONST_INT
9326 && ! easy_fp_constant (operands[1], mode))
9327 || (GET_CODE (operands[1]) == CONST_INT
9328 && (num_insns_constant (operands[1], mode)
9329 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
9330 || (GET_CODE (operands[0]) == REG
9331 && FP_REGNO_P (REGNO (operands[0]))))
9332 && !toc_relative_expr_p (operands[1], false)
9333 && (TARGET_CMODEL == CMODEL_SMALL
9334 || can_create_pseudo_p ()
9335 || (REG_P (operands[0])
9336 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
9337 {
9338
9339 #if TARGET_MACHO
9340 /* Darwin uses a special PIC legitimizer. */
9341 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
9342 {
9343 operands[1] =
9344 rs6000_machopic_legitimize_pic_address (operands[1], mode,
9345 operands[0]);
9346 if (operands[0] != operands[1])
9347 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9348 return;
9349 }
9350 #endif
9351
9352 /* If we are to limit the number of things we put in the TOC and
9353 this is a symbol plus a constant we can add in one insn,
9354 just put the symbol in the TOC and add the constant. Don't do
9355 this if reload is in progress. */
9356 if (GET_CODE (operands[1]) == CONST
9357 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
9358 && GET_CODE (XEXP (operands[1], 0)) == PLUS
9359 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
9360 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
9361 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
9362 && ! side_effects_p (operands[0]))
9363 {
9364 rtx sym =
9365 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
9366 rtx other = XEXP (XEXP (operands[1], 0), 1);
9367
9368 sym = force_reg (mode, sym);
9369 emit_insn (gen_add3_insn (operands[0], sym, other));
9370 return;
9371 }
9372
9373 operands[1] = force_const_mem (mode, operands[1]);
9374
9375 if (TARGET_TOC
9376 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9377 && constant_pool_expr_p (XEXP (operands[1], 0))
9378 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
9379 get_pool_constant (XEXP (operands[1], 0)),
9380 get_pool_mode (XEXP (operands[1], 0))))
9381 {
9382 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
9383 operands[0]);
9384 operands[1] = gen_const_mem (mode, tocref);
9385 set_mem_alias_set (operands[1], get_TOC_alias_set ());
9386 }
9387 }
9388 break;
9389
9390 case TImode:
9391 if (!VECTOR_MEM_VSX_P (TImode))
9392 rs6000_eliminate_indexed_memrefs (operands);
9393 break;
9394
9395 case PTImode:
9396 rs6000_eliminate_indexed_memrefs (operands);
9397 break;
9398
9399 default:
9400 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9401 }
9402
9403 /* Above, we may have called force_const_mem which may have returned
9404 an invalid address. If we can, fix this up; otherwise, reload will
9405 have to deal with it. */
9406 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
9407 operands[1] = validize_mem (operands[1]);
9408
9409 emit_set:
9410 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9411 }
9412
9413 /* Return true if a structure, union or array containing FIELD should be
9414 accessed using `BLKmode'.
9415
9416 For the SPE, SIMD types are V2SI, and GCC can be tempted to put the
9417 entire thing in a DI and use subregs to access the internals.
9418 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
9419 back-end. Because a single GPR can hold a V2SI, but not a DI, the
9420 best thing to do is set structs to BLKmode and avoid Severe Tire
9421 Damage.
9422
9423 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
9424 fit in a single GPR, whereas DI still needs two. */
9425
9426 static bool
9427 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
9428 {
9429 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
9430 || (TARGET_E500_DOUBLE && mode == DFmode));
9431 }
9432 \f
9433 /* Nonzero if we can use a floating-point register to pass this arg. */
9434 #define USE_FP_FOR_ARG_P(CUM,MODE) \
9435 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
9436 && (CUM)->fregno <= FP_ARG_MAX_REG \
9437 && TARGET_HARD_FLOAT && TARGET_FPRS)
9438
9439 /* Nonzero if we can use an AltiVec register to pass this arg. */
9440 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
9441 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
9442 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
9443 && TARGET_ALTIVEC_ABI \
9444 && (NAMED))
9445
9446 /* Walk down the type tree of TYPE counting consecutive base elements.
9447 If *MODEP is VOIDmode, then set it to the first valid floating point
9448 or vector type. If a non-floating point or vector type is found, or
9449 if a floating point or vector type that doesn't match a non-VOIDmode
9450 *MODEP is found, then return -1, otherwise return the count in the
9451 sub-tree. */
9452
9453 static int
9454 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
9455 {
9456 machine_mode mode;
9457 HOST_WIDE_INT size;
9458
9459 switch (TREE_CODE (type))
9460 {
9461 case REAL_TYPE:
9462 mode = TYPE_MODE (type);
9463 if (!SCALAR_FLOAT_MODE_P (mode))
9464 return -1;
9465
9466 if (*modep == VOIDmode)
9467 *modep = mode;
9468
9469 if (*modep == mode)
9470 return 1;
9471
9472 break;
9473
9474 case COMPLEX_TYPE:
9475 mode = TYPE_MODE (TREE_TYPE (type));
9476 if (!SCALAR_FLOAT_MODE_P (mode))
9477 return -1;
9478
9479 if (*modep == VOIDmode)
9480 *modep = mode;
9481
9482 if (*modep == mode)
9483 return 2;
9484
9485 break;
9486
9487 case VECTOR_TYPE:
9488 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
9489 return -1;
9490
9491 /* Use V4SImode as representative of all 128-bit vector types. */
9492 size = int_size_in_bytes (type);
9493 switch (size)
9494 {
9495 case 16:
9496 mode = V4SImode;
9497 break;
9498 default:
9499 return -1;
9500 }
9501
9502 if (*modep == VOIDmode)
9503 *modep = mode;
9504
9505 /* Vector modes are considered to be opaque: two vectors are
9506 equivalent for the purposes of being homogeneous aggregates
9507 if they are the same size. */
9508 if (*modep == mode)
9509 return 1;
9510
9511 break;
9512
9513 case ARRAY_TYPE:
9514 {
9515 int count;
9516 tree index = TYPE_DOMAIN (type);
9517
9518 /* Can't handle incomplete types nor sizes that are not
9519 fixed. */
9520 if (!COMPLETE_TYPE_P (type)
9521 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9522 return -1;
9523
9524 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
9525 if (count == -1
9526 || !index
9527 || !TYPE_MAX_VALUE (index)
9528 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
9529 || !TYPE_MIN_VALUE (index)
9530 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
9531 || count < 0)
9532 return -1;
9533
9534 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
9535 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
9536
9537 /* There must be no padding. */
9538 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9539 return -1;
9540
9541 return count;
9542 }
9543
9544 case RECORD_TYPE:
9545 {
9546 int count = 0;
9547 int sub_count;
9548 tree field;
9549
9550 /* Can't handle incomplete types nor sizes that are not
9551 fixed. */
9552 if (!COMPLETE_TYPE_P (type)
9553 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9554 return -1;
9555
9556 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9557 {
9558 if (TREE_CODE (field) != FIELD_DECL)
9559 continue;
9560
9561 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9562 if (sub_count < 0)
9563 return -1;
9564 count += sub_count;
9565 }
9566
9567 /* There must be no padding. */
9568 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9569 return -1;
9570
9571 return count;
9572 }
9573
9574 case UNION_TYPE:
9575 case QUAL_UNION_TYPE:
9576 {
9577 /* These aren't very interesting except in a degenerate case. */
9578 int count = 0;
9579 int sub_count;
9580 tree field;
9581
9582 /* Can't handle incomplete types nor sizes that are not
9583 fixed. */
9584 if (!COMPLETE_TYPE_P (type)
9585 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9586 return -1;
9587
9588 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9589 {
9590 if (TREE_CODE (field) != FIELD_DECL)
9591 continue;
9592
9593 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9594 if (sub_count < 0)
9595 return -1;
9596 count = count > sub_count ? count : sub_count;
9597 }
9598
9599 /* There must be no padding. */
9600 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9601 return -1;
9602
9603 return count;
9604 }
9605
9606 default:
9607 break;
9608 }
9609
9610 return -1;
9611 }
9612
9613 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
9614 float or vector aggregate that shall be passed in FP/vector registers
9615 according to the ELFv2 ABI, return the homogeneous element mode in
9616 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9617
9618 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
9619
9620 static bool
9621 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9622 machine_mode *elt_mode,
9623 int *n_elts)
9624 {
9625 /* Note that we do not accept complex types at the top level as
9626 homogeneous aggregates; these types are handled via the
9627 targetm.calls.split_complex_arg mechanism. Complex types
9628 can be elements of homogeneous aggregates, however. */
9629 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9630 {
9631 machine_mode field_mode = VOIDmode;
9632 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9633
9634 if (field_count > 0)
9635 {
9636 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
9637 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9638
9639 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9640 up to AGGR_ARG_NUM_REG registers. */
9641 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9642 {
9643 if (elt_mode)
9644 *elt_mode = field_mode;
9645 if (n_elts)
9646 *n_elts = field_count;
9647 return true;
9648 }
9649 }
9650 }
9651
9652 if (elt_mode)
9653 *elt_mode = mode;
9654 if (n_elts)
9655 *n_elts = 1;
9656 return false;
9657 }
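/* For example (illustrative): under ELFv2 a type such as
   struct { double x, y, z; } yields field_mode = DFmode and
   field_count = 3, so it is passed/returned in three FPRs, while
   struct { vector int a, b; } yields V4SImode with a count of 2
   and occupies two VRs. A struct of nine doubles would exceed
   AGGR_ARG_NUM_REG and falls back to the ordinary rules.  */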
9658
9659 /* Return a nonzero value to indicate that the function value should be
9660 returned in memory, just as large structures always are. TYPE will be
9661 the data type of the value, and FNTYPE will be the type of the
9662 function doing the returning, or @code{NULL} for libcalls.
9663
9664 The AIX ABI for the RS/6000 specifies that all structures are
9665 returned in memory. The Darwin ABI does the same.
9666
9667 For the Darwin 64 Bit ABI, a function result can be returned in
9668 registers or in memory, depending on the size of the return data
9669 type. If it is returned in registers, the value occupies the same
9670 registers as it would if it were the first and only function
9671 argument. Otherwise, the function places its result in memory at
9672 the location pointed to by GPR3.
9673
9674 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9675 but a draft put them in memory, and GCC used to implement the draft
9676 instead of the final standard. Therefore, aix_struct_return
9677 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9678 compatibility can change DRAFT_V4_STRUCT_RET to override the
9679 default, and -m switches get the final word. See
9680 rs6000_option_override_internal for more details.
9681
9682 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9683 long double support is enabled. These values are returned in memory.
9684
9685 int_size_in_bytes returns -1 for variable size objects, which go in
9686 memory always. The cast to unsigned makes -1 > 8. */
9687
9688 static bool
9689 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9690 {
9691 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9692 if (TARGET_MACHO
9693 && rs6000_darwin64_abi
9694 && TREE_CODE (type) == RECORD_TYPE
9695 && int_size_in_bytes (type) > 0)
9696 {
9697 CUMULATIVE_ARGS valcum;
9698 rtx valret;
9699
9700 valcum.words = 0;
9701 valcum.fregno = FP_ARG_MIN_REG;
9702 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9703 /* Do a trial code generation as if this were going to be passed
9704 as an argument; if any part goes in memory, we return NULL. */
9705 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9706 if (valret)
9707 return false;
9708 /* Otherwise fall through to more conventional ABI rules. */
9709 }
9710
9711 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
9712 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9713 NULL, NULL))
9714 return false;
9715
9716 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
9717 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9718 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9719 return false;
9720
9721 if (AGGREGATE_TYPE_P (type)
9722 && (aix_struct_return
9723 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9724 return true;
9725
9726 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9727 modes only exist for GCC vector types if -maltivec. */
9728 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9729 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9730 return false;
9731
9732 /* Return synthetic vectors in memory. */
9733 if (TREE_CODE (type) == VECTOR_TYPE
9734 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9735 {
9736 static bool warned_for_return_big_vectors = false;
9737 if (!warned_for_return_big_vectors)
9738 {
9739 warning (0, "GCC vector returned by reference: "
9740 "non-standard ABI extension with no compatibility guarantee");
9741 warned_for_return_big_vectors = true;
9742 }
9743 return true;
9744 }
9745
9746 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
9747 && FLOAT128_IEEE_P (TYPE_MODE (type)))
9748 return true;
9749
9750 return false;
9751 }
9752
9753 /* Specify whether values returned in registers should be at the most
9754 significant end of a register. We want aggregates returned by
9755 value to match the way aggregates are passed to functions. */
9756
9757 static bool
9758 rs6000_return_in_msb (const_tree valtype)
9759 {
9760 return (DEFAULT_ABI == ABI_ELFv2
9761 && BYTES_BIG_ENDIAN
9762 && AGGREGATE_TYPE_P (valtype)
9763 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9764 }
9765
9766 #ifdef HAVE_AS_GNU_ATTRIBUTE
9767 /* Return TRUE if a call to function FNDECL may be one that
9768 potentially affects the function calling ABI of the object file. */
9769
9770 static bool
9771 call_ABI_of_interest (tree fndecl)
9772 {
9773 if (symtab->state == EXPANSION)
9774 {
9775 struct cgraph_node *c_node;
9776
9777 /* Libcalls are always interesting. */
9778 if (fndecl == NULL_TREE)
9779 return true;
9780
9781 /* Any call to an external function is interesting. */
9782 if (DECL_EXTERNAL (fndecl))
9783 return true;
9784
9785 /* Interesting functions that we are emitting in this object file. */
9786 c_node = cgraph_node::get (fndecl);
9787 c_node = c_node->ultimate_alias_target ();
9788 return !c_node->only_called_directly_p ();
9789 }
9790 return false;
9791 }
9792 #endif
9793
9794 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9795 for a call to a function whose data type is FNTYPE.
9796 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
9797
9798 For incoming args we set the number of arguments in the prototype large
9799 so we never return a PARALLEL. */
9800
9801 void
9802 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9803 rtx libname ATTRIBUTE_UNUSED, int incoming,
9804 int libcall, int n_named_args,
9805 tree fndecl ATTRIBUTE_UNUSED,
9806 machine_mode return_mode ATTRIBUTE_UNUSED)
9807 {
9808 static CUMULATIVE_ARGS zero_cumulative;
9809
9810 *cum = zero_cumulative;
9811 cum->words = 0;
9812 cum->fregno = FP_ARG_MIN_REG;
9813 cum->vregno = ALTIVEC_ARG_MIN_REG;
9814 cum->prototype = (fntype && prototype_p (fntype));
9815 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9816 ? CALL_LIBCALL : CALL_NORMAL);
9817 cum->sysv_gregno = GP_ARG_MIN_REG;
9818 cum->stdarg = stdarg_p (fntype);
9819 cum->libcall = libcall;
9820
9821 cum->nargs_prototype = 0;
9822 if (incoming || cum->prototype)
9823 cum->nargs_prototype = n_named_args;
9824
9825 /* Check for a longcall attribute. */
9826 if ((!fntype && rs6000_default_long_calls)
9827 || (fntype
9828 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9829 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9830 cum->call_cookie |= CALL_LONG;
9831
9832 if (TARGET_DEBUG_ARG)
9833 {
9834 fprintf (stderr, "\ninit_cumulative_args:");
9835 if (fntype)
9836 {
9837 tree ret_type = TREE_TYPE (fntype);
9838 fprintf (stderr, " ret code = %s,",
9839 get_tree_code_name (TREE_CODE (ret_type)));
9840 }
9841
9842 if (cum->call_cookie & CALL_LONG)
9843 fprintf (stderr, " longcall,");
9844
9845 fprintf (stderr, " proto = %d, nargs = %d\n",
9846 cum->prototype, cum->nargs_prototype);
9847 }
9848
9849 #ifdef HAVE_AS_GNU_ATTRIBUTE
9850 if (DEFAULT_ABI == ABI_V4)
9851 {
9852 cum->escapes = call_ABI_of_interest (fndecl);
9853 if (cum->escapes)
9854 {
9855 tree return_type;
9856
9857 if (fntype)
9858 {
9859 return_type = TREE_TYPE (fntype);
9860 return_mode = TYPE_MODE (return_type);
9861 }
9862 else
9863 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9864
9865 if (return_type != NULL)
9866 {
9867 if (TREE_CODE (return_type) == RECORD_TYPE
9868 && TYPE_TRANSPARENT_AGGR (return_type))
9869 {
9870 return_type = TREE_TYPE (first_field (return_type));
9871 return_mode = TYPE_MODE (return_type);
9872 }
9873 if (AGGREGATE_TYPE_P (return_type)
9874 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9875 <= 8))
9876 rs6000_returns_struct = true;
9877 }
9878 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (return_mode))
9879 rs6000_passes_float = true;
9880 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9881 || SPE_VECTOR_MODE (return_mode))
9882 rs6000_passes_vector = true;
9883 }
9884 }
9885 #endif
9886
9887 if (fntype
9888 && !TARGET_ALTIVEC
9889 && TARGET_ALTIVEC_ABI
9890 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9891 {
9892 error ("cannot return value in vector register because"
9893 " altivec instructions are disabled, use -maltivec"
9894 " to enable them");
9895 }
9896 }
9897 \f
9898 /* The mode the ABI uses for a word. This is not the same as word_mode
9899 for -m32 -mpowerpc64. This is used to implement various target hooks. */
9900
9901 static machine_mode
9902 rs6000_abi_word_mode (void)
9903 {
9904 return TARGET_32BIT ? SImode : DImode;
9905 }
9906
9907 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
9908 static char *
9909 rs6000_offload_options (void)
9910 {
9911 if (TARGET_64BIT)
9912 return xstrdup ("-foffload-abi=lp64");
9913 else
9914 return xstrdup ("-foffload-abi=ilp32");
9915 }
9916
9917 /* On rs6000, function arguments are promoted, as are function return
9918 values. */
9919
9920 static machine_mode
9921 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9922 machine_mode mode,
9923 int *punsignedp ATTRIBUTE_UNUSED,
9924 const_tree, int)
9925 {
9926 PROMOTE_MODE (mode, *punsignedp, type);
9927
9928 return mode;
9929 }
9930
9931 /* Return true if TYPE must be passed on the stack and not in registers. */
9932
9933 static bool
9934 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
9935 {
9936 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9937 return must_pass_in_stack_var_size (mode, type);
9938 else
9939 return must_pass_in_stack_var_size_or_pad (mode, type);
9940 }
9941
9942 /* If defined, a C expression which determines whether, and in which
9943 direction, to pad out an argument with extra space. The value
9944 should be of type `enum direction': either `upward' to pad above
9945 the argument, `downward' to pad below, or `none' to inhibit
9946 padding.
9947
9948 For the AIX ABI structs are always stored left shifted in their
9949 argument slot. */
9950
9951 enum direction
9952 function_arg_padding (machine_mode mode, const_tree type)
9953 {
9954 #ifndef AGGREGATE_PADDING_FIXED
9955 #define AGGREGATE_PADDING_FIXED 0
9956 #endif
9957 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9958 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9959 #endif
9960
9961 if (!AGGREGATE_PADDING_FIXED)
9962 {
9963 /* GCC used to pass structures of the same size as integer types as
9964 if they were in fact integers, ignoring FUNCTION_ARG_PADDING;
9965 i.e., structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9966 passed padded downward, except that -mstrict-align further
9967 muddied the water in that multi-component structures of 2 and 4
9968 bytes in size were passed padded upward.
9969
9970 The following arranges for best compatibility with previous
9971 versions of gcc, but removes the -mstrict-align dependency. */
9972 if (BYTES_BIG_ENDIAN)
9973 {
9974 HOST_WIDE_INT size = 0;
9975
9976 if (mode == BLKmode)
9977 {
9978 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
9979 size = int_size_in_bytes (type);
9980 }
9981 else
9982 size = GET_MODE_SIZE (mode);
9983
9984 if (size == 1 || size == 2 || size == 4)
9985 return downward;
9986 }
9987 return upward;
9988 }
9989
9990 if (AGGREGATES_PAD_UPWARD_ALWAYS)
9991 {
9992 if (type != 0 && AGGREGATE_TYPE_P (type))
9993 return upward;
9994 }
9995
9996 /* Fall back to the default. */
9997 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
9998 }
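/* For example (illustrative): on a big-endian target a 2-byte
   struct is padded downward (right-justified in its slot, like a
   short), while a 3-byte struct falls through to upward padding
   and is left-justified, matching the AIX convention described
   above.  */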
9999
10000 /* If defined, a C expression that gives the alignment boundary, in bits,
10001 of an argument with the specified mode and type. If it is not defined,
10002 PARM_BOUNDARY is used for all arguments.
10003
10004 V.4 wants long longs and doubles to be double word aligned. Just
10005 testing the mode size is a boneheaded way to do this as it means
10006 that other types such as complex int are also double word aligned.
10007 However, we're stuck with this because changing the ABI might break
10008 existing library interfaces.
10009
10010 Doubleword align SPE vectors.
10011 Quadword align Altivec/VSX vectors.
10012 Quadword align large synthetic vector types. */
10013
10014 static unsigned int
10015 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
10016 {
10017 machine_mode elt_mode;
10018 int n_elts;
10019
10020 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10021
10022 if (DEFAULT_ABI == ABI_V4
10023 && (GET_MODE_SIZE (mode) == 8
10024 || (TARGET_HARD_FLOAT
10025 && TARGET_FPRS
10026 && FLOAT128_2REG_P (mode))))
10027 return 64;
10028 else if (FLOAT128_VECTOR_P (mode))
10029 return 128;
10030 else if (SPE_VECTOR_MODE (mode)
10031 || (type && TREE_CODE (type) == VECTOR_TYPE
10032 && int_size_in_bytes (type) >= 8
10033 && int_size_in_bytes (type) < 16))
10034 return 64;
10035 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10036 || (type && TREE_CODE (type) == VECTOR_TYPE
10037 && int_size_in_bytes (type) >= 16))
10038 return 128;
10039
10040 /* Aggregate types that need > 8 byte alignment are quadword-aligned
10041 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
10042 -mcompat-align-parm is used. */
10043 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
10044 || DEFAULT_ABI == ABI_ELFv2)
10045 && type && TYPE_ALIGN (type) > 64)
10046 {
10047 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
10048 or homogeneous float/vector aggregates here. We already handled
10049 vector aggregates above, but still need to check for float here. */
10050 bool aggregate_p = (AGGREGATE_TYPE_P (type)
10051 && !SCALAR_FLOAT_MODE_P (elt_mode));
10052
10053 /* We used to check for BLKmode instead of the above aggregate type
10054 check. Warn when this results in any difference to the ABI. */
10055 if (aggregate_p != (mode == BLKmode))
10056 {
10057 static bool warned;
10058 if (!warned && warn_psabi)
10059 {
10060 warned = true;
10061 inform (input_location,
10062 "the ABI of passing aggregates with %d-byte alignment"
10063 " has changed in GCC 5",
10064 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
10065 }
10066 }
10067
10068 if (aggregate_p)
10069 return 128;
10070 }
10071
10072 /* Similar for the Darwin64 ABI. Note that for historical reasons we
10073 implement the "aggregate type" check as a BLKmode check here; this
10074 means certain aggregate types are in fact not aligned. */
10075 if (TARGET_MACHO && rs6000_darwin64_abi
10076 && mode == BLKmode
10077 && type && TYPE_ALIGN (type) > 64)
10078 return 128;
10079
10080 return PARM_BOUNDARY;
10081 }
10082
10083 /* The offset in words to the start of the parameter save area. */
10084
10085 static unsigned int
10086 rs6000_parm_offset (void)
10087 {
10088 return (DEFAULT_ABI == ABI_V4 ? 2
10089 : DEFAULT_ABI == ABI_ELFv2 ? 4
10090 : 6);
10091 }
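/* In bytes this is 8 for 32-bit SVR4, 32 for 64-bit ELFv2, and 24
   or 48 for 32-/64-bit AIX -- the size of the fixed stack-frame
   header each ABI defines ahead of the parameter save area.  */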
10092
10093 /* For a function parm of MODE and TYPE, return the starting word in
10094 the parameter area. NWORDS of the parameter area are already used. */
10095
10096 static unsigned int
10097 rs6000_parm_start (machine_mode mode, const_tree type,
10098 unsigned int nwords)
10099 {
10100 unsigned int align;
10101
10102 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
10103 return nwords + (-(rs6000_parm_offset () + nwords) & align);
10104 }
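/* Worked example (illustrative): under ELFv2 (offset 4 words) with
   nwords = 1 already used and a quadword-aligned argument on a
   64-bit target, align = 128 / 64 - 1 = 1, so the result is
   1 + (-(4 + 1) & 1) = 2; one padding word is skipped and the
   argument starts on an even word of the frame.  */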
10105
10106 /* Compute the size (in words) of a function argument. */
10107
10108 static unsigned long
10109 rs6000_arg_size (machine_mode mode, const_tree type)
10110 {
10111 unsigned long size;
10112
10113 if (mode != BLKmode)
10114 size = GET_MODE_SIZE (mode);
10115 else
10116 size = int_size_in_bytes (type);
10117
10118 if (TARGET_32BIT)
10119 return (size + 3) >> 2;
10120 else
10121 return (size + 7) >> 3;
10122 }
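/* For example (illustrative): a 10-byte BLKmode struct occupies
   (10 + 3) >> 2 = 3 words on a 32-bit target and (10 + 7) >> 3 = 2
   words on a 64-bit one.  */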
10123 \f
10124 /* Use this to flush pending int fields. */
10125
10126 static void
10127 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
10128 HOST_WIDE_INT bitpos, int final)
10129 {
10130 unsigned int startbit, endbit;
10131 int intregs, intoffset;
10132 machine_mode mode;
10133
10134 /* Handle the situations where a float is taking up the first half
10135 of the GPR, and the other half is empty (typically due to
10136 alignment restrictions). We can detect this by an 8-byte-aligned
10137 int field, or by seeing that this is the final flush for this
10138 argument. Count the word and continue on. */
10139 if (cum->floats_in_gpr == 1
10140 && (cum->intoffset % 64 == 0
10141 || (cum->intoffset == -1 && final)))
10142 {
10143 cum->words++;
10144 cum->floats_in_gpr = 0;
10145 }
10146
10147 if (cum->intoffset == -1)
10148 return;
10149
10150 intoffset = cum->intoffset;
10151 cum->intoffset = -1;
10152 cum->floats_in_gpr = 0;
10153
10154 if (intoffset % BITS_PER_WORD != 0)
10155 {
10156 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10157 MODE_INT, 0);
10158 if (mode == BLKmode)
10159 {
10160 /* We couldn't find an appropriate mode, which happens,
10161 e.g., in packed structs when there are 3 bytes to load.
10162 Move intoffset back to the beginning of the word in this
10163 case. */
10164 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
10165 }
10166 }
10167
10168 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10169 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10170 intregs = (endbit - startbit) / BITS_PER_WORD;
10171 cum->words += intregs;
10172 /* words should be unsigned. */
10173 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
10174 {
10175 int pad = (endbit/BITS_PER_WORD) - cum->words;
10176 cum->words += pad;
10177 }
10178 }
10179
10180 /* The darwin64 ABI calls for us to recurse down through structs,
10181 looking for elements passed in registers. Unfortunately, we have
10182 to track int register count here also because of misalignments
10183 in powerpc alignment mode. */
10184
10185 static void
10186 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
10187 const_tree type,
10188 HOST_WIDE_INT startbitpos)
10189 {
10190 tree f;
10191
10192 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10193 if (TREE_CODE (f) == FIELD_DECL)
10194 {
10195 HOST_WIDE_INT bitpos = startbitpos;
10196 tree ftype = TREE_TYPE (f);
10197 machine_mode mode;
10198 if (ftype == error_mark_node)
10199 continue;
10200 mode = TYPE_MODE (ftype);
10201
10202 if (DECL_SIZE (f) != 0
10203 && tree_fits_uhwi_p (bit_position (f)))
10204 bitpos += int_bit_position (f);
10205
10206 /* ??? FIXME: else assume zero offset. */
10207
10208 if (TREE_CODE (ftype) == RECORD_TYPE)
10209 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
10210 else if (USE_FP_FOR_ARG_P (cum, mode))
10211 {
10212 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
10213 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10214 cum->fregno += n_fpregs;
10215 /* Single-precision floats present a special problem for
10216 us, because they are smaller than an 8-byte GPR, and so
10217 the structure-packing rules combined with the standard
10218 varargs behavior mean that we want to pack float/float
10219 and float/int combinations into a single register's
10220 space. This is complicated by the arg advance flushing,
10221 which works on arbitrarily large groups of int-type
10222 fields. */
10223 if (mode == SFmode)
10224 {
10225 if (cum->floats_in_gpr == 1)
10226 {
10227 /* Two floats in a word; count the word and reset
10228 the float count. */
10229 cum->words++;
10230 cum->floats_in_gpr = 0;
10231 }
10232 else if (bitpos % 64 == 0)
10233 {
10234 /* A float at the beginning of an 8-byte word;
10235 count it and put off adjusting cum->words until
10236 we see if an arg advance flush is going to do it
10237 for us. */
10238 cum->floats_in_gpr++;
10239 }
10240 else
10241 {
10242 /* The float is at the end of a word, preceded
10243 by integer fields, so the arg advance flush
10244 just above has already set cum->words and
10245 everything is taken care of. */
10246 }
10247 }
10248 else
10249 cum->words += n_fpregs;
10250 }
10251 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10252 {
10253 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
10254 cum->vregno++;
10255 cum->words += 2;
10256 }
10257 else if (cum->intoffset == -1)
10258 cum->intoffset = bitpos;
10259 }
10260 }
10261
10262 /* Check for an item that needs to be considered specially under the Darwin
10263 64-bit ABI. These are record types where the mode is BLKmode or the
10264 structure is 8 bytes in size. */
10265 static int
10266 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
10267 {
10268 return rs6000_darwin64_abi
10269 && ((mode == BLKmode
10270 && TREE_CODE (type) == RECORD_TYPE
10271 && int_size_in_bytes (type) > 0)
10272 || (type && TREE_CODE (type) == RECORD_TYPE
10273 && int_size_in_bytes (type) == 8)) ? 1 : 0;
10274 }
10275
10276 /* Update the data in CUM to advance over an argument
10277 of mode MODE and data type TYPE.
10278 (TYPE is null for libcalls where that information may not be available.)
10279
10280 Note that for args passed by reference, function_arg will be called
10281 with MODE and TYPE set to that of the pointer to the arg, not the arg
10282 itself. */
10283
10284 static void
10285 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
10286 const_tree type, bool named, int depth)
10287 {
10288 machine_mode elt_mode;
10289 int n_elts;
10290
10291 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10292
10293 /* Only tick off an argument if we're not recursing. */
10294 if (depth == 0)
10295 cum->nargs_prototype--;
10296
10297 #ifdef HAVE_AS_GNU_ATTRIBUTE
10298 if (DEFAULT_ABI == ABI_V4
10299 && cum->escapes)
10300 {
10301 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode))
10302 rs6000_passes_float = true;
10303 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
10304 rs6000_passes_vector = true;
10305 else if (SPE_VECTOR_MODE (mode)
10306 && !cum->stdarg
10307 && cum->sysv_gregno <= GP_ARG_MAX_REG)
10308 rs6000_passes_vector = true;
10309 }
10310 #endif
10311
10312 if (TARGET_ALTIVEC_ABI
10313 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
10314 || (type && TREE_CODE (type) == VECTOR_TYPE
10315 && int_size_in_bytes (type) == 16)))
10316 {
10317 bool stack = false;
10318
10319 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10320 {
10321 cum->vregno += n_elts;
10322
10323 if (!TARGET_ALTIVEC)
10324 error ("cannot pass argument in vector register because"
10325 " altivec instructions are disabled, use -maltivec"
10326 " to enable them");
10327
10328 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
10329 even if it is going to be passed in a vector register.
10330 Darwin does the same for variable-argument functions. */
10331 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10332 && TARGET_64BIT)
10333 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
10334 stack = true;
10335 }
10336 else
10337 stack = true;
10338
10339 if (stack)
10340 {
10341 int align;
10342
10343 /* Vector parameters must be 16-byte aligned. In 32-bit
10344 mode this means we need to take into account the offset
10345 to the parameter save area. In 64-bit mode, they just
10346 have to start on an even word, since the parameter save
10347 area is 16-byte aligned. */
10348 if (TARGET_32BIT)
10349 align = -(rs6000_parm_offset () + cum->words) & 3;
10350 else
10351 align = cum->words & 1;
10352 cum->words += align + rs6000_arg_size (mode, type);
10353
10354 if (TARGET_DEBUG_ARG)
10355 {
10356 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
10357 cum->words, align);
10358 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
10359 cum->nargs_prototype, cum->prototype,
10360 GET_MODE_NAME (mode));
10361 }
10362 }
10363 }
10364 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
10365 && !cum->stdarg
10366 && cum->sysv_gregno <= GP_ARG_MAX_REG)
10367 cum->sysv_gregno++;
10368
10369 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10370 {
10371 int size = int_size_in_bytes (type);
10372 /* Variable sized types have size == -1 and are
10373 treated as if consisting entirely of ints.
10374 Pad to 16 byte boundary if needed. */
10375 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10376 && (cum->words % 2) != 0)
10377 cum->words++;
10378 /* For varargs, we can just go up by the size of the struct. */
10379 if (!named)
10380 cum->words += (size + 7) / 8;
10381 else
10382 {
10383 /* It is tempting to say int register count just goes up by
10384 sizeof(type)/8, but this is wrong in a case such as
10385 { int; double; int; } [powerpc alignment]. We have to
10386 grovel through the fields for these too. */
10387 cum->intoffset = 0;
10388 cum->floats_in_gpr = 0;
10389 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
10390 rs6000_darwin64_record_arg_advance_flush (cum,
10391 size * BITS_PER_UNIT, 1);
10392 }
10393 if (TARGET_DEBUG_ARG)
10394 {
10395 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
10396 cum->words, TYPE_ALIGN (type), size);
10397 fprintf (stderr,
10398 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
10399 cum->nargs_prototype, cum->prototype,
10400 GET_MODE_NAME (mode));
10401 }
10402 }
10403 else if (DEFAULT_ABI == ABI_V4)
10404 {
10405 if (TARGET_HARD_FLOAT && TARGET_FPRS
10406 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10407 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10408 || FLOAT128_2REG_P (mode)
10409 || DECIMAL_FLOAT_MODE_P (mode)))
10410 {
10411 /* _Decimal128 must use an even/odd register pair. This assumes
10412 that the register number is odd when fregno is odd. */
10413 if (mode == TDmode && (cum->fregno % 2) == 1)
10414 cum->fregno++;
10415
10416 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
10417 <= FP_ARG_V4_MAX_REG)
10418 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
10419 else
10420 {
10421 cum->fregno = FP_ARG_V4_MAX_REG + 1;
10422 if (mode == DFmode || FLOAT128_IBM_P (mode)
10423 || mode == DDmode || mode == TDmode)
10424 cum->words += cum->words & 1;
10425 cum->words += rs6000_arg_size (mode, type);
10426 }
10427 }
10428 else
10429 {
10430 int n_words = rs6000_arg_size (mode, type);
10431 int gregno = cum->sysv_gregno;
10432
10433 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10434 (r7,r8) or (r9,r10), as is any other 2-word item such
10435 as complex int, due to a historical mistake. */
10436 if (n_words == 2)
10437 gregno += (1 - gregno) & 1;
10438
10439 /* Multi-reg args are not split between registers and stack. */
10440 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10441 {
10442 /* Long long and SPE vectors are aligned on the stack.
10443 So are other 2 word items such as complex int due to
10444 a historical mistake. */
10445 if (n_words == 2)
10446 cum->words += cum->words & 1;
10447 cum->words += n_words;
10448 }
10449
10450 /* Note: we continue to accumulate gregno even after we have started
10451 spilling to the stack; this is how we indicate to
10452 expand_builtin_saveregs that spilling has begun. */
10453 cum->sysv_gregno = gregno + n_words;
10454 }
10455
10456 if (TARGET_DEBUG_ARG)
10457 {
10458 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10459 cum->words, cum->fregno);
10460 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
10461 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
10462 fprintf (stderr, "mode = %4s, named = %d\n",
10463 GET_MODE_NAME (mode), named);
10464 }
10465 }
10466 else
10467 {
10468 int n_words = rs6000_arg_size (mode, type);
10469 int start_words = cum->words;
10470 int align_words = rs6000_parm_start (mode, type, start_words);
10471
10472 cum->words = align_words + n_words;
10473
10474 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
10475 {
10476 /* _Decimal128 must be passed in an even/odd float register pair.
10477 This assumes that the register number is odd when fregno is
10478 odd. */
10479 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10480 cum->fregno++;
10481 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
10482 }
10483
10484 if (TARGET_DEBUG_ARG)
10485 {
10486 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10487 cum->words, cum->fregno);
10488 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
10489 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
10490 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
10491 named, align_words - start_words, depth);
10492 }
10493 }
10494 }
10495
10496 static void
10497 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
10498 const_tree type, bool named)
10499 {
10500 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
10501 0);
10502 }
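/* Illustrative sketch (not part of the build): the 32-bit vector alignment
   computation above pads the word counter so that the argument starts a
   multiple of 4 words (16 bytes) from the stack pointer.  The values below
   are hypothetical. */
#if 0
static int
vector_align_padding_example (void)
{
  int parm_offset = 6;	/* hypothetical rs6000_parm_offset (), in words */
  int words = 3;	/* words already consumed by earlier arguments */
  int align = -(parm_offset + words) & 3;
  return align;		/* 3: 6 + 3 + 3 == 12 words == 48 bytes, 16-byte aligned */
}
#endif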
10503
10504 static rtx
10505 spe_build_register_parallel (machine_mode mode, int gregno)
10506 {
10507 rtx r1, r3, r5, r7;
10508
10509 switch (mode)
10510 {
10511 case DFmode:
10512 r1 = gen_rtx_REG (DImode, gregno);
10513 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10514 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
10515
10516 case DCmode:
10517 case TFmode:
10518 r1 = gen_rtx_REG (DImode, gregno);
10519 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10520 r3 = gen_rtx_REG (DImode, gregno + 2);
10521 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10522 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
10523
10524 case TCmode:
10525 r1 = gen_rtx_REG (DImode, gregno);
10526 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10527 r3 = gen_rtx_REG (DImode, gregno + 2);
10528 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10529 r5 = gen_rtx_REG (DImode, gregno + 4);
10530 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
10531 r7 = gen_rtx_REG (DImode, gregno + 6);
10532 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
10533 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
10534
10535 default:
10536 gcc_unreachable ();
10537 }
10538 }
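/* For illustration, the DCmode/TFmode case above builds RTL of the shape

     (parallel:TF [(expr_list (reg:DI gregno) (const_int 0))
                   (expr_list (reg:DI gregno+2) (const_int 8))])

   i.e. two 8-byte halves in GPR pairs at byte offsets 0 and 8 of the
   argument. */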
10539
10540 /* Determine where to put a SIMD argument on the SPE. */
10541 static rtx
10542 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
10543 const_tree type)
10544 {
10545 int gregno = cum->sysv_gregno;
10546
10547 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
10548 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
10549 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
10550 || mode == DCmode || mode == TCmode))
10551 {
10552 int n_words = rs6000_arg_size (mode, type);
10553
10554 /* Doubles go in an odd/even register pair (r5/r6, etc). */
10555 if (mode == DFmode)
10556 gregno += (1 - gregno) & 1;
10557
10558 /* Multi-reg args are not split between registers and stack. */
10559 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10560 return NULL_RTX;
10561
10562 return spe_build_register_parallel (mode, gregno);
10563 }
10564 if (cum->stdarg)
10565 {
10566 int n_words = rs6000_arg_size (mode, type);
10567
10568 /* SPE vectors are put in odd registers. */
10569 if (n_words == 2 && (gregno & 1) == 0)
10570 gregno += 1;
10571
10572 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
10573 {
10574 rtx r1, r2;
10575 machine_mode m = SImode;
10576
10577 r1 = gen_rtx_REG (m, gregno);
10578 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
10579 r2 = gen_rtx_REG (m, gregno + 1);
10580 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
10581 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
10582 }
10583 else
10584 return NULL_RTX;
10585 }
10586 else
10587 {
10588 if (gregno <= GP_ARG_MAX_REG)
10589 return gen_rtx_REG (mode, gregno);
10590 else
10591 return NULL_RTX;
10592 }
10593 }
10594
10595 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
10596 structure between cum->intoffset and bitpos to integer registers. */
10597
10598 static void
10599 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
10600 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
10601 {
10602 machine_mode mode;
10603 unsigned int regno;
10604 unsigned int startbit, endbit;
10605 int this_regno, intregs, intoffset;
10606 rtx reg;
10607
10608 if (cum->intoffset == -1)
10609 return;
10610
10611 intoffset = cum->intoffset;
10612 cum->intoffset = -1;
10613
10614 /* If this is the trailing part of a word, try to only load that
10615 much into the register. Otherwise load the whole register. Note
10616 that in the latter case we may pick up unwanted bits. It's not a
10617 problem at the moment but we may wish to revisit this. */
10618
10619 if (intoffset % BITS_PER_WORD != 0)
10620 {
10621 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10622 MODE_INT, 0);
10623 if (mode == BLKmode)
10624 {
10625 /* We couldn't find an appropriate mode, which happens,
10626 e.g., in packed structs when there are 3 bytes to load.
10627 Move intoffset back to the beginning of the word in this
10628 case. */
10629 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
10630 mode = word_mode;
10631 }
10632 }
10633 else
10634 mode = word_mode;
10635
10636 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
10637 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
10638 intregs = (endbit - startbit) / BITS_PER_WORD;
10639 this_regno = cum->words + intoffset / BITS_PER_WORD;
10640
10641 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
10642 cum->use_stack = 1;
10643
10644 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
10645 if (intregs <= 0)
10646 return;
10647
10648 intoffset /= BITS_PER_UNIT;
10649 do
10650 {
10651 regno = GP_ARG_MIN_REG + this_regno;
10652 reg = gen_rtx_REG (mode, regno);
10653 rvec[(*k)++] =
10654 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10655
10656 this_regno += 1;
10657 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
10658 mode = word_mode;
10659 intregs -= 1;
10660 }
10661 while (intregs > 0);
10662 }
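/* Worked example (illustrative): with 64-bit words, intoffset % 64 == 48
   leaves 16 trailing bits in the current word, so the mode_for_size lookup
   above yields HImode; a 3-byte remainder (24 bits) has no integer mode, so
   mode_for_size returns BLKmode and we fall back to loading the whole
   word. */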
10663
10664 /* Recursive workhorse for the following. */
10665
10666 static void
10667 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10668 HOST_WIDE_INT startbitpos, rtx rvec[],
10669 int *k)
10670 {
10671 tree f;
10672
10673 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10674 if (TREE_CODE (f) == FIELD_DECL)
10675 {
10676 HOST_WIDE_INT bitpos = startbitpos;
10677 tree ftype = TREE_TYPE (f);
10678 machine_mode mode;
10679 if (ftype == error_mark_node)
10680 continue;
10681 mode = TYPE_MODE (ftype);
10682
10683 if (DECL_SIZE (f) != 0
10684 && tree_fits_uhwi_p (bit_position (f)))
10685 bitpos += int_bit_position (f);
10686
10687 /* ??? FIXME: else assume zero offset. */
10688
10689 if (TREE_CODE (ftype) == RECORD_TYPE)
10690 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10691 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10692 {
10693 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10694 #if 0
10695 switch (mode)
10696 {
10697 case SCmode: mode = SFmode; break;
10698 case DCmode: mode = DFmode; break;
10699 case TCmode: mode = TFmode; break;
10700 default: break;
10701 }
10702 #endif
10703 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10704 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10705 {
10706 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10707 && (mode == TFmode || mode == TDmode));
10708 /* Long double or _Decimal128 split over regs and memory. */
10709 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10710 cum->use_stack = 1;
10711 }
10712 rvec[(*k)++]
10713 = gen_rtx_EXPR_LIST (VOIDmode,
10714 gen_rtx_REG (mode, cum->fregno++),
10715 GEN_INT (bitpos / BITS_PER_UNIT));
10716 if (FLOAT128_2REG_P (mode))
10717 cum->fregno++;
10718 }
10719 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10720 {
10721 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10722 rvec[(*k)++]
10723 = gen_rtx_EXPR_LIST (VOIDmode,
10724 gen_rtx_REG (mode, cum->vregno++),
10725 GEN_INT (bitpos / BITS_PER_UNIT));
10726 }
10727 else if (cum->intoffset == -1)
10728 cum->intoffset = bitpos;
10729 }
10730 }
10731
10732 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10733 the register(s) to be used for each field and subfield of a struct
10734 being passed by value, along with the offset of where the
10735 register's value may be found in the block. FP fields go in FP
10736 registers, vector fields go in vector registers, and everything
10737 else goes in int registers, packed as in memory.
10738
10739 This code is also used for function return values. RETVAL indicates
10740 whether this is the case.
10741
10742 Much of this is taken from the SPARC V9 port, which has a similar
10743 calling convention. */
10744
10745 static rtx
10746 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10747 bool named, bool retval)
10748 {
10749 rtx rvec[FIRST_PSEUDO_REGISTER];
10750 int k = 1, kbase = 1;
10751 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10752 /* This is a copy; modifications are not visible to our caller. */
10753 CUMULATIVE_ARGS copy_cum = *orig_cum;
10754 CUMULATIVE_ARGS *cum = &copy_cum;
10755
10756 /* Pad to 16 byte boundary if needed. */
10757 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10758 && (cum->words % 2) != 0)
10759 cum->words++;
10760
10761 cum->intoffset = 0;
10762 cum->use_stack = 0;
10763 cum->named = named;
10764
10765 /* Put entries into rvec[] for individual FP and vector fields, and
10766 for the chunks of memory that go in int regs. Note we start at
10767 element 1; 0 is reserved for an indication of using memory, and
10768 may or may not be filled in below. */
10769 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10770 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10771
10772 /* If any part of the struct went on the stack put all of it there.
10773 This hack is because the generic code for
10774 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10775 parts of the struct are not at the beginning. */
10776 if (cum->use_stack)
10777 {
10778 if (retval)
10779 return NULL_RTX; /* doesn't go in registers at all */
10780 kbase = 0;
10781 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10782 }
10783 if (k > 1 || cum->use_stack)
10784 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10785 else
10786 return NULL_RTX;
10787 }
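/* Worked example (illustrative): under the darwin64 ABI, passing

     struct { double d; int i; }

   by value yields a PARALLEL that puts d in the next free FPR at byte
   offset 0, while the flush above loads the word containing i into a GPR
   at byte offset 8. */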
10788
10789 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10790
10791 static rtx
10792 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10793 int align_words)
10794 {
10795 int n_units;
10796 int i, k;
10797 rtx rvec[GP_ARG_NUM_REG + 1];
10798
10799 if (align_words >= GP_ARG_NUM_REG)
10800 return NULL_RTX;
10801
10802 n_units = rs6000_arg_size (mode, type);
10803
10804 /* Optimize the simple case where the arg fits in one gpr, except in
10805 the case of BLKmode due to assign_parms assuming that registers are
10806 BITS_PER_WORD wide. */
10807 if (n_units == 0
10808 || (n_units == 1 && mode != BLKmode))
10809 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10810
10811 k = 0;
10812 if (align_words + n_units > GP_ARG_NUM_REG)
10813 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10814 using a magic NULL_RTX component.
10815 This is not strictly correct. Only some of the arg belongs in
10816 memory, not all of it. However, the normal scheme using
10817 function_arg_partial_nregs can result in unusual subregs, eg.
10818 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10819 store the whole arg to memory is often more efficient than code
10820 to store pieces, and we know that space is available in the right
10821 place for the whole arg. */
10822 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10823
10824 i = 0;
10825 do
10826 {
10827 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10828 rtx off = GEN_INT (i++ * 4);
10829 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10830 }
10831 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10832
10833 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10834 }
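/* For illustration: an 8-byte (2-unit) argument whose first word lands in
   the last GPR (align_words == GP_ARG_NUM_REG - 1) yields

     (parallel [(expr_list (nil) (const_int 0))
                (expr_list (reg:SI r10) (const_int 0))])

   where the magic NULL_RTX element says the whole value also lives in
   memory, and only the first 4 bytes are described as being in r10. */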
10835
10836 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10837 but must also be copied into the parameter save area starting at
10838 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10839 to the GPRs and/or memory. Return the number of elements used. */
10840
10841 static int
10842 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10843 int align_words, rtx *rvec)
10844 {
10845 int k = 0;
10846
10847 if (align_words < GP_ARG_NUM_REG)
10848 {
10849 int n_words = rs6000_arg_size (mode, type);
10850
10851 if (align_words + n_words > GP_ARG_NUM_REG
10852 || mode == BLKmode
10853 || (TARGET_32BIT && TARGET_POWERPC64))
10854 {
10855 /* If this is partially on the stack, then we only
10856 include the portion actually in registers here. */
10857 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10858 int i = 0;
10859
10860 if (align_words + n_words > GP_ARG_NUM_REG)
10861 {
10862 /* Not all of the arg fits in gprs. Say that it goes in memory
10863 too, using a magic NULL_RTX component. Also see comment in
10864 rs6000_mixed_function_arg for why the normal
10865 function_arg_partial_nregs scheme doesn't work in this case. */
10866 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10867 }
10868
10869 do
10870 {
10871 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10872 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10873 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10874 }
10875 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10876 }
10877 else
10878 {
10879 /* The whole arg fits in gprs. */
10880 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10881 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10882 }
10883 }
10884 else
10885 {
10886 /* It's entirely in memory. */
10887 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10888 }
10889
10890 return k;
10891 }
10892
10893 /* RVEC is a vector of K components of an argument of mode MODE.
10894 Construct the final function_arg return value from it. */
10895
10896 static rtx
10897 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
10898 {
10899 gcc_assert (k >= 1);
10900
10901 /* Avoid returning a PARALLEL in the trivial cases. */
10902 if (k == 1)
10903 {
10904 if (XEXP (rvec[0], 0) == NULL_RTX)
10905 return NULL_RTX;
10906
10907 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10908 return XEXP (rvec[0], 0);
10909 }
10910
10911 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10912 }
10913
10914 /* Determine where to put an argument to a function.
10915 Value is zero to push the argument on the stack,
10916 or a hard register in which to store the argument.
10917
10918 MODE is the argument's machine mode.
10919 TYPE is the data type of the argument (as a tree).
10920 This is null for libcalls where that information may
10921 not be available.
10922 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10923 the preceding args and about the function being called. It is
10924 not modified in this routine.
10925 NAMED is nonzero if this argument is a named parameter
10926 (otherwise it is an extra parameter matching an ellipsis).
10927
10928 On RS/6000 the first eight words of non-FP are normally in registers
10929 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10930 Under V.4, the first 8 FP args are in registers.
10931
10932 If this is floating-point and no prototype is specified, we use
10933 both an FP and integer register (or possibly FP reg and stack). Library
10934 functions (when CALL_LIBCALL is set) always have the proper types for args,
10935 so we can pass the FP value just in one register. emit_library_function
10936 doesn't support PARALLEL anyway.
10937
10938 Note that for args passed by reference, function_arg will be called
10939 with MODE and TYPE set to that of the pointer to the arg, not the arg
10940 itself. */
10941
10942 static rtx
10943 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
10944 const_tree type, bool named)
10945 {
10946 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10947 enum rs6000_abi abi = DEFAULT_ABI;
10948 machine_mode elt_mode;
10949 int n_elts;
10950
10951 /* Return a marker to indicate whether we need to set or clear in CR1
10952 the bit that V.4 uses to say fp args were passed in registers.
10953 Assume that we don't need the marker for software floating point,
10954 or compiler generated library calls. */
10955 if (mode == VOIDmode)
10956 {
10957 if (abi == ABI_V4
10958 && (cum->call_cookie & CALL_LIBCALL) == 0
10959 && (cum->stdarg
10960 || (cum->nargs_prototype < 0
10961 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10962 {
10963 /* For the SPE, we need to crxor CR6 always. */
10964 if (TARGET_SPE_ABI)
10965 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10966 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10967 return GEN_INT (cum->call_cookie
10968 | ((cum->fregno == FP_ARG_MIN_REG)
10969 ? CALL_V4_SET_FP_ARGS
10970 : CALL_V4_CLEAR_FP_ARGS));
10971 }
10972
10973 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
10974 }
10975
10976 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10977
10978 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10979 {
10980 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
10981 if (rslt != NULL_RTX)
10982 return rslt;
10983 /* Else fall through to usual handling. */
10984 }
10985
10986 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10987 {
10988 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10989 rtx r, off;
10990 int i, k = 0;
10991
10992 /* Do we also need to pass this argument in the parameter save area?
10993 Library support functions for IEEE 128-bit are assumed to not need the
10994 value passed both in GPRs and in vector registers. */
10995 if (TARGET_64BIT && !cum->prototype
10996 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
10997 {
10998 int align_words = ROUND_UP (cum->words, 2);
10999 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11000 }
11001
11002 /* Describe where this argument goes in the vector registers. */
11003 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
11004 {
11005 r = gen_rtx_REG (elt_mode, cum->vregno + i);
11006 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11007 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11008 }
11009
11010 return rs6000_finish_function_arg (mode, rvec, k);
11011 }
11012 else if (TARGET_ALTIVEC_ABI
11013 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
11014 || (type && TREE_CODE (type) == VECTOR_TYPE
11015 && int_size_in_bytes (type) == 16)))
11016 {
11017 if (named || abi == ABI_V4)
11018 return NULL_RTX;
11019 else
11020 {
11021 /* Vector parameters to varargs functions under AIX or Darwin
11022 get passed in memory and possibly also in GPRs. */
11023 int align, align_words, n_words;
11024 machine_mode part_mode;
11025
11026 /* Vector parameters must be 16-byte aligned. In 32-bit
11027 mode this means we need to take into account the offset
11028 to the parameter save area. In 64-bit mode, they just
11029 have to start on an even word, since the parameter save
11030 area is 16-byte aligned. */
11031 if (TARGET_32BIT)
11032 align = -(rs6000_parm_offset () + cum->words) & 3;
11033 else
11034 align = cum->words & 1;
11035 align_words = cum->words + align;
11036
11037 /* Out of registers? Memory, then. */
11038 if (align_words >= GP_ARG_NUM_REG)
11039 return NULL_RTX;
11040
11041 if (TARGET_32BIT && TARGET_POWERPC64)
11042 return rs6000_mixed_function_arg (mode, type, align_words);
11043
11044 /* The vector value goes in GPRs. Only the part of the
11045 value in GPRs is reported here. */
11046 part_mode = mode;
11047 n_words = rs6000_arg_size (mode, type);
11048 if (align_words + n_words > GP_ARG_NUM_REG)
11049 /* Fortunately, there are only two possibilities: the value
11050 is either wholly in GPRs or half in GPRs and half not. */
11051 part_mode = DImode;
11052
11053 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
11054 }
11055 }
11056 else if (TARGET_SPE_ABI && TARGET_SPE
11057 && (SPE_VECTOR_MODE (mode)
11058 || (TARGET_E500_DOUBLE && (mode == DFmode
11059 || mode == DCmode
11060 || mode == TFmode
11061 || mode == TCmode))))
11062 return rs6000_spe_function_arg (cum, mode, type);
11063
11064 else if (abi == ABI_V4)
11065 {
11066 if (TARGET_HARD_FLOAT && TARGET_FPRS
11067 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
11068 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
11069 || FLOAT128_2REG_P (mode)
11070 || DECIMAL_FLOAT_MODE_P (mode)))
11071 {
11072 /* _Decimal128 must use an even/odd register pair. This assumes
11073 that the register number is odd when fregno is odd. */
11074 if (mode == TDmode && (cum->fregno % 2) == 1)
11075 cum->fregno++;
11076
11077 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11078 <= FP_ARG_V4_MAX_REG)
11079 return gen_rtx_REG (mode, cum->fregno);
11080 else
11081 return NULL_RTX;
11082 }
11083 else
11084 {
11085 int n_words = rs6000_arg_size (mode, type);
11086 int gregno = cum->sysv_gregno;
11087
11088 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
11089 (r7,r8) or (r9,r10), as is any other 2-word item such
11090 as complex int, due to a historical mistake. */
11091 if (n_words == 2)
11092 gregno += (1 - gregno) & 1;
11093
11094 /* Multi-reg args are not split between registers and stack. */
11095 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11096 return NULL_RTX;
11097
11098 if (TARGET_32BIT && TARGET_POWERPC64)
11099 return rs6000_mixed_function_arg (mode, type,
11100 gregno - GP_ARG_MIN_REG);
11101 return gen_rtx_REG (mode, gregno);
11102 }
11103 }
11104 else
11105 {
11106 int align_words = rs6000_parm_start (mode, type, cum->words);
11107
11108 /* _Decimal128 must be passed in an even/odd float register pair.
11109 This assumes that the register number is odd when fregno is odd. */
11110 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11111 cum->fregno++;
11112
11113 if (USE_FP_FOR_ARG_P (cum, elt_mode))
11114 {
11115 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
11116 rtx r, off;
11117 int i, k = 0;
11118 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11119 int fpr_words;
11120
11121 /* Do we also need to pass this argument in the parameter
11122 save area? */
11123 if (type && (cum->nargs_prototype <= 0
11124 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11125 && TARGET_XL_COMPAT
11126 && align_words >= GP_ARG_NUM_REG)))
11127 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
11128
11129 /* Describe where this argument goes in the fprs. */
11130 for (i = 0; i < n_elts
11131 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
11132 {
11133 /* Check if the argument is split over registers and memory.
11134 This can only ever happen for long double or _Decimal128;
11135 complex types are handled via split_complex_arg. */
11136 machine_mode fmode = elt_mode;
11137 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
11138 {
11139 gcc_assert (FLOAT128_2REG_P (fmode));
11140 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
11141 }
11142
11143 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
11144 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
11145 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11146 }
11147
11148 /* If there were not enough FPRs to hold the argument, the rest
11149 usually goes into memory. However, if the current position
11150 is still within the register parameter area, a portion may
11151 actually have to go into GPRs.
11152
11153 Note that it may happen that the portion of the argument
11154 passed in the first "half" of the first GPR was already
11155 passed in the last FPR as well.
11156
11157 For unnamed arguments, we already set up GPRs to cover the
11158 whole argument in rs6000_psave_function_arg, so there is
11159 nothing further to do at this point. */
11160 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
11161 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
11162 && cum->nargs_prototype > 0)
11163 {
11164 static bool warned;
11165
11166 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
11167 int n_words = rs6000_arg_size (mode, type);
11168
11169 align_words += fpr_words;
11170 n_words -= fpr_words;
11171
11172 do
11173 {
11174 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
11175 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
11176 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
11177 }
11178 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
11179
11180 if (!warned && warn_psabi)
11181 {
11182 warned = true;
11183 inform (input_location,
11184 "the ABI of passing homogeneous float aggregates"
11185 " has changed in GCC 5");
11186 }
11187 }
11188
11189 return rs6000_finish_function_arg (mode, rvec, k);
11190 }
11191 else if (align_words < GP_ARG_NUM_REG)
11192 {
11193 if (TARGET_32BIT && TARGET_POWERPC64)
11194 return rs6000_mixed_function_arg (mode, type, align_words);
11195
11196 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
11197 }
11198 else
11199 return NULL_RTX;
11200 }
11201 }
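/* Illustrative sketch (not part of the build): for a prototyped 64-bit
   ELFv2 call such as

     extern void f (int a, double b, vector int c);

   the code above places a in r3, b in f1 and c in v2; only unprototyped
   or varargs calls additionally describe vector or FP arguments in GPRs
   and the parameter save area. */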
11202 \f
11203 /* For an arg passed partly in registers and partly in memory, this is
11204 the number of bytes passed in registers. For args passed entirely in
11205 registers or entirely in memory, zero. When an arg is described by a
11206 PARALLEL, perhaps using more than one register type, this function
11207 returns the number of bytes used by the first element of the PARALLEL. */
11208
11209 static int
11210 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
11211 tree type, bool named)
11212 {
11213 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11214 bool passed_in_gprs = true;
11215 int ret = 0;
11216 int align_words;
11217 machine_mode elt_mode;
11218 int n_elts;
11219
11220 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11221
11222 if (DEFAULT_ABI == ABI_V4)
11223 return 0;
11224
11225 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11226 {
11227 /* If we are passing this arg in the fixed parameter save area (gprs or
11228 memory) as well as VRs, we do not use the partial bytes mechanism;
11229 instead, rs6000_function_arg will return a PARALLEL including a memory
11230 element as necessary. Library support functions for IEEE 128-bit are
11231 assumed to not need the value passed both in GPRs and in vector
11232 registers. */
11233 if (TARGET_64BIT && !cum->prototype
11234 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
11235 return 0;
11236
11237 /* Otherwise, we pass in VRs only. Check for partial copies. */
11238 passed_in_gprs = false;
11239 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
11240 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
11241 }
11242
11243 /* In this complicated case we just disable the partial_nregs code. */
11244 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11245 return 0;
11246
11247 align_words = rs6000_parm_start (mode, type, cum->words);
11248
11249 if (USE_FP_FOR_ARG_P (cum, elt_mode))
11250 {
11251 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
11252
11253 /* If we are passing this arg in the fixed parameter save area
11254 (gprs or memory) as well as FPRs, we do not use the partial
11255 bytes mechanism; instead, rs6000_function_arg will return a
11256 PARALLEL including a memory element as necessary. */
11257 if (type
11258 && (cum->nargs_prototype <= 0
11259 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11260 && TARGET_XL_COMPAT
11261 && align_words >= GP_ARG_NUM_REG)))
11262 return 0;
11263
11264 /* Otherwise, we pass in FPRs only. Check for partial copies. */
11265 passed_in_gprs = false;
11266 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
11267 {
11268 /* Compute number of bytes / words passed in FPRs. If there
11269 is still space available in the register parameter area
11270 *after* that amount, a part of the argument will be passed
11271 in GPRs. In that case, the total amount passed in any
11272 registers is equal to the amount that would have been passed
11273 in GPRs if everything were passed there, so we fall back to
11274 the GPR code below to compute the appropriate value. */
11275 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
11276 * MIN (8, GET_MODE_SIZE (elt_mode)));
11277 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
11278
11279 if (align_words + fpr_words < GP_ARG_NUM_REG)
11280 passed_in_gprs = true;
11281 else
11282 ret = fpr;
11283 }
11284 }
11285
11286 if (passed_in_gprs
11287 && align_words < GP_ARG_NUM_REG
11288 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
11289 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
11290
11291 if (ret != 0 && TARGET_DEBUG_ARG)
11292 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
11293
11294 return ret;
11295 }
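/* Worked example (illustrative): on a 64-bit target, a 3-word aggregate
   whose first word falls in the last GPR (align_words == GP_ARG_NUM_REG - 1)
   gives ret = (8 - 7) * 8 == 8 bytes passed in registers; the remaining
   two words go to memory. */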
11296 \f
11297 /* A C expression that indicates when an argument must be passed by
11298 reference. If nonzero for an argument, a copy of that argument is
11299 made in memory and a pointer to the argument is passed instead of
11300 the argument itself. The pointer is passed in whatever way is
11301 appropriate for passing a pointer to that type.
11302
11303 Under V.4, aggregates and long double are passed by reference.
11304
11305 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
11306 reference unless the AltiVec vector extension ABI is in force.
11307
11308 As an extension to all ABIs, variable sized types are passed by
11309 reference. */
11310
11311 static bool
11312 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
11313 machine_mode mode, const_tree type,
11314 bool named ATTRIBUTE_UNUSED)
11315 {
11316 if (!type)
11317 return 0;
11318
11319 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11320 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11321 {
11322 if (TARGET_DEBUG_ARG)
11323 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
11324 return 1;
11325 }
11326
11327 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
11328 {
11329 if (TARGET_DEBUG_ARG)
11330 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
11331 return 1;
11332 }
11333
11334 if (int_size_in_bytes (type) < 0)
11335 {
11336 if (TARGET_DEBUG_ARG)
11337 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
11338 return 1;
11339 }
11340
11341 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11342 modes only exist for GCC vector types if -maltivec. */
11343 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
11344 {
11345 if (TARGET_DEBUG_ARG)
11346 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
11347 return 1;
11348 }
11349
11350 /* Pass synthetic vectors in memory. */
11351 if (TREE_CODE (type) == VECTOR_TYPE
11352 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11353 {
11354 static bool warned_for_pass_big_vectors = false;
11355 if (TARGET_DEBUG_ARG)
11356 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
11357 if (!warned_for_pass_big_vectors)
11358 {
11359 warning (0, "GCC vector passed by reference: "
11360 "non-standard ABI extension with no compatibility guarantee");
11361 warned_for_pass_big_vectors = true;
11362 }
11363 return 1;
11364 }
11365
11366 return 0;
11367 }
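/* Illustrative examples (not part of the build) of types the predicate
   above forces into memory. */
#if 0
struct s { int a, b; };	/* under V.4, any aggregate is passed by reference */
typedef int big_vec __attribute__ ((vector_size (32)));
			/* synthetic vector wider than 16 bytes: passed by
			   reference, with the one-time warning above */
#endif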
11368
11369 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
11370 already processed. Return true if the parameter must be passed
11371 (fully or partially) on the stack. */
11372
11373 static bool
11374 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
11375 {
11376 machine_mode mode;
11377 int unsignedp;
11378 rtx entry_parm;
11379
11380 /* Catch errors. */
11381 if (type == NULL || type == error_mark_node)
11382 return true;
11383
11384 /* Handle types with no storage requirement. */
11385 if (TYPE_MODE (type) == VOIDmode)
11386 return false;
11387
11388 /* Handle complex types. */
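/* A complex value is passed as its two scalar halves, so check (and
   advance past) the element type twice; if the first half already needs
   the stack, skipping the second advance is harmless, since the caller
   stops scanning as soon as any parameter needs the stack. */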
11389 if (TREE_CODE (type) == COMPLEX_TYPE)
11390 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
11391 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
11392
11393 /* Handle transparent aggregates. */
11394 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
11395 && TYPE_TRANSPARENT_AGGR (type))
11396 type = TREE_TYPE (first_field (type));
11397
11398 /* See if this arg was passed by invisible reference. */
11399 if (pass_by_reference (get_cumulative_args (args_so_far),
11400 TYPE_MODE (type), type, true))
11401 type = build_pointer_type (type);
11402
11403 /* Find mode as it is passed by the ABI. */
11404 unsignedp = TYPE_UNSIGNED (type);
11405 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
11406
11407 /* If we must pass in stack, we need a stack. */
11408 if (rs6000_must_pass_in_stack (mode, type))
11409 return true;
11410
11411 /* If there is no incoming register, we need a stack. */
11412 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
11413 if (entry_parm == NULL)
11414 return true;
11415
11416 /* Likewise if we need to pass both in registers and on the stack. */
11417 if (GET_CODE (entry_parm) == PARALLEL
11418 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
11419 return true;
11420
11421 /* Also true if we're partially in registers and partially not. */
11422 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
11423 return true;
11424
11425 /* Update info on where next arg arrives in registers. */
11426 rs6000_function_arg_advance (args_so_far, mode, type, true);
11427 return false;
11428 }
11429
11430 /* Return true if FUN has no prototype, has a variable argument
11431 list, or passes any parameter in memory. */
11432
11433 static bool
11434 rs6000_function_parms_need_stack (tree fun, bool incoming)
11435 {
11436 tree fntype, result;
11437 CUMULATIVE_ARGS args_so_far_v;
11438 cumulative_args_t args_so_far;
11439
11440 if (!fun)
11441 /* Must be a libcall; libcalls only use reg parms. */
11442 return false;
11443
11444 fntype = fun;
11445 if (!TYPE_P (fun))
11446 fntype = TREE_TYPE (fun);
11447
11448 /* Varargs functions need the parameter save area. */
11449 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
11450 return true;
11451
11452 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
11453 args_so_far = pack_cumulative_args (&args_so_far_v);
11454
11455 /* When incoming, we will have been passed the function decl.
11456 It is necessary to use the decl to handle K&R style functions,
11457 where TYPE_ARG_TYPES may not be available. */
11458 if (incoming)
11459 {
11460 gcc_assert (DECL_P (fun));
11461 result = DECL_RESULT (fun);
11462 }
11463 else
11464 result = TREE_TYPE (fntype);
11465
11466 if (result && aggregate_value_p (result, fntype))
11467 {
11468 if (!TYPE_P (result))
11469 result = TREE_TYPE (result);
11470 result = build_pointer_type (result);
11471 rs6000_parm_needs_stack (args_so_far, result);
11472 }
11473
11474 if (incoming)
11475 {
11476 tree parm;
11477
11478 for (parm = DECL_ARGUMENTS (fun);
11479 parm && parm != void_list_node;
11480 parm = TREE_CHAIN (parm))
11481 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
11482 return true;
11483 }
11484 else
11485 {
11486 function_args_iterator args_iter;
11487 tree arg_type;
11488
11489 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
11490 if (rs6000_parm_needs_stack (args_so_far, arg_type))
11491 return true;
11492 }
11493
11494 return false;
11495 }
11496
11497 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
11498 usually a constant depending on the ABI. However, in the ELFv2 ABI
11499 the register parameter area is optional when calling a function that
11500 has a prototype in scope, has no variable argument list, and passes
11501 all parameters in registers. */
11502
11503 int
11504 rs6000_reg_parm_stack_space (tree fun, bool incoming)
11505 {
11506 int reg_parm_stack_space;
11507
11508 switch (DEFAULT_ABI)
11509 {
11510 default:
11511 reg_parm_stack_space = 0;
11512 break;
11513
11514 case ABI_AIX:
11515 case ABI_DARWIN:
11516 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11517 break;
11518
11519 case ABI_ELFv2:
11520 /* ??? Recomputing this every time is a bit expensive. Is there
11521 a place to cache this information? */
11522 if (rs6000_function_parms_need_stack (fun, incoming))
11523 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11524 else
11525 reg_parm_stack_space = 0;
11526 break;
11527 }
11528
11529 return reg_parm_stack_space;
11530 }
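/* For illustration: under ELFv2, calling a fully prototyped
   'int f (int, int)' needs no register parameter area (0 bytes), while a
   varargs or unprototyped callee reserves the full 64 bytes on 64-bit
   targets (32 bytes on 32-bit). */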
11531
11532 static void
11533 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
11534 {
11535 int i;
11536 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
11537
11538 if (nregs == 0)
11539 return;
11540
11541 for (i = 0; i < nregs; i++)
11542 {
11543 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
11544 if (reload_completed)
11545 {
11546 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
11547 tem = NULL_RTX;
11548 else
11549 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
11550 i * GET_MODE_SIZE (reg_mode));
11551 }
11552 else
11553 tem = replace_equiv_address (tem, XEXP (tem, 0));
11554
11555 gcc_assert (tem);
11556
11557 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
11558 }
11559 }
11560 \f
11561 /* Perform any actions needed for a function that is receiving a
11562 variable number of arguments.
11563
11564 CUM is as above.
11565
11566 MODE and TYPE are the mode and type of the current parameter.
11567
11568 PRETEND_SIZE is a variable that should be set to the amount of stack
11569 that must be pushed by the prolog to pretend that our caller pushed
11570 it.
11571
11572 Normally, this macro will push all remaining incoming registers on the
11573 stack and set PRETEND_SIZE to the length of the registers pushed. */
11574
11575 static void
11576 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
11577 tree type, int *pretend_size ATTRIBUTE_UNUSED,
11578 int no_rtl)
11579 {
11580 CUMULATIVE_ARGS next_cum;
11581 int reg_size = TARGET_32BIT ? 4 : 8;
11582 rtx save_area = NULL_RTX, mem;
11583 int first_reg_offset;
11584 alias_set_type set;
11585
11586 /* Skip the last named argument. */
11587 next_cum = *get_cumulative_args (cum);
11588 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
11589
11590 if (DEFAULT_ABI == ABI_V4)
11591 {
11592 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
11593
11594 if (! no_rtl)
11595 {
11596 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
11597 HOST_WIDE_INT offset = 0;
11598
11599 /* Try to optimize the size of the varargs save area.
11600 The ABI requires that ap.reg_save_area is doubleword
11601 aligned, but we don't need to allocate space for all
11602 the bytes, only those into which we will actually save
11603 anything. */
11604 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
11605 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
11606 if (TARGET_HARD_FLOAT && TARGET_FPRS
11607 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11608 && cfun->va_list_fpr_size)
11609 {
11610 if (gpr_reg_num)
11611 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
11612 * UNITS_PER_FP_WORD;
11613 if (cfun->va_list_fpr_size
11614 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11615 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
11616 else
11617 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11618 * UNITS_PER_FP_WORD;
11619 }
11620 if (gpr_reg_num)
11621 {
11622 offset = -((first_reg_offset * reg_size) & ~7);
11623 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
11624 {
11625 gpr_reg_num = cfun->va_list_gpr_size;
11626 if (reg_size == 4 && (first_reg_offset & 1))
11627 gpr_reg_num++;
11628 }
11629 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
11630 }
11631 else if (fpr_size)
11632 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
11633 * UNITS_PER_FP_WORD
11634 - (int) (GP_ARG_NUM_REG * reg_size);
11635
11636 if (gpr_size + fpr_size)
11637 {
11638 rtx reg_save_area
11639 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
11640 gcc_assert (GET_CODE (reg_save_area) == MEM);
11641 reg_save_area = XEXP (reg_save_area, 0);
11642 if (GET_CODE (reg_save_area) == PLUS)
11643 {
11644 gcc_assert (XEXP (reg_save_area, 0)
11645 == virtual_stack_vars_rtx);
11646 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
11647 offset += INTVAL (XEXP (reg_save_area, 1));
11648 }
11649 else
11650 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
11651 }
11652
11653 cfun->machine->varargs_save_offset = offset;
11654 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
11655 }
11656 }
11657 else
11658 {
11659 first_reg_offset = next_cum.words;
11660 save_area = crtl->args.internal_arg_pointer;
11661
11662 if (targetm.calls.must_pass_in_stack (mode, type))
11663 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11664 }
11665
11666 set = get_varargs_alias_set ();
11667 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11668 && cfun->va_list_gpr_size)
11669 {
11670 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11671
11672 if (va_list_gpr_counter_field)
11673 /* V4 va_list_gpr_size counts number of registers needed. */
11674 n_gpr = cfun->va_list_gpr_size;
11675 else
11676 /* char * va_list instead counts number of bytes needed. */
11677 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11678
11679 if (nregs > n_gpr)
11680 nregs = n_gpr;
11681
11682 mem = gen_rtx_MEM (BLKmode,
11683 plus_constant (Pmode, save_area,
11684 first_reg_offset * reg_size));
11685 MEM_NOTRAP_P (mem) = 1;
11686 set_mem_alias_set (mem, set);
11687 set_mem_align (mem, BITS_PER_WORD);
11688
11689 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11690 nregs);
11691 }
11692
11693 /* Save FP registers if needed. */
11694 if (DEFAULT_ABI == ABI_V4
11695 && TARGET_HARD_FLOAT && TARGET_FPRS
11696 && ! no_rtl
11697 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11698 && cfun->va_list_fpr_size)
11699 {
11700 int fregno = next_cum.fregno, nregs;
11701 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11702 rtx lab = gen_label_rtx ();
11703 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11704 * UNITS_PER_FP_WORD);
11705
11706 emit_jump_insn
11707 (gen_rtx_SET (pc_rtx,
11708 gen_rtx_IF_THEN_ELSE (VOIDmode,
11709 gen_rtx_NE (VOIDmode, cr1,
11710 const0_rtx),
11711 gen_rtx_LABEL_REF (VOIDmode, lab),
11712 pc_rtx)));
11713
11714 for (nregs = 0;
11715 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11716 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11717 {
11718 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11719 ? DFmode : SFmode,
11720 plus_constant (Pmode, save_area, off));
11721 MEM_NOTRAP_P (mem) = 1;
11722 set_mem_alias_set (mem, set);
11723 set_mem_align (mem, GET_MODE_ALIGNMENT (
11724 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11725 ? DFmode : SFmode));
11726 emit_move_insn (mem, gen_rtx_REG (
11727 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11728 ? DFmode : SFmode, fregno));
11729 }
11730
11731 emit_label (lab);
11732 }
11733 }
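/* For reference: the V.4 register save area set up above holds the 8 GPRs
   first (8 * reg_size bytes) followed by the FPRs (UNITS_PER_FP_WORD bytes
   each), which is why the FP offset computation starts at
   GP_ARG_NUM_REG * reg_size. */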
11734
11735 /* Create the va_list data type. */
11736
11737 static tree
11738 rs6000_build_builtin_va_list (void)
11739 {
11740 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11741
11742 /* For AIX, prefer 'char *' because that's what the system
11743 header files like. */
11744 if (DEFAULT_ABI != ABI_V4)
11745 return build_pointer_type (char_type_node);
11746
11747 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11748 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11749 get_identifier ("__va_list_tag"), record);
11750
11751 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11752 unsigned_char_type_node);
11753 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11754 unsigned_char_type_node);
11755 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11756 every user file. */
11757 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11758 get_identifier ("reserved"), short_unsigned_type_node);
11759 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11760 get_identifier ("overflow_arg_area"),
11761 ptr_type_node);
11762 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11763 get_identifier ("reg_save_area"),
11764 ptr_type_node);
11765
11766 va_list_gpr_counter_field = f_gpr;
11767 va_list_fpr_counter_field = f_fpr;
11768
11769 DECL_FIELD_CONTEXT (f_gpr) = record;
11770 DECL_FIELD_CONTEXT (f_fpr) = record;
11771 DECL_FIELD_CONTEXT (f_res) = record;
11772 DECL_FIELD_CONTEXT (f_ovf) = record;
11773 DECL_FIELD_CONTEXT (f_sav) = record;
11774
11775 TYPE_STUB_DECL (record) = type_decl;
11776 TYPE_NAME (record) = type_decl;
11777 TYPE_FIELDS (record) = f_gpr;
11778 DECL_CHAIN (f_gpr) = f_fpr;
11779 DECL_CHAIN (f_fpr) = f_res;
11780 DECL_CHAIN (f_res) = f_ovf;
11781 DECL_CHAIN (f_ovf) = f_sav;
11782
11783 layout_type (record);
11784
11785 /* The correct type is an array type of one element. */
11786 return build_array_type (record, build_index_type (size_zero_node));
11787 }
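/* Sketch (not part of the build): the C equivalent of the V.4 va_list
   record constructed above. */
#if 0
typedef struct __va_list_tag
{
  unsigned char gpr;		/* index of the next saved GP register */
  unsigned char fpr;		/* index of the next saved FP register */
  unsigned short reserved;	/* named padding */
  void *overflow_arg_area;	/* next argument passed on the stack */
  void *reg_save_area;		/* base of the register save area */
} example_va_list[1];
#endif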
11788
11789 /* Implement va_start. */
11790
11791 static void
11792 rs6000_va_start (tree valist, rtx nextarg)
11793 {
11794 HOST_WIDE_INT words, n_gpr, n_fpr;
11795 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11796 tree gpr, fpr, ovf, sav, t;
11797
11798 /* Only SVR4 needs something special. */
11799 if (DEFAULT_ABI != ABI_V4)
11800 {
11801 std_expand_builtin_va_start (valist, nextarg);
11802 return;
11803 }
11804
11805 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11806 f_fpr = DECL_CHAIN (f_gpr);
11807 f_res = DECL_CHAIN (f_fpr);
11808 f_ovf = DECL_CHAIN (f_res);
11809 f_sav = DECL_CHAIN (f_ovf);
11810
11811 valist = build_simple_mem_ref (valist);
11812 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11813 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11814 f_fpr, NULL_TREE);
11815 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11816 f_ovf, NULL_TREE);
11817 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11818 f_sav, NULL_TREE);
11819
11820 /* Count number of gp and fp argument registers used. */
11821 words = crtl->args.info.words;
11822 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11823 GP_ARG_NUM_REG);
11824 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11825 FP_ARG_NUM_REG);
11826
11827 if (TARGET_DEBUG_ARG)
11828 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
11829 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
11830 words, n_gpr, n_fpr);
11831
11832 if (cfun->va_list_gpr_size)
11833 {
11834 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11835 build_int_cst (NULL_TREE, n_gpr));
11836 TREE_SIDE_EFFECTS (t) = 1;
11837 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11838 }
11839
11840 if (cfun->va_list_fpr_size)
11841 {
11842 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11843 build_int_cst (NULL_TREE, n_fpr));
11844 TREE_SIDE_EFFECTS (t) = 1;
11845 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11846
11847 #ifdef HAVE_AS_GNU_ATTRIBUTE
11848 if (call_ABI_of_interest (cfun->decl))
11849 rs6000_passes_float = true;
11850 #endif
11851 }
11852
11853 /* Find the overflow area. */
11854 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
11855 if (words != 0)
11856 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
11857 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11858 TREE_SIDE_EFFECTS (t) = 1;
11859 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11860
11861 /* If there were no va_arg invocations, don't set up the register
11862 save area. */
11863 if (!cfun->va_list_gpr_size
11864 && !cfun->va_list_fpr_size
11865 && n_gpr < GP_ARG_NUM_REG
11866 && n_fpr < FP_ARG_V4_MAX_REG)
11867 return;
11868
11869 /* Find the register save area. */
11870 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11871 if (cfun->machine->varargs_save_offset)
11872 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11873 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11874 TREE_SIDE_EFFECTS (t) = 1;
11875 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11876 }
11877
11878 /* Implement va_arg. */
11879
11880 static tree
11881 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11882 gimple_seq *post_p)
11883 {
11884 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11885 tree gpr, fpr, ovf, sav, reg, t, u;
11886 int size, rsize, n_reg, sav_ofs, sav_scale;
11887 tree lab_false, lab_over, addr;
11888 int align;
11889 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11890 int regalign = 0;
11891 gimple *stmt;
11892
11893 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11894 {
11895 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11896 return build_va_arg_indirect_ref (t);
11897 }
11898
11899 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11900 earlier version of gcc, with the property that it always applied alignment
11901 adjustments to the va-args (even for zero-sized types). The cheapest way
11902 to deal with this is to replicate the effect of the part of
11903 std_gimplify_va_arg_expr that carries out the align adjust, for the
11904 relevant case.
11905 We don't need to check for pass-by-reference because of the test above.
11906 We can return a simplified answer, since we know there's no offset to add. */
11907
11908 if (((TARGET_MACHO
11909 && rs6000_darwin64_abi)
11910 || DEFAULT_ABI == ABI_ELFv2
11911 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11912 && integer_zerop (TYPE_SIZE (type)))
11913 {
11914 unsigned HOST_WIDE_INT align, boundary;
11915 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11916 align = PARM_BOUNDARY / BITS_PER_UNIT;
11917 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11918 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11919 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11920 boundary /= BITS_PER_UNIT;
11921 if (boundary > align)
11922 {
11923 tree t;
11924 /* This updates arg ptr by the amount that would be necessary
11925 to align the zero-sized (but not zero-alignment) item. */
11926 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11927 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11928 gimplify_and_add (t, pre_p);
11929
11930 t = fold_convert (sizetype, valist_tmp);
11931 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11932 fold_convert (TREE_TYPE (valist),
11933 fold_build2 (BIT_AND_EXPR, sizetype, t,
11934 size_int (-boundary))));
11935 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11936 gimplify_and_add (t, pre_p);
11937 }
11938 /* Since it is zero-sized there's no increment for the item itself. */
11939 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11940 return build_va_arg_indirect_ref (valist_tmp);
11941 }
11942
11943 if (DEFAULT_ABI != ABI_V4)
11944 {
11945 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11946 {
11947 tree elem_type = TREE_TYPE (type);
11948 machine_mode elem_mode = TYPE_MODE (elem_type);
11949 int elem_size = GET_MODE_SIZE (elem_mode);
11950
11951 if (elem_size < UNITS_PER_WORD)
11952 {
11953 tree real_part, imag_part;
11954 gimple_seq post = NULL;
11955
11956 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11957 &post);
11958 /* Copy the value into a temporary, lest the formal temporary
11959 be reused out from under us. */
11960 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11961 gimple_seq_add_seq (pre_p, post);
11962
11963 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11964 post_p);
11965
11966 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11967 }
11968 }
11969
11970 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
11971 }
11972
11973 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11974 f_fpr = DECL_CHAIN (f_gpr);
11975 f_res = DECL_CHAIN (f_fpr);
11976 f_ovf = DECL_CHAIN (f_res);
11977 f_sav = DECL_CHAIN (f_ovf);
11978
11979 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11980 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11981 f_fpr, NULL_TREE);
11982 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11983 f_ovf, NULL_TREE);
11984 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11985 f_sav, NULL_TREE);
11986
11987 size = int_size_in_bytes (type);
11988 rsize = (size + 3) / 4;
11989 align = 1;
11990
11991 if (TARGET_HARD_FLOAT && TARGET_FPRS
11992 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
11993 || (TARGET_DOUBLE_FLOAT
11994 && (TYPE_MODE (type) == DFmode
11995 || FLOAT128_2REG_P (TYPE_MODE (type))
11996 || DECIMAL_FLOAT_MODE_P (TYPE_MODE (type))))))
11997 {
11998 /* FP args go in FP registers, if present. */
11999 reg = fpr;
12000 n_reg = (size + 7) / 8;
12001 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
12002 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
12003 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
12004 align = 8;
12005 }
12006 else
12007 {
12008 /* Otherwise into GP registers. */
12009 reg = gpr;
12010 n_reg = rsize;
12011 sav_ofs = 0;
12012 sav_scale = 4;
12013 if (n_reg == 2)
12014 align = 8;
12015 }
12016
12017 /* Pull the value out of the saved registers.... */
12018
12019 lab_over = NULL;
12020 addr = create_tmp_var (ptr_type_node, "addr");
12021
12022 /* AltiVec vectors never go in registers when -mabi=altivec. */
12023 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
12024 align = 16;
12025 else
12026 {
12027 lab_false = create_artificial_label (input_location);
12028 lab_over = create_artificial_label (input_location);
12029
12030 /* Long long and SPE vectors are aligned in the registers,
12031 as is any other 2-gpr item such as complex int, due to a
12032 historical mistake. */
12033 u = reg;
12034 if (n_reg == 2 && reg == gpr)
12035 {
12036 regalign = 1;
12037 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12038 build_int_cst (TREE_TYPE (reg), n_reg - 1));
12039 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
12040 unshare_expr (reg), u);
12041 }
12042 /* _Decimal128 is passed in even/odd fpr pairs; the stored
12043 reg number is 0 for f1, so we want to make it odd. */
12044 else if (reg == fpr && TYPE_MODE (type) == TDmode)
12045 {
12046 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12047 build_int_cst (TREE_TYPE (reg), 1));
12048 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
12049 }
12050
12051 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
12052 t = build2 (GE_EXPR, boolean_type_node, u, t);
12053 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12054 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12055 gimplify_and_add (t, pre_p);
12056
12057 t = sav;
12058 if (sav_ofs)
12059 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12060
12061 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
12062 build_int_cst (TREE_TYPE (reg), n_reg));
12063 u = fold_convert (sizetype, u);
12064 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
12065 t = fold_build_pointer_plus (t, u);
12066
12067 /* _Decimal32 varargs are located in the second word of the 64-bit
12068 FP register for 32-bit binaries. */
12069 if (TARGET_32BIT
12070 && TARGET_HARD_FLOAT && TARGET_FPRS
12071 && TYPE_MODE (type) == SDmode)
12072 t = fold_build_pointer_plus_hwi (t, size);
12073
12074 gimplify_assign (addr, t, pre_p);
12075
12076 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12077
12078 stmt = gimple_build_label (lab_false);
12079 gimple_seq_add_stmt (pre_p, stmt);
12080
12081 if ((n_reg == 2 && !regalign) || n_reg > 2)
12082 {
12083 /* Ensure that we don't find any more args in regs.
12084 Alignment has taken care of the special cases. */
12085 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
12086 }
12087 }
12088
12089 /* ... otherwise out of the overflow area. */
12090
12091 /* Care for on-stack alignment if needed. */
12092 t = ovf;
12093 if (align != 1)
12094 {
12095 t = fold_build_pointer_plus_hwi (t, align - 1);
12096 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12097 build_int_cst (TREE_TYPE (t), -align));
12098 }
12099 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12100
12101 gimplify_assign (unshare_expr (addr), t, pre_p);
12102
12103 t = fold_build_pointer_plus_hwi (t, size);
12104 gimplify_assign (unshare_expr (ovf), t, pre_p);
12105
12106 if (lab_over)
12107 {
12108 stmt = gimple_build_label (lab_over);
12109 gimple_seq_add_stmt (pre_p, stmt);
12110 }
12111
12112 if (STRICT_ALIGNMENT
12113 && (TYPE_ALIGN (type)
12114 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
12115 {
12116 /* The value (of type complex double, for example) may not be
12117 aligned in memory in the saved registers, so copy via a
12118 temporary. (This is the same code as used for SPARC.) */
12119 tree tmp = create_tmp_var (type, "va_arg_tmp");
12120 tree dest_addr = build_fold_addr_expr (tmp);
12121
12122 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
12123 3, dest_addr, addr, size_int (rsize * 4));
12124
12125 gimplify_and_add (copy, pre_p);
12126 addr = dest_addr;
12127 }
12128
12129 addr = fold_convert (ptrtype, addr);
12130 return build_va_arg_indirect_ref (addr);
12131 }
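/* In outline, the sequence gimplified above behaves like the following
   pseudo-C (names refer to the locals above; this is an illustrative
   sketch only, not generated code):

     if (reg <= 8 - n_reg)                // arg still fits in save area
       addr = sav + sav_ofs + reg * sav_scale, reg += n_reg;
     else
       {
	 reg = 8;                         // no more args in registers
	 addr = align_up (ovf, align);    // ... use the overflow area
	 ovf = addr + size;
       }
 */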
12132
12133 /* Builtins. */
12134
12135 static void
12136 def_builtin (const char *name, tree type, enum rs6000_builtins code)
12137 {
12138 tree t;
12139 unsigned classify = rs6000_builtin_info[(int)code].attr;
12140 const char *attr_string = "";
12141
12142 gcc_assert (name != NULL);
12143 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT - 1));
12144
12145 if (rs6000_builtin_decls[(int)code])
12146 fatal_error (input_location,
12147 "internal error: builtin function %s already processed", name);
12148
12149 rs6000_builtin_decls[(int)code] = t =
12150 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
12151
12152 /* Set any special attributes. */
12153 if ((classify & RS6000_BTC_CONST) != 0)
12154 {
12155 /* const function, function only depends on the inputs. */
12156 TREE_READONLY (t) = 1;
12157 TREE_NOTHROW (t) = 1;
12158 attr_string = ", const";
12159 }
12160 else if ((classify & RS6000_BTC_PURE) != 0)
12161 {
12162 /* pure function, function can read global memory, but does not set any
12163 external state. */
12164 DECL_PURE_P (t) = 1;
12165 TREE_NOTHROW (t) = 1;
12166 attr_string = ", pure";
12167 }
12168 else if ((classify & RS6000_BTC_FP) != 0)
12169 {
12170 /* Function is a math function. If rounding mode is on, then treat the
12171 function as not reading global memory, but it can have arbitrary side
12172 effects. If it is off, then assume the function is a const function.
12173 This mimics the ATTR_MATHFN_FPROUNDING attribute in
12174 builtin-attribute.def that is used for the math functions. */
12175 TREE_NOTHROW (t) = 1;
12176 if (flag_rounding_math)
12177 {
12178 DECL_PURE_P (t) = 1;
12179 DECL_IS_NOVOPS (t) = 1;
12180 attr_string = ", fp, pure";
12181 }
12182 else
12183 {
12184 TREE_READONLY (t) = 1;
12185 attr_string = ", fp, const";
12186 }
12187 }
12188 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
12189 gcc_unreachable ();
12190
12191 if (TARGET_DEBUG_BUILTIN)
12192 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
12193 (int)code, name, attr_string);
12194 }
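/* For example (a schematic registration; the real ones are driven from
   the bdesc_* tables below by rs6000_common_init_builtins and friends):

     tree v4si_ftype_v4si_v4si
       = build_function_type_list (V4SI_type_node, V4SI_type_node,
				   V4SI_type_node, NULL_TREE);
     def_builtin ("__builtin_altivec_vaddsws", v4si_ftype_v4si_v4si,
		  ALTIVEC_BUILTIN_VADDSWS);

   After this call, rs6000_builtin_decls[ALTIVEC_BUILTIN_VADDSWS] holds
   the FUNCTION_DECL that the expanders below map back to RTL.  */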
12195
12196 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
12197
12198 #undef RS6000_BUILTIN_1
12199 #undef RS6000_BUILTIN_2
12200 #undef RS6000_BUILTIN_3
12201 #undef RS6000_BUILTIN_A
12202 #undef RS6000_BUILTIN_D
12203 #undef RS6000_BUILTIN_E
12204 #undef RS6000_BUILTIN_H
12205 #undef RS6000_BUILTIN_P
12206 #undef RS6000_BUILTIN_Q
12207 #undef RS6000_BUILTIN_S
12208 #undef RS6000_BUILTIN_X
12209
12210 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12211 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12212 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
12213 { MASK, ICODE, NAME, ENUM },
12214
12215 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12216 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12217 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12218 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12219 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12220 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12221 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12222 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12223
12224 static const struct builtin_description bdesc_3arg[] =
12225 {
12226 #include "rs6000-builtin.def"
12227 };
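/* A sketch of how the x-macro expansion above works: with only
   RS6000_BUILTIN_3 producing an initializer, an rs6000-builtin.def
   entry roughly equivalent to

     RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VMADDFP, "__builtin_altivec_vmaddfp",
		       RS6000_BTM_ALTIVEC, RS6000_BTC_FP, CODE_FOR_fmav4sf4)

   contributes

     { RS6000_BTM_ALTIVEC, CODE_FOR_fmav4sf4,
       "__builtin_altivec_vmaddfp", ALTIVEC_BUILTIN_VMADDFP },

   to bdesc_3arg, while entries of every other arity expand to nothing.
   (The .def file actually wraps RS6000_BUILTIN_* in higher-level BU_*
   macros; the line above is the expanded form.)  The same
   include-with-different-definitions trick builds each of the bdesc_*
   tables that follow.  */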
12228
12229 /* DST operations: void foo (void *, const int, const char). */
12230
12231 #undef RS6000_BUILTIN_1
12232 #undef RS6000_BUILTIN_2
12233 #undef RS6000_BUILTIN_3
12234 #undef RS6000_BUILTIN_A
12235 #undef RS6000_BUILTIN_D
12236 #undef RS6000_BUILTIN_E
12237 #undef RS6000_BUILTIN_H
12238 #undef RS6000_BUILTIN_P
12239 #undef RS6000_BUILTIN_Q
12240 #undef RS6000_BUILTIN_S
12241 #undef RS6000_BUILTIN_X
12242
12243 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12244 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12245 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12246 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12247 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
12248 { MASK, ICODE, NAME, ENUM },
12249
12250 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12251 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12252 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12253 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12254 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12255 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12256
12257 static const struct builtin_description bdesc_dst[] =
12258 {
12259 #include "rs6000-builtin.def"
12260 };
12261
12262 /* Simple binary operations: VECc = foo (VECa, VECb). */
12263
12264 #undef RS6000_BUILTIN_1
12265 #undef RS6000_BUILTIN_2
12266 #undef RS6000_BUILTIN_3
12267 #undef RS6000_BUILTIN_A
12268 #undef RS6000_BUILTIN_D
12269 #undef RS6000_BUILTIN_E
12270 #undef RS6000_BUILTIN_H
12271 #undef RS6000_BUILTIN_P
12272 #undef RS6000_BUILTIN_Q
12273 #undef RS6000_BUILTIN_S
12274 #undef RS6000_BUILTIN_X
12275
12276 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12277 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
12278 { MASK, ICODE, NAME, ENUM },
12279
12280 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12281 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12282 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12283 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12284 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12285 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12286 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12287 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12288 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12289
12290 static const struct builtin_description bdesc_2arg[] =
12291 {
12292 #include "rs6000-builtin.def"
12293 };
12294
12295 #undef RS6000_BUILTIN_1
12296 #undef RS6000_BUILTIN_2
12297 #undef RS6000_BUILTIN_3
12298 #undef RS6000_BUILTIN_A
12299 #undef RS6000_BUILTIN_D
12300 #undef RS6000_BUILTIN_E
12301 #undef RS6000_BUILTIN_H
12302 #undef RS6000_BUILTIN_P
12303 #undef RS6000_BUILTIN_Q
12304 #undef RS6000_BUILTIN_S
12305 #undef RS6000_BUILTIN_X
12306
12307 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12308 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12309 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12310 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12311 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12312 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12313 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12314 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
12315 { MASK, ICODE, NAME, ENUM },
12316
12317 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12318 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12319 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12320
12321 /* AltiVec predicates. */
12322
12323 static const struct builtin_description bdesc_altivec_preds[] =
12324 {
12325 #include "rs6000-builtin.def"
12326 };
12327
12328 /* SPE predicates. */
12329 #undef RS6000_BUILTIN_1
12330 #undef RS6000_BUILTIN_2
12331 #undef RS6000_BUILTIN_3
12332 #undef RS6000_BUILTIN_A
12333 #undef RS6000_BUILTIN_D
12334 #undef RS6000_BUILTIN_E
12335 #undef RS6000_BUILTIN_H
12336 #undef RS6000_BUILTIN_P
12337 #undef RS6000_BUILTIN_Q
12338 #undef RS6000_BUILTIN_S
12339 #undef RS6000_BUILTIN_X
12340
12341 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12342 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12343 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12344 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12345 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12346 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12347 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12348 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12349 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12350 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
12351 { MASK, ICODE, NAME, ENUM },
12352
12353 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12354
12355 static const struct builtin_description bdesc_spe_predicates[] =
12356 {
12357 #include "rs6000-builtin.def"
12358 };
12359
12360 /* SPE evsel predicates. */
12361 #undef RS6000_BUILTIN_1
12362 #undef RS6000_BUILTIN_2
12363 #undef RS6000_BUILTIN_3
12364 #undef RS6000_BUILTIN_A
12365 #undef RS6000_BUILTIN_D
12366 #undef RS6000_BUILTIN_E
12367 #undef RS6000_BUILTIN_H
12368 #undef RS6000_BUILTIN_P
12369 #undef RS6000_BUILTIN_Q
12370 #undef RS6000_BUILTIN_S
12371 #undef RS6000_BUILTIN_X
12372
12373 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12374 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12375 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12376 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12377 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12378 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
12379 { MASK, ICODE, NAME, ENUM },
12380
12381 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12382 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12383 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12384 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12385 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12386
12387 static const struct builtin_description bdesc_spe_evsel[] =
12388 {
12389 #include "rs6000-builtin.def"
12390 };
12391
12392 /* PAIRED predicates. */
12393 #undef RS6000_BUILTIN_1
12394 #undef RS6000_BUILTIN_2
12395 #undef RS6000_BUILTIN_3
12396 #undef RS6000_BUILTIN_A
12397 #undef RS6000_BUILTIN_D
12398 #undef RS6000_BUILTIN_E
12399 #undef RS6000_BUILTIN_H
12400 #undef RS6000_BUILTIN_P
12401 #undef RS6000_BUILTIN_Q
12402 #undef RS6000_BUILTIN_S
12403 #undef RS6000_BUILTIN_X
12404
12405 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12406 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12407 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12408 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12409 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12410 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12411 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12412 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12413 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
12414 { MASK, ICODE, NAME, ENUM },
12415
12416 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12417 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12418
12419 static const struct builtin_description bdesc_paired_preds[] =
12420 {
12421 #include "rs6000-builtin.def"
12422 };
12423
12424 /* ABS* operations. */
12425
12426 #undef RS6000_BUILTIN_1
12427 #undef RS6000_BUILTIN_2
12428 #undef RS6000_BUILTIN_3
12429 #undef RS6000_BUILTIN_A
12430 #undef RS6000_BUILTIN_D
12431 #undef RS6000_BUILTIN_E
12432 #undef RS6000_BUILTIN_H
12433 #undef RS6000_BUILTIN_P
12434 #undef RS6000_BUILTIN_Q
12435 #undef RS6000_BUILTIN_S
12436 #undef RS6000_BUILTIN_X
12437
12438 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12439 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12440 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12441 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
12442 { MASK, ICODE, NAME, ENUM },
12443
12444 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12445 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12446 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12447 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12448 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12449 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12450 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12451
12452 static const struct builtin_description bdesc_abs[] =
12453 {
12454 #include "rs6000-builtin.def"
12455 };
12456
12457 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
12458 foo (VECa). */
12459
12460 #undef RS6000_BUILTIN_1
12461 #undef RS6000_BUILTIN_2
12462 #undef RS6000_BUILTIN_3
12463 #undef RS6000_BUILTIN_A
12464 #undef RS6000_BUILTIN_D
12465 #undef RS6000_BUILTIN_E
12466 #undef RS6000_BUILTIN_H
12467 #undef RS6000_BUILTIN_P
12468 #undef RS6000_BUILTIN_Q
12469 #undef RS6000_BUILTIN_S
12470 #undef RS6000_BUILTIN_X
12471
12472 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
12473 { MASK, ICODE, NAME, ENUM },
12474
12475 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12476 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12477 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12478 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12479 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12480 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12481 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12482 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12483 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12484 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12485
12486 static const struct builtin_description bdesc_1arg[] =
12487 {
12488 #include "rs6000-builtin.def"
12489 };
12490
12491 /* HTM builtins. */
12492 #undef RS6000_BUILTIN_1
12493 #undef RS6000_BUILTIN_2
12494 #undef RS6000_BUILTIN_3
12495 #undef RS6000_BUILTIN_A
12496 #undef RS6000_BUILTIN_D
12497 #undef RS6000_BUILTIN_E
12498 #undef RS6000_BUILTIN_H
12499 #undef RS6000_BUILTIN_P
12500 #undef RS6000_BUILTIN_Q
12501 #undef RS6000_BUILTIN_S
12502 #undef RS6000_BUILTIN_X
12503
12504 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12505 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12506 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12507 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12508 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12509 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12510 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
12511 { MASK, ICODE, NAME, ENUM },
12512
12513 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12514 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12515 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12516 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12517
12518 static const struct builtin_description bdesc_htm[] =
12519 {
12520 #include "rs6000-builtin.def"
12521 };
12522
12523 #undef RS6000_BUILTIN_1
12524 #undef RS6000_BUILTIN_2
12525 #undef RS6000_BUILTIN_3
12526 #undef RS6000_BUILTIN_A
12527 #undef RS6000_BUILTIN_D
12528 #undef RS6000_BUILTIN_E
12529 #undef RS6000_BUILTIN_H
12530 #undef RS6000_BUILTIN_P
12531 #undef RS6000_BUILTIN_Q
12532 #undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
12533
12534 /* Return true if a builtin function is overloaded. */
12535 bool
12536 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
12537 {
12538 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
12539 }
12540
12541 /* Expand a call to a builtin function that takes no arguments. */
12542 static rtx
12543 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
12544 {
12545 rtx pat;
12546 machine_mode tmode = insn_data[icode].operand[0].mode;
12547
12548 if (icode == CODE_FOR_nothing)
12549 /* Builtin not supported on this processor. */
12550 return 0;
12551
12552 if (target == 0
12553 || GET_MODE (target) != tmode
12554 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12555 target = gen_reg_rtx (tmode);
12556
12557 pat = GEN_FCN (icode) (target);
12558 if (! pat)
12559 return 0;
12560 emit_insn (pat);
12561
12562 return target;
12563 }
12564
12565
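/* Expand a call to the mtfsf builtin: the first argument is an 8-bit
   literal field mask and the second is the value to move into the
   FPSCR.  There is no result, so NULL_RTX is returned on success.  */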
12566 static rtx
12567 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
12568 {
12569 rtx pat;
12570 tree arg0 = CALL_EXPR_ARG (exp, 0);
12571 tree arg1 = CALL_EXPR_ARG (exp, 1);
12572 rtx op0 = expand_normal (arg0);
12573 rtx op1 = expand_normal (arg1);
12574 machine_mode mode0 = insn_data[icode].operand[0].mode;
12575 machine_mode mode1 = insn_data[icode].operand[1].mode;
12576
12577 if (icode == CODE_FOR_nothing)
12578 /* Builtin not supported on this processor. */
12579 return 0;
12580
12581 /* If we got invalid arguments bail out before generating bad rtl. */
12582 if (arg0 == error_mark_node || arg1 == error_mark_node)
12583 return const0_rtx;
12584
12585 if (GET_CODE (op0) != CONST_INT
12586 || INTVAL (op0) > 255
12587 || INTVAL (op0) < 0)
12588 {
12589 error ("argument 1 must be an 8-bit field value");
12590 return const0_rtx;
12591 }
12592
12593 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12594 op0 = copy_to_mode_reg (mode0, op0);
12595
12596 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12597 op1 = copy_to_mode_reg (mode1, op1);
12598
12599 pat = GEN_FCN (icode) (op0, op1);
12600 if (! pat)
12601 return const0_rtx;
12602 emit_insn (pat);
12603
12604 return NULL_RTX;
12605 }
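/* Usage sketch (hypothetical values; assumes a hard-float target):

     double d = ...;
     __builtin_mtfsf (0xff, d);   // literal mask 0..255: accepted
     __builtin_mtfsf (n, d);      // non-literal mask: diagnosed above
 */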
12606
12607
12608 static rtx
12609 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
12610 {
12611 rtx pat;
12612 tree arg0 = CALL_EXPR_ARG (exp, 0);
12613 rtx op0 = expand_normal (arg0);
12614 machine_mode tmode = insn_data[icode].operand[0].mode;
12615 machine_mode mode0 = insn_data[icode].operand[1].mode;
12616
12617 if (icode == CODE_FOR_nothing)
12618 /* Builtin not supported on this processor. */
12619 return 0;
12620
12621 /* If we got invalid arguments bail out before generating bad rtl. */
12622 if (arg0 == error_mark_node)
12623 return const0_rtx;
12624
12625 if (icode == CODE_FOR_altivec_vspltisb
12626 || icode == CODE_FOR_altivec_vspltish
12627 || icode == CODE_FOR_altivec_vspltisw
12628 || icode == CODE_FOR_spe_evsplatfi
12629 || icode == CODE_FOR_spe_evsplati)
12630 {
12631 /* Only allow 5-bit *signed* literals. */
12632 if (GET_CODE (op0) != CONST_INT
12633 || INTVAL (op0) > 15
12634 || INTVAL (op0) < -16)
12635 {
12636 error ("argument 1 must be a 5-bit signed literal");
12637 return const0_rtx;
12638 }
12639 }
12640
12641 if (target == 0
12642 || GET_MODE (target) != tmode
12643 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12644 target = gen_reg_rtx (tmode);
12645
12646 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12647 op0 = copy_to_mode_reg (mode0, op0);
12648
12649 pat = GEN_FCN (icode) (target, op0);
12650 if (! pat)
12651 return 0;
12652 emit_insn (pat);
12653
12654 return target;
12655 }
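/* For instance, the vspltis* splat builtins land here (a sketch;
   assumes -maltivec):

     vector int a = __builtin_altivec_vspltisw (5);    // -16..15: OK
     vector int b = __builtin_altivec_vspltisw (20);   // rejected above

   20 cannot be encoded in the instruction's 5-bit signed immediate, so
   the error path is taken instead of emitting bad RTL.  */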
12656
12657 static rtx
12658 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12659 {
12660 rtx pat, scratch1, scratch2;
12661 tree arg0 = CALL_EXPR_ARG (exp, 0);
12662 rtx op0 = expand_normal (arg0);
12663 machine_mode tmode = insn_data[icode].operand[0].mode;
12664 machine_mode mode0 = insn_data[icode].operand[1].mode;
12665
12666 /* If we have invalid arguments, bail out before generating bad rtl. */
12667 if (arg0 == error_mark_node)
12668 return const0_rtx;
12669
12670 if (target == 0
12671 || GET_MODE (target) != tmode
12672 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12673 target = gen_reg_rtx (tmode);
12674
12675 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12676 op0 = copy_to_mode_reg (mode0, op0);
12677
12678 scratch1 = gen_reg_rtx (mode0);
12679 scratch2 = gen_reg_rtx (mode0);
12680
12681 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12682 if (! pat)
12683 return 0;
12684 emit_insn (pat);
12685
12686 return target;
12687 }
12688
12689 static rtx
12690 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12691 {
12692 rtx pat;
12693 tree arg0 = CALL_EXPR_ARG (exp, 0);
12694 tree arg1 = CALL_EXPR_ARG (exp, 1);
12695 rtx op0 = expand_normal (arg0);
12696 rtx op1 = expand_normal (arg1);
12697 machine_mode tmode = insn_data[icode].operand[0].mode;
12698 machine_mode mode0 = insn_data[icode].operand[1].mode;
12699 machine_mode mode1 = insn_data[icode].operand[2].mode;
12700
12701 if (icode == CODE_FOR_nothing)
12702 /* Builtin not supported on this processor. */
12703 return 0;
12704
12705 /* If we got invalid arguments bail out before generating bad rtl. */
12706 if (arg0 == error_mark_node || arg1 == error_mark_node)
12707 return const0_rtx;
12708
12709 if (icode == CODE_FOR_altivec_vcfux
12710 || icode == CODE_FOR_altivec_vcfsx
12711 || icode == CODE_FOR_altivec_vctsxs
12712 || icode == CODE_FOR_altivec_vctuxs
12713 || icode == CODE_FOR_altivec_vspltb
12714 || icode == CODE_FOR_altivec_vsplth
12715 || icode == CODE_FOR_altivec_vspltw
12716 || icode == CODE_FOR_spe_evaddiw
12717 || icode == CODE_FOR_spe_evldd
12718 || icode == CODE_FOR_spe_evldh
12719 || icode == CODE_FOR_spe_evldw
12720 || icode == CODE_FOR_spe_evlhhesplat
12721 || icode == CODE_FOR_spe_evlhhossplat
12722 || icode == CODE_FOR_spe_evlhhousplat
12723 || icode == CODE_FOR_spe_evlwhe
12724 || icode == CODE_FOR_spe_evlwhos
12725 || icode == CODE_FOR_spe_evlwhou
12726 || icode == CODE_FOR_spe_evlwhsplat
12727 || icode == CODE_FOR_spe_evlwwsplat
12728 || icode == CODE_FOR_spe_evrlwi
12729 || icode == CODE_FOR_spe_evslwi
12730 || icode == CODE_FOR_spe_evsrwis
12731 || icode == CODE_FOR_spe_evsubifw
12732 || icode == CODE_FOR_spe_evsrwiu)
12733 {
12734 /* Only allow 5-bit unsigned literals. */
12735 STRIP_NOPS (arg1);
12736 if (TREE_CODE (arg1) != INTEGER_CST
12737 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12738 {
12739 error ("argument 2 must be a 5-bit unsigned literal");
12740 return const0_rtx;
12741 }
12742 }
12743
12744 if (target == 0
12745 || GET_MODE (target) != tmode
12746 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12747 target = gen_reg_rtx (tmode);
12748
12749 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12750 op0 = copy_to_mode_reg (mode0, op0);
12751 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12752 op1 = copy_to_mode_reg (mode1, op1);
12753
12754 pat = GEN_FCN (icode) (target, op0, op1);
12755 if (! pat)
12756 return 0;
12757 emit_insn (pat);
12758
12759 return target;
12760 }
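/* Likewise for the binary case, e.g. the word-splat builtin
   (illustrative only; assumes -maltivec):

     vector int v, s, t;
     s = __builtin_altivec_vspltw (v, 3);   // 5-bit unsigned literal: OK
     t = __builtin_altivec_vspltw (v, n);   // variable count: diagnosed

   The literal check above runs before any RTL is generated.  */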
12761
12762 static rtx
12763 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12764 {
12765 rtx pat, scratch;
12766 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12767 tree arg0 = CALL_EXPR_ARG (exp, 1);
12768 tree arg1 = CALL_EXPR_ARG (exp, 2);
12769 rtx op0 = expand_normal (arg0);
12770 rtx op1 = expand_normal (arg1);
12771 machine_mode tmode = SImode;
12772 machine_mode mode0 = insn_data[icode].operand[1].mode;
12773 machine_mode mode1 = insn_data[icode].operand[2].mode;
12774 int cr6_form_int;
12775
12776 if (TREE_CODE (cr6_form) != INTEGER_CST)
12777 {
12778 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12779 return const0_rtx;
12780 }
12781 else
12782 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12783
12784 gcc_assert (mode0 == mode1);
12785
12786 /* If we have invalid arguments, bail out before generating bad rtl. */
12787 if (arg0 == error_mark_node || arg1 == error_mark_node)
12788 return const0_rtx;
12789
12790 if (target == 0
12791 || GET_MODE (target) != tmode
12792 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12793 target = gen_reg_rtx (tmode);
12794
12795 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12796 op0 = copy_to_mode_reg (mode0, op0);
12797 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12798 op1 = copy_to_mode_reg (mode1, op1);
12799
12800 scratch = gen_reg_rtx (mode0);
12801
12802 pat = GEN_FCN (icode) (scratch, op0, op1);
12803 if (! pat)
12804 return 0;
12805 emit_insn (pat);
12806
12807 /* The vec_any* and vec_all* predicates use the same opcodes for two
12808 different operations, but the bits in CR6 will be different
12809 depending on what information we want. So we have to play tricks
12810 with CR6 to get the right bits out.
12811
12812 If you think this is disgusting, look at the specs for the
12813 AltiVec predicates. */
12814
12815 switch (cr6_form_int)
12816 {
12817 case 0:
12818 emit_insn (gen_cr6_test_for_zero (target));
12819 break;
12820 case 1:
12821 emit_insn (gen_cr6_test_for_zero_reverse (target));
12822 break;
12823 case 2:
12824 emit_insn (gen_cr6_test_for_lt (target));
12825 break;
12826 case 3:
12827 emit_insn (gen_cr6_test_for_lt_reverse (target));
12828 break;
12829 default:
12830 error ("argument 1 of __builtin_altivec_predicate is out of range");
12831 break;
12832 }
12833
12834 return target;
12835 }
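/* The cr6_form values come from altivec.h; schematically:

     vec_all_ne (a, b)  -->  ..._p (__CR6_EQ,     a, b)   // form 0
     vec_any_eq (a, b)  -->  ..._p (__CR6_EQ_REV, a, b)   // form 1
     vec_all_eq (a, b)  -->  ..._p (__CR6_LT,     a, b)   // form 2
     vec_any_ne (a, b)  -->  ..._p (__CR6_LT_REV, a, b)   // form 3

   so, for example, vec_all_eq reaches the switch above with
   cr6_form_int == 2 and emits gen_cr6_test_for_lt on the comparison
   result.  */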
12836
12837 static rtx
12838 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12839 {
12840 rtx pat, addr;
12841 tree arg0 = CALL_EXPR_ARG (exp, 0);
12842 tree arg1 = CALL_EXPR_ARG (exp, 1);
12843 machine_mode tmode = insn_data[icode].operand[0].mode;
12844 machine_mode mode0 = Pmode;
12845 machine_mode mode1 = Pmode;
12846 rtx op0 = expand_normal (arg0);
12847 rtx op1 = expand_normal (arg1);
12848
12849 if (icode == CODE_FOR_nothing)
12850 /* Builtin not supported on this processor. */
12851 return 0;
12852
12853 /* If we got invalid arguments bail out before generating bad rtl. */
12854 if (arg0 == error_mark_node || arg1 == error_mark_node)
12855 return const0_rtx;
12856
12857 if (target == 0
12858 || GET_MODE (target) != tmode
12859 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12860 target = gen_reg_rtx (tmode);
12861
12862 op1 = copy_to_mode_reg (mode1, op1);
12863
12864 if (op0 == const0_rtx)
12865 {
12866 addr = gen_rtx_MEM (tmode, op1);
12867 }
12868 else
12869 {
12870 op0 = copy_to_mode_reg (mode0, op0);
12871 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12872 }
12873
12874 pat = GEN_FCN (icode) (target, addr);
12875
12876 if (! pat)
12877 return 0;
12878 emit_insn (pat);
12879
12880 return target;
12881 }
12882
12883 /* Return a constant vector for use as a little-endian permute control vector
12884 to reverse the order of elements of the given vector mode. */
12885 static rtx
12886 swap_selector_for_mode (machine_mode mode)
12887 {
12888 /* These are little endian vectors, so their elements are reversed
12889 from what you would normally expect for a permute control vector. */
12890 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12891 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12892 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12893 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12894 unsigned int *swaparray, i;
12895 rtx perm[16];
12896
12897 switch (mode)
12898 {
12899 case V2DFmode:
12900 case V2DImode:
12901 swaparray = swap2;
12902 break;
12903 case V4SFmode:
12904 case V4SImode:
12905 swaparray = swap4;
12906 break;
12907 case V8HImode:
12908 swaparray = swap8;
12909 break;
12910 case V16QImode:
12911 swaparray = swap16;
12912 break;
12913 default:
12914 gcc_unreachable ();
12915 }
12916
12917 for (i = 0; i < 16; ++i)
12918 perm[i] = GEN_INT (swaparray[i]);
12919
12920 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
12921 }
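/* Illustrative effect (a sketch, not emitted verbatim): for V4SImode,

     rtx sel = swap_selector_for_mode (V4SImode);

   yields a selector such that the vperm issued by the callers below
   turns the value {w0,w1,w2,w3} into {w3,w2,w1,w0}; the two-, eight-
   and sixteen-element modes are reversed analogously.  */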
12922
12923 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12924 with -maltivec=be specified. Issue the load followed by an element-reversing
12925 permute. */
12926 void
12927 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12928 {
12929 rtx tmp = gen_reg_rtx (mode);
12930 rtx load = gen_rtx_SET (tmp, op1);
12931 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12932 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12933 rtx sel = swap_selector_for_mode (mode);
12934 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12935
12936 gcc_assert (REG_P (op0));
12937 emit_insn (par);
12938 emit_insn (gen_rtx_SET (op0, vperm));
12939 }
12940
12941 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12942 with -maltivec=be specified. Issue the store preceded by an element-reversing
12943 permute. */
12944 void
12945 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12946 {
12947 rtx tmp = gen_reg_rtx (mode);
12948 rtx store = gen_rtx_SET (op0, tmp);
12949 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12950 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12951 rtx sel = swap_selector_for_mode (mode);
12952 rtx vperm;
12953
12954 gcc_assert (REG_P (op1));
12955 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12956 emit_insn (gen_rtx_SET (tmp, vperm));
12957 emit_insn (par);
12958 }
12959
12960 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12961 specified. Issue the store preceded by an element-reversing permute. */
12962 void
12963 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12964 {
12965 machine_mode inner_mode = GET_MODE_INNER (mode);
12966 rtx tmp = gen_reg_rtx (mode);
12967 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12968 rtx sel = swap_selector_for_mode (mode);
12969 rtx vperm;
12970
12971 gcc_assert (REG_P (op1));
12972 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12973 emit_insn (gen_rtx_SET (tmp, vperm));
12974 emit_insn (gen_rtx_SET (op0, stvx));
12975 }
12976
12977 static rtx
12978 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
12979 {
12980 rtx pat, addr;
12981 tree arg0 = CALL_EXPR_ARG (exp, 0);
12982 tree arg1 = CALL_EXPR_ARG (exp, 1);
12983 machine_mode tmode = insn_data[icode].operand[0].mode;
12984 machine_mode mode0 = Pmode;
12985 machine_mode mode1 = Pmode;
12986 rtx op0 = expand_normal (arg0);
12987 rtx op1 = expand_normal (arg1);
12988
12989 if (icode == CODE_FOR_nothing)
12990 /* Builtin not supported on this processor. */
12991 return 0;
12992
12993 /* If we got invalid arguments bail out before generating bad rtl. */
12994 if (arg0 == error_mark_node || arg1 == error_mark_node)
12995 return const0_rtx;
12996
12997 if (target == 0
12998 || GET_MODE (target) != tmode
12999 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13000 target = gen_reg_rtx (tmode);
13001
13002 op1 = copy_to_mode_reg (mode1, op1);
13003
13004 if (op0 == const0_rtx)
13005 {
13006 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
13007 }
13008 else
13009 {
13010 op0 = copy_to_mode_reg (mode0, op0);
13011 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
13012 }
13013
13014 pat = GEN_FCN (icode) (target, addr);
13015
13016 if (! pat)
13017 return 0;
13018 emit_insn (pat);
13019
13020 return target;
13021 }
13022
13023 static rtx
13024 spe_expand_stv_builtin (enum insn_code icode, tree exp)
13025 {
13026 tree arg0 = CALL_EXPR_ARG (exp, 0);
13027 tree arg1 = CALL_EXPR_ARG (exp, 1);
13028 tree arg2 = CALL_EXPR_ARG (exp, 2);
13029 rtx op0 = expand_normal (arg0);
13030 rtx op1 = expand_normal (arg1);
13031 rtx op2 = expand_normal (arg2);
13032 rtx pat;
13033 machine_mode mode0 = insn_data[icode].operand[0].mode;
13034 machine_mode mode1 = insn_data[icode].operand[1].mode;
13035 machine_mode mode2 = insn_data[icode].operand[2].mode;
13036
13037 /* Invalid arguments. Bail before doing anything stoopid! */
13038 if (arg0 == error_mark_node
13039 || arg1 == error_mark_node
13040 || arg2 == error_mark_node)
13041 return const0_rtx;
13042
13043 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
13044 op0 = copy_to_mode_reg (mode2, op0);
13045 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
13046 op1 = copy_to_mode_reg (mode0, op1);
13047 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13048 op2 = copy_to_mode_reg (mode1, op2);
13049
13050 pat = GEN_FCN (icode) (op1, op2, op0);
13051 if (pat)
13052 emit_insn (pat);
13053 return NULL_RTX;
13054 }
13055
13056 static rtx
13057 paired_expand_stv_builtin (enum insn_code icode, tree exp)
13058 {
13059 tree arg0 = CALL_EXPR_ARG (exp, 0);
13060 tree arg1 = CALL_EXPR_ARG (exp, 1);
13061 tree arg2 = CALL_EXPR_ARG (exp, 2);
13062 rtx op0 = expand_normal (arg0);
13063 rtx op1 = expand_normal (arg1);
13064 rtx op2 = expand_normal (arg2);
13065 rtx pat, addr;
13066 machine_mode tmode = insn_data[icode].operand[0].mode;
13067 machine_mode mode1 = Pmode;
13068 machine_mode mode2 = Pmode;
13069
13070 /* Invalid arguments. Bail before doing anything stoopid! */
13071 if (arg0 == error_mark_node
13072 || arg1 == error_mark_node
13073 || arg2 == error_mark_node)
13074 return const0_rtx;
13075
13076 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
13077 op0 = copy_to_mode_reg (tmode, op0);
13078
13079 op2 = copy_to_mode_reg (mode2, op2);
13080
13081 if (op1 == const0_rtx)
13082 {
13083 addr = gen_rtx_MEM (tmode, op2);
13084 }
13085 else
13086 {
13087 op1 = copy_to_mode_reg (mode1, op1);
13088 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
13089 }
13090
13091 pat = GEN_FCN (icode) (addr, op0);
13092 if (pat)
13093 emit_insn (pat);
13094 return NULL_RTX;
13095 }
13096
13097 static rtx
13098 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
13099 {
13100 tree arg0 = CALL_EXPR_ARG (exp, 0);
13101 tree arg1 = CALL_EXPR_ARG (exp, 1);
13102 tree arg2 = CALL_EXPR_ARG (exp, 2);
13103 rtx op0 = expand_normal (arg0);
13104 rtx op1 = expand_normal (arg1);
13105 rtx op2 = expand_normal (arg2);
13106 rtx pat, addr;
13107 machine_mode tmode = insn_data[icode].operand[0].mode;
13108 machine_mode smode = insn_data[icode].operand[1].mode;
13109 machine_mode mode1 = Pmode;
13110 machine_mode mode2 = Pmode;
13111
13112 /* Invalid arguments. Bail before doing anything stoopid! */
13113 if (arg0 == error_mark_node
13114 || arg1 == error_mark_node
13115 || arg2 == error_mark_node)
13116 return const0_rtx;
13117
13118 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
13119 op0 = copy_to_mode_reg (smode, op0);
13120
13121 op2 = copy_to_mode_reg (mode2, op2);
13122
13123 if (op1 == const0_rtx)
13124 {
13125 addr = gen_rtx_MEM (tmode, op2);
13126 }
13127 else
13128 {
13129 op1 = copy_to_mode_reg (mode1, op1);
13130 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
13131 }
13132
13133 pat = GEN_FCN (icode) (addr, op0);
13134 if (pat)
13135 emit_insn (pat);
13136 return NULL_RTX;
13137 }
13138
13139 /* Return the appropriate SPR number associated with the given builtin. */
13140 static inline HOST_WIDE_INT
13141 htm_spr_num (enum rs6000_builtins code)
13142 {
13143 if (code == HTM_BUILTIN_GET_TFHAR
13144 || code == HTM_BUILTIN_SET_TFHAR)
13145 return TFHAR_SPR;
13146 else if (code == HTM_BUILTIN_GET_TFIAR
13147 || code == HTM_BUILTIN_SET_TFIAR)
13148 return TFIAR_SPR;
13149 else if (code == HTM_BUILTIN_GET_TEXASR
13150 || code == HTM_BUILTIN_SET_TEXASR)
13151 return TEXASR_SPR;
13152 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
13153 || code == HTM_BUILTIN_SET_TEXASRU);
13154 return TEXASRU_SPR;
13155 }
13156
13157 /* Return the appropriate SPR regno associated with the given builtin. */
13158 static inline HOST_WIDE_INT
13159 htm_spr_regno (enum rs6000_builtins code)
13160 {
13161 if (code == HTM_BUILTIN_GET_TFHAR
13162 || code == HTM_BUILTIN_SET_TFHAR)
13163 return TFHAR_REGNO;
13164 else if (code == HTM_BUILTIN_GET_TFIAR
13165 || code == HTM_BUILTIN_SET_TFIAR)
13166 return TFIAR_REGNO;
13167 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
13168 || code == HTM_BUILTIN_SET_TEXASR
13169 || code == HTM_BUILTIN_GET_TEXASRU
13170 || code == HTM_BUILTIN_SET_TEXASRU);
13171 return TEXASR_REGNO;
13172 }
13173
13174 /* Return the correct ICODE value depending on whether we are
13175 setting or reading the HTM SPRs. */
13176 static inline enum insn_code
13177 rs6000_htm_spr_icode (bool nonvoid)
13178 {
13179 if (nonvoid)
13180 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
13181 else
13182 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
13183 }
13184
13185 /* Expand the HTM builtin in EXP and store the result in TARGET.
13186 Store true in *EXPANDEDP if we found a builtin to expand. */
13187 static rtx
13188 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
13189 {
13190 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13191 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
13192 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13193 const struct builtin_description *d;
13194 size_t i;
13195
13196 *expandedp = true;
13197
13198 if (!TARGET_POWERPC64
13199 && (fcode == HTM_BUILTIN_TABORTDC
13200 || fcode == HTM_BUILTIN_TABORTDCI))
13201 {
13202 size_t uns_fcode = (size_t)fcode;
13203 const char *name = rs6000_builtin_info[uns_fcode].name;
13204 error ("builtin %s is only valid in 64-bit mode", name);
13205 return const0_rtx;
13206 }
13207
13208 /* Expand the HTM builtins. */
13209 d = bdesc_htm;
13210 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
13211 if (d->code == fcode)
13212 {
13213 rtx op[MAX_HTM_OPERANDS], pat;
13214 int nopnds = 0;
13215 tree arg;
13216 call_expr_arg_iterator iter;
13217 unsigned attr = rs6000_builtin_info[fcode].attr;
13218 enum insn_code icode = d->icode;
13219 const struct insn_operand_data *insn_op;
13220 bool uses_spr = (attr & RS6000_BTC_SPR);
13221 rtx cr = NULL_RTX;
13222
13223 if (uses_spr)
13224 icode = rs6000_htm_spr_icode (nonvoid);
13225 insn_op = &insn_data[icode].operand[0];
13226
13227 if (nonvoid)
13228 {
13229 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
13230 if (!target
13231 || GET_MODE (target) != tmode
13232 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
13233 target = gen_reg_rtx (tmode);
13234 if (uses_spr)
13235 op[nopnds++] = target;
13236 }
13237
13238 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
13239 {
13240 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
13241 return const0_rtx;
13242
13243 insn_op = &insn_data[icode].operand[nopnds];
13244
13245 op[nopnds] = expand_normal (arg);
13246
13247 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
13248 {
13249 if (!strcmp (insn_op->constraint, "n"))
13250 {
13251 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
13252 if (!CONST_INT_P (op[nopnds]))
13253 error ("argument %d must be an unsigned literal", arg_num);
13254 else
13255 error ("argument %d is an unsigned literal that is "
13256 "out of range", arg_num);
13257 return const0_rtx;
13258 }
13259 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
13260 }
13261
13262 nopnds++;
13263 }
13264
13265 /* Handle the builtins for extended mnemonics. These accept
13266 no arguments, but map to builtins that take arguments. */
13267 switch (fcode)
13268 {
13269 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
13270 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
13271 op[nopnds++] = GEN_INT (1);
13272 if (flag_checking)
13273 attr |= RS6000_BTC_UNARY;
13274 break;
13275 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
13276 op[nopnds++] = GEN_INT (0);
13277 if (flag_checking)
13278 attr |= RS6000_BTC_UNARY;
13279 break;
13280 default:
13281 break;
13282 }
13283
13284 /* If this builtin accesses SPRs, then pass in the appropriate
13285 SPR number and SPR regno as the last two operands. */
13286 if (uses_spr)
13287 {
13288 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
13289 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
13290 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
13291 }
13292 /* If this builtin accesses a CR, then pass in a scratch
13293 CR as the last operand. */
13294 else if (attr & RS6000_BTC_CR)
13295 { cr = gen_reg_rtx (CCmode);
13296 op[nopnds++] = cr;
13297 }
13298
13299 if (flag_checking)
13300 {
13301 int expected_nopnds = 0;
13302 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
13303 expected_nopnds = 1;
13304 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
13305 expected_nopnds = 2;
13306 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
13307 expected_nopnds = 3;
13308 if (!(attr & RS6000_BTC_VOID))
13309 expected_nopnds += 1;
13310 if (uses_spr)
13311 expected_nopnds += 2;
13312
13313 gcc_assert (nopnds == expected_nopnds
13314 && nopnds <= MAX_HTM_OPERANDS);
13315 }
13316
13317 switch (nopnds)
13318 {
13319 case 1:
13320 pat = GEN_FCN (icode) (op[0]);
13321 break;
13322 case 2:
13323 pat = GEN_FCN (icode) (op[0], op[1]);
13324 break;
13325 case 3:
13326 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
13327 break;
13328 case 4:
13329 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
13330 break;
13331 default:
13332 gcc_unreachable ();
13333 }
13334 if (!pat)
13335 return NULL_RTX;
13336 emit_insn (pat);
13337
13338 if (attr & RS6000_BTC_CR)
13339 {
13340 if (fcode == HTM_BUILTIN_TBEGIN)
13341 {
13342 /* Emit code to set TARGET to true or false depending on
13343 whether the tbegin. instruction succeeded or failed
13344 to start a transaction. We do this by placing the 1's
13345 complement of CR's EQ bit into TARGET. */
13346 rtx scratch = gen_reg_rtx (SImode);
13347 emit_insn (gen_rtx_SET (scratch,
13348 gen_rtx_EQ (SImode, cr,
13349 const0_rtx)));
13350 emit_insn (gen_rtx_SET (target,
13351 gen_rtx_XOR (SImode, scratch,
13352 GEN_INT (1))));
13353 }
13354 else
13355 {
13356 /* Emit code to copy the 4-bit condition register field
13357 CR into the least significant end of register TARGET. */
13358 rtx scratch1 = gen_reg_rtx (SImode);
13359 rtx scratch2 = gen_reg_rtx (SImode);
13360 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
13361 emit_insn (gen_movcc (subreg, cr));
13362 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
13363 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
13364 }
13365 }
13366
13367 if (nonvoid)
13368 return target;
13369 return const0_rtx;
13370 }
13371
13372 *expandedp = false;
13373 return NULL_RTX;
13374 }
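/* Source-level sketch of the RS6000_BTC_CR path above (assumes -mhtm):

     if (__builtin_tbegin (0))       // 1: transaction started
       {
	 ... transactional code ...
	 __builtin_tend (0);
       }
     else
       ... fallback path ...

   The tbegin. case inverts CR's EQ bit so that the builtin reads as a
   boolean "did the transaction start".  */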
13375
13376 static rtx
13377 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
13378 {
13379 rtx pat;
13380 tree arg0 = CALL_EXPR_ARG (exp, 0);
13381 tree arg1 = CALL_EXPR_ARG (exp, 1);
13382 tree arg2 = CALL_EXPR_ARG (exp, 2);
13383 rtx op0 = expand_normal (arg0);
13384 rtx op1 = expand_normal (arg1);
13385 rtx op2 = expand_normal (arg2);
13386 machine_mode tmode = insn_data[icode].operand[0].mode;
13387 machine_mode mode0 = insn_data[icode].operand[1].mode;
13388 machine_mode mode1 = insn_data[icode].operand[2].mode;
13389 machine_mode mode2 = insn_data[icode].operand[3].mode;
13390
13391 if (icode == CODE_FOR_nothing)
13392 /* Builtin not supported on this processor. */
13393 return 0;
13394
13395 /* If we got invalid arguments bail out before generating bad rtl. */
13396 if (arg0 == error_mark_node
13397 || arg1 == error_mark_node
13398 || arg2 == error_mark_node)
13399 return const0_rtx;
13400
13401 /* Check and prepare argument depending on the instruction code.
13402
13403 Note that a switch statement instead of the sequence of tests
13404 would be incorrect as many of the CODE_FOR values could be
13405 CODE_FOR_nothing and that would yield multiple alternatives
13406 with identical values. We'd never reach here at runtime in
13407 this case. */
13408 if (icode == CODE_FOR_altivec_vsldoi_v4sf
13409 || icode == CODE_FOR_altivec_vsldoi_v4si
13410 || icode == CODE_FOR_altivec_vsldoi_v8hi
13411 || icode == CODE_FOR_altivec_vsldoi_v16qi)
13412 {
13413 /* Only allow 4-bit unsigned literals. */
13414 STRIP_NOPS (arg2);
13415 if (TREE_CODE (arg2) != INTEGER_CST
13416 || TREE_INT_CST_LOW (arg2) & ~0xf)
13417 {
13418 error ("argument 3 must be a 4-bit unsigned literal");
13419 return const0_rtx;
13420 }
13421 }
13422 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
13423 || icode == CODE_FOR_vsx_xxpermdi_v2di
13424 || icode == CODE_FOR_vsx_xxsldwi_v16qi
13425 || icode == CODE_FOR_vsx_xxsldwi_v8hi
13426 || icode == CODE_FOR_vsx_xxsldwi_v4si
13427 || icode == CODE_FOR_vsx_xxsldwi_v4sf
13428 || icode == CODE_FOR_vsx_xxsldwi_v2di
13429 || icode == CODE_FOR_vsx_xxsldwi_v2df)
13430 {
13431 /* Only allow 2-bit unsigned literals. */
13432 STRIP_NOPS (arg2);
13433 if (TREE_CODE (arg2) != INTEGER_CST
13434 || TREE_INT_CST_LOW (arg2) & ~0x3)
13435 {
13436 error ("argument 3 must be a 2-bit unsigned literal");
13437 return const0_rtx;
13438 }
13439 }
13440 else if (icode == CODE_FOR_vsx_set_v2df
13441 || icode == CODE_FOR_vsx_set_v2di
13442 || icode == CODE_FOR_bcdadd
13443 || icode == CODE_FOR_bcdadd_lt
13444 || icode == CODE_FOR_bcdadd_eq
13445 || icode == CODE_FOR_bcdadd_gt
13446 || icode == CODE_FOR_bcdsub
13447 || icode == CODE_FOR_bcdsub_lt
13448 || icode == CODE_FOR_bcdsub_eq
13449 || icode == CODE_FOR_bcdsub_gt)
13450 {
13451 /* Only allow 1-bit unsigned literals. */
13452 STRIP_NOPS (arg2);
13453 if (TREE_CODE (arg2) != INTEGER_CST
13454 || TREE_INT_CST_LOW (arg2) & ~0x1)
13455 {
13456 error ("argument 3 must be a 1-bit unsigned literal");
13457 return const0_rtx;
13458 }
13459 }
13460 else if (icode == CODE_FOR_dfp_ddedpd_dd
13461 || icode == CODE_FOR_dfp_ddedpd_td)
13462 {
13463 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
13464 STRIP_NOPS (arg0);
13465 if (TREE_CODE (arg0) != INTEGER_CST
13466 || TREE_INT_CST_LOW (arg0) & ~0x3)
13467 {
13468 error ("argument 1 must be 0 or 2");
13469 return const0_rtx;
13470 }
13471 }
13472 else if (icode == CODE_FOR_dfp_denbcd_dd
13473 || icode == CODE_FOR_dfp_denbcd_td)
13474 {
13475 /* Only allow 1-bit unsigned literals. */
13476 STRIP_NOPS (arg0);
13477 if (TREE_CODE (arg0) != INTEGER_CST
13478 || TREE_INT_CST_LOW (arg0) & ~0x1)
13479 {
13480 error ("argument 1 must be a 1-bit unsigned literal");
13481 return const0_rtx;
13482 }
13483 }
13484 else if (icode == CODE_FOR_dfp_dscli_dd
13485 || icode == CODE_FOR_dfp_dscli_td
13486 || icode == CODE_FOR_dfp_dscri_dd
13487 || icode == CODE_FOR_dfp_dscri_td)
13488 {
13489 /* Only allow 6-bit unsigned literals. */
13490 STRIP_NOPS (arg1);
13491 if (TREE_CODE (arg1) != INTEGER_CST
13492 || TREE_INT_CST_LOW (arg1) & ~0x3f)
13493 {
13494 error ("argument 2 must be a 6-bit unsigned literal");
13495 return const0_rtx;
13496 }
13497 }
13498 else if (icode == CODE_FOR_crypto_vshasigmaw
13499 || icode == CODE_FOR_crypto_vshasigmad)
13500 {
13501 /* Check whether the 2nd and 3rd arguments are integer constants and in
13502 range and prepare arguments. */
13503 STRIP_NOPS (arg1);
13504 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
13505 {
13506 error ("argument 2 must be 0 or 1");
13507 return const0_rtx;
13508 }
13509
13510 STRIP_NOPS (arg2);
13511 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
13512 {
13513 error ("argument 3 must be in the range 0..15");
13514 return const0_rtx;
13515 }
13516 }
13517
13518 if (target == 0
13519 || GET_MODE (target) != tmode
13520 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13521 target = gen_reg_rtx (tmode);
13522
13523 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13524 op0 = copy_to_mode_reg (mode0, op0);
13525 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13526 op1 = copy_to_mode_reg (mode1, op1);
13527 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13528 op2 = copy_to_mode_reg (mode2, op2);
13529
13530 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
13531 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
13532 else
13533 pat = GEN_FCN (icode) (target, op0, op1, op2);
13534 if (! pat)
13535 return 0;
13536 emit_insn (pat);
13537
13538 return target;
13539 }
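/* E.g. the vector shift-left-double builtins take their count here
   (a sketch; assumes -maltivec and altivec.h, and vec_sld resolves to
   one of the altivec_vsldoi_* icodes checked above):

     vector int r = vec_sld (a, b, 3);   // 4-bit literal: OK
     vector int s = vec_sld (a, b, n);   // diagnosed: not a literal
 */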
13540
13541 /* Expand the lvx builtins. */
13542 static rtx
13543 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
13544 {
13545 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13546 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13547 tree arg0;
13548 machine_mode tmode, mode0;
13549 rtx pat, op0;
13550 enum insn_code icode;
13551
13552 switch (fcode)
13553 {
13554 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
13555 icode = CODE_FOR_vector_altivec_load_v16qi;
13556 break;
13557 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
13558 icode = CODE_FOR_vector_altivec_load_v8hi;
13559 break;
13560 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
13561 icode = CODE_FOR_vector_altivec_load_v4si;
13562 break;
13563 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
13564 icode = CODE_FOR_vector_altivec_load_v4sf;
13565 break;
13566 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
13567 icode = CODE_FOR_vector_altivec_load_v2df;
13568 break;
13569 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
12570 icode = CODE_FOR_vector_altivec_load_v2di;
break;
13571 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
13572 icode = CODE_FOR_vector_altivec_load_v1ti;
13573 break;
13574 default:
13575 *expandedp = false;
13576 return NULL_RTX;
13577 }
13578
13579 *expandedp = true;
13580
13581 arg0 = CALL_EXPR_ARG (exp, 0);
13582 op0 = expand_normal (arg0);
13583 tmode = insn_data[icode].operand[0].mode;
13584 mode0 = insn_data[icode].operand[1].mode;
13585
13586 if (target == 0
13587 || GET_MODE (target) != tmode
13588 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13589 target = gen_reg_rtx (tmode);
13590
13591 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13592 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13593
13594 pat = GEN_FCN (icode) (target, op0);
13595 if (! pat)
13596 return 0;
13597 emit_insn (pat);
13598 return target;
13599 }
13600
13601 /* Expand the stvx builtins. */
13602 static rtx
13603 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13604 bool *expandedp)
13605 {
13606 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13607 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13608 tree arg0, arg1;
13609 machine_mode mode0, mode1;
13610 rtx pat, op0, op1;
13611 enum insn_code icode;
13612
13613 switch (fcode)
13614 {
13615 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
13616 icode = CODE_FOR_vector_altivec_store_v16qi;
13617 break;
13618 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
13619 icode = CODE_FOR_vector_altivec_store_v8hi;
13620 break;
13621 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
13622 icode = CODE_FOR_vector_altivec_store_v4si;
13623 break;
13624 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
13625 icode = CODE_FOR_vector_altivec_store_v4sf;
13626 break;
13627 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
13628 icode = CODE_FOR_vector_altivec_store_v2df;
13629 break;
13630 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
12631 icode = CODE_FOR_vector_altivec_store_v2di;
break;
13632 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
13633 icode = CODE_FOR_vector_altivec_store_v1ti;
13634 break;
13635 default:
13636 *expandedp = false;
13637 return NULL_RTX;
13638 }
13639
13640 arg0 = CALL_EXPR_ARG (exp, 0);
13641 arg1 = CALL_EXPR_ARG (exp, 1);
13642 op0 = expand_normal (arg0);
13643 op1 = expand_normal (arg1);
13644 mode0 = insn_data[icode].operand[0].mode;
13645 mode1 = insn_data[icode].operand[1].mode;
13646
13647 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13648 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13649 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13650 op1 = copy_to_mode_reg (mode1, op1);
13651
13652 pat = GEN_FCN (icode) (op0, op1);
13653 if (pat)
13654 emit_insn (pat);
13655
13656 *expandedp = true;
13657 return NULL_RTX;
13658 }
13659
13660 /* Expand the dst builtins. */
13661 static rtx
13662 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13663 bool *expandedp)
13664 {
13665 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13666 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13667 tree arg0, arg1, arg2;
13668 machine_mode mode0, mode1;
13669 rtx pat, op0, op1, op2;
13670 const struct builtin_description *d;
13671 size_t i;
13672
13673 *expandedp = false;
13674
13675 /* Handle DST variants. */
13676 d = bdesc_dst;
13677 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
13678 if (d->code == fcode)
13679 {
13680 arg0 = CALL_EXPR_ARG (exp, 0);
13681 arg1 = CALL_EXPR_ARG (exp, 1);
13682 arg2 = CALL_EXPR_ARG (exp, 2);
13683 op0 = expand_normal (arg0);
13684 op1 = expand_normal (arg1);
13685 op2 = expand_normal (arg2);
13686 mode0 = insn_data[d->icode].operand[0].mode;
13687 mode1 = insn_data[d->icode].operand[1].mode;
13688
13689 /* Invalid arguments, bail out before generating bad rtl. */
13690 if (arg0 == error_mark_node
13691 || arg1 == error_mark_node
13692 || arg2 == error_mark_node)
13693 return const0_rtx;
13694
13695 *expandedp = true;
13696 STRIP_NOPS (arg2);
13697 if (TREE_CODE (arg2) != INTEGER_CST
13698 || TREE_INT_CST_LOW (arg2) & ~0x3)
13699 {
13700 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
13701 return const0_rtx;
13702 }
13703
13704 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13705 op0 = copy_to_mode_reg (Pmode, op0);
13706 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13707 op1 = copy_to_mode_reg (mode1, op1);
13708
13709 pat = GEN_FCN (d->icode) (op0, op1, op2);
13710 if (pat != 0)
13711 emit_insn (pat);
13712
13713 return NULL_RTX;
13714 }
13715
13716 return NULL_RTX;
13717 }
13718
13719 /* Expand vec_init builtin. */
13720 static rtx
13721 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
13722 {
13723 machine_mode tmode = TYPE_MODE (type);
13724 machine_mode inner_mode = GET_MODE_INNER (tmode);
13725 int i, n_elt = GET_MODE_NUNITS (tmode);
13726
13727 gcc_assert (VECTOR_MODE_P (tmode));
13728 gcc_assert (n_elt == call_expr_nargs (exp));
13729
13730 if (!target || !register_operand (target, tmode))
13731 target = gen_reg_rtx (tmode);
13732
13733 /* If we have a vector composed of a single element, such as V1TImode, do
13734 the initialization directly. */
13735 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
13736 {
13737 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
13738 emit_move_insn (target, gen_lowpart (tmode, x));
13739 }
13740 else
13741 {
13742 rtvec v = rtvec_alloc (n_elt);
13743
13744 for (i = 0; i < n_elt; ++i)
13745 {
13746 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
13747 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13748 }
13749
13750 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
13751 }
13752
13753 return target;
13754 }
13755
13756 /* Return the integer constant in ARG. Constrain it to be in the range
13757 of the subparts of VEC_TYPE; issue an error if not. */
13758
13759 static int
13760 get_element_number (tree vec_type, tree arg)
13761 {
13762 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13763
13764 if (!tree_fits_uhwi_p (arg)
13765 || (elt = tree_to_uhwi (arg), elt > max))
13766 {
13767 error ("selector must be an integer constant in the range 0..%wi", max);
13768 return 0;
13769 }
13770
13771 return elt;
13772 }
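/* Usage sketch (hypothetical selector values):

     __builtin_vec_ext_v4si (v, 2);   // elt == 2: in range for 4 subparts
     __builtin_vec_ext_v4si (v, 9);   // error: selector must be 0..3
 */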
13773
13774 /* Expand vec_set builtin. */
13775 static rtx
13776 altivec_expand_vec_set_builtin (tree exp)
13777 {
13778 machine_mode tmode, mode1;
13779 tree arg0, arg1, arg2;
13780 int elt;
13781 rtx op0, op1;
13782
13783 arg0 = CALL_EXPR_ARG (exp, 0);
13784 arg1 = CALL_EXPR_ARG (exp, 1);
13785 arg2 = CALL_EXPR_ARG (exp, 2);
13786
13787 tmode = TYPE_MODE (TREE_TYPE (arg0));
13788 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13789 gcc_assert (VECTOR_MODE_P (tmode));
13790
13791 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
13792 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
13793 elt = get_element_number (TREE_TYPE (arg0), arg2);
13794
13795 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
13796 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
13797
13798 op0 = force_reg (tmode, op0);
13799 op1 = force_reg (mode1, op1);
13800
13801 rs6000_expand_vector_set (op0, op1, elt);
13802
13803 return op0;
13804 }
13805
13806 /* Expand vec_ext builtin. */
13807 static rtx
13808 altivec_expand_vec_ext_builtin (tree exp, rtx target)
13809 {
13810 machine_mode tmode, mode0;
13811 tree arg0, arg1;
13812 int elt;
13813 rtx op0;
13814
13815 arg0 = CALL_EXPR_ARG (exp, 0);
13816 arg1 = CALL_EXPR_ARG (exp, 1);
13817
13818 op0 = expand_normal (arg0);
13819 elt = get_element_number (TREE_TYPE (arg0), arg1);
13820
13821 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13822 mode0 = TYPE_MODE (TREE_TYPE (arg0));
13823 gcc_assert (VECTOR_MODE_P (mode0));
13824
13825 op0 = force_reg (mode0, op0);
13826
13827 if (optimize || !target || !register_operand (target, tmode))
13828 target = gen_reg_rtx (tmode);
13829
13830 rs6000_expand_vector_extract (target, op0, elt);
13831
13832 return target;
13833 }
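/* Combined usage sketch for the vec_set/vec_ext expanders above
   (illustrative, assuming the <altivec.h> wrappers and -maltivec):

     #include <altivec.h>

     int
     replace_then_read (vector int v, int x)
     {
       v = vec_insert (x, v, 1);     // vec_set path: element 1 := x
       return vec_extract (v, 1);    // vec_ext path: read it back
     }

   Both element numbers are validated by get_element_number.  */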
13834
13835 /* Expand the builtin in EXP and store the result in TARGET. Store
13836 true in *EXPANDEDP if we found a builtin to expand. */
13837 static rtx
13838 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
13839 {
13840 const struct builtin_description *d;
13841 size_t i;
13842 enum insn_code icode;
13843 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13844 tree arg0;
13845 rtx op0, pat;
13846 machine_mode tmode, mode0;
13847 enum rs6000_builtins fcode
13848 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13849
13850 if (rs6000_overloaded_builtin_p (fcode))
13851 {
13852 *expandedp = true;
13853 error ("unresolved overload for Altivec builtin %qF", fndecl);
13854
13855 /* Given it is invalid, just generate a normal call. */
13856 return expand_call (exp, target, false);
13857 }
13858
13859 target = altivec_expand_ld_builtin (exp, target, expandedp);
13860 if (*expandedp)
13861 return target;
13862
13863 target = altivec_expand_st_builtin (exp, target, expandedp);
13864 if (*expandedp)
13865 return target;
13866
13867 target = altivec_expand_dst_builtin (exp, target, expandedp);
13868 if (*expandedp)
13869 return target;
13870
13871 *expandedp = true;
13872
13873 switch (fcode)
13874 {
13875 case ALTIVEC_BUILTIN_STVX_V2DF:
13876 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13877 case ALTIVEC_BUILTIN_STVX_V2DI:
13878 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13879 case ALTIVEC_BUILTIN_STVX_V4SF:
13880 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13881 case ALTIVEC_BUILTIN_STVX:
13882 case ALTIVEC_BUILTIN_STVX_V4SI:
13883 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13884 case ALTIVEC_BUILTIN_STVX_V8HI:
13885 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13886 case ALTIVEC_BUILTIN_STVX_V16QI:
13887 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13888 case ALTIVEC_BUILTIN_STVEBX:
13889 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13890 case ALTIVEC_BUILTIN_STVEHX:
13891 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13892 case ALTIVEC_BUILTIN_STVEWX:
13893 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13894 case ALTIVEC_BUILTIN_STVXL_V2DF:
13895 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13896 case ALTIVEC_BUILTIN_STVXL_V2DI:
13897 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13898 case ALTIVEC_BUILTIN_STVXL_V4SF:
13899 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13900 case ALTIVEC_BUILTIN_STVXL:
13901 case ALTIVEC_BUILTIN_STVXL_V4SI:
13902 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13903 case ALTIVEC_BUILTIN_STVXL_V8HI:
13904 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13905 case ALTIVEC_BUILTIN_STVXL_V16QI:
13906 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13907
13908 case ALTIVEC_BUILTIN_STVLX:
13909 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13910 case ALTIVEC_BUILTIN_STVLXL:
13911 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13912 case ALTIVEC_BUILTIN_STVRX:
13913 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13914 case ALTIVEC_BUILTIN_STVRXL:
13915 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13916
13917 case VSX_BUILTIN_STXVD2X_V1TI:
13918 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13919 case VSX_BUILTIN_STXVD2X_V2DF:
13920 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13921 case VSX_BUILTIN_STXVD2X_V2DI:
13922 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13923 case VSX_BUILTIN_STXVW4X_V4SF:
13924 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13925 case VSX_BUILTIN_STXVW4X_V4SI:
13926 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13927 case VSX_BUILTIN_STXVW4X_V8HI:
13928 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13929 case VSX_BUILTIN_STXVW4X_V16QI:
13930 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13931
13932 case ALTIVEC_BUILTIN_MFVSCR:
13933 icode = CODE_FOR_altivec_mfvscr;
13934 tmode = insn_data[icode].operand[0].mode;
13935
13936 if (target == 0
13937 || GET_MODE (target) != tmode
13938 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13939 target = gen_reg_rtx (tmode);
13940
13941 pat = GEN_FCN (icode) (target);
13942 if (! pat)
13943 return 0;
13944 emit_insn (pat);
13945 return target;
13946
13947 case ALTIVEC_BUILTIN_MTVSCR:
13948 icode = CODE_FOR_altivec_mtvscr;
13949 arg0 = CALL_EXPR_ARG (exp, 0);
13950 op0 = expand_normal (arg0);
13951 mode0 = insn_data[icode].operand[0].mode;
13952
13953 /* If we got invalid arguments, bail out before generating bad rtl. */
13954 if (arg0 == error_mark_node)
13955 return const0_rtx;
13956
13957 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13958 op0 = copy_to_mode_reg (mode0, op0);
13959
13960 pat = GEN_FCN (icode) (op0);
13961 if (pat)
13962 emit_insn (pat);
13963 return NULL_RTX;
13964
13965 case ALTIVEC_BUILTIN_DSSALL:
13966 emit_insn (gen_altivec_dssall ());
13967 return NULL_RTX;
13968
13969 case ALTIVEC_BUILTIN_DSS:
13970 icode = CODE_FOR_altivec_dss;
13971 arg0 = CALL_EXPR_ARG (exp, 0);
13972 STRIP_NOPS (arg0);
13973 op0 = expand_normal (arg0);
13974 mode0 = insn_data[icode].operand[0].mode;
13975
13976 /* If we got invalid arguments, bail out before generating bad rtl. */
13977 if (arg0 == error_mark_node)
13978 return const0_rtx;
13979
13980 if (TREE_CODE (arg0) != INTEGER_CST
13981 || TREE_INT_CST_LOW (arg0) & ~0x3)
13982 {
13983 error ("argument to dss must be a 2-bit unsigned literal");
13984 return const0_rtx;
13985 }
13986
13987 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13988 op0 = copy_to_mode_reg (mode0, op0);
13989
13990 emit_insn (gen_altivec_dss (op0));
13991 return NULL_RTX;
13992
13993 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
13994 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
13995 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
13996 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
13997 case VSX_BUILTIN_VEC_INIT_V2DF:
13998 case VSX_BUILTIN_VEC_INIT_V2DI:
13999 case VSX_BUILTIN_VEC_INIT_V1TI:
14000 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
14001
14002 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
14003 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
14004 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
14005 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
14006 case VSX_BUILTIN_VEC_SET_V2DF:
14007 case VSX_BUILTIN_VEC_SET_V2DI:
14008 case VSX_BUILTIN_VEC_SET_V1TI:
14009 return altivec_expand_vec_set_builtin (exp);
14010
14011 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
14012 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
14013 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
14014 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
14015 case VSX_BUILTIN_VEC_EXT_V2DF:
14016 case VSX_BUILTIN_VEC_EXT_V2DI:
14017 case VSX_BUILTIN_VEC_EXT_V1TI:
14018 return altivec_expand_vec_ext_builtin (exp, target);
14019
14020 default:
14021 break;
14023 }
14024
14025 /* Expand abs* operations. */
14026 d = bdesc_abs;
14027 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14028 if (d->code == fcode)
14029 return altivec_expand_abs_builtin (d->icode, exp, target);
14030
14031 /* Expand the AltiVec predicates. */
14032 d = bdesc_altivec_preds;
14033 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14034 if (d->code == fcode)
14035 return altivec_expand_predicate_builtin (d->icode, exp, target);
14036
14037 /* The LV* builtins are funky: they were initialized differently from the other builtins, so expand them here by hand. */
14038 switch (fcode)
14039 {
14040 case ALTIVEC_BUILTIN_LVSL:
14041 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
14042 exp, target, false);
14043 case ALTIVEC_BUILTIN_LVSR:
14044 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
14045 exp, target, false);
14046 case ALTIVEC_BUILTIN_LVEBX:
14047 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
14048 exp, target, false);
14049 case ALTIVEC_BUILTIN_LVEHX:
14050 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
14051 exp, target, false);
14052 case ALTIVEC_BUILTIN_LVEWX:
14053 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
14054 exp, target, false);
14055 case ALTIVEC_BUILTIN_LVXL_V2DF:
14056 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
14057 exp, target, false);
14058 case ALTIVEC_BUILTIN_LVXL_V2DI:
14059 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
14060 exp, target, false);
14061 case ALTIVEC_BUILTIN_LVXL_V4SF:
14062 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
14063 exp, target, false);
14064 case ALTIVEC_BUILTIN_LVXL:
14065 case ALTIVEC_BUILTIN_LVXL_V4SI:
14066 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
14067 exp, target, false);
14068 case ALTIVEC_BUILTIN_LVXL_V8HI:
14069 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
14070 exp, target, false);
14071 case ALTIVEC_BUILTIN_LVXL_V16QI:
14072 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
14073 exp, target, false);
14074 case ALTIVEC_BUILTIN_LVX_V2DF:
14075 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
14076 exp, target, false);
14077 case ALTIVEC_BUILTIN_LVX_V2DI:
14078 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
14079 exp, target, false);
14080 case ALTIVEC_BUILTIN_LVX_V4SF:
14081 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
14082 exp, target, false);
14083 case ALTIVEC_BUILTIN_LVX:
14084 case ALTIVEC_BUILTIN_LVX_V4SI:
14085 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
14086 exp, target, false);
14087 case ALTIVEC_BUILTIN_LVX_V8HI:
14088 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
14089 exp, target, false);
14090 case ALTIVEC_BUILTIN_LVX_V16QI:
14091 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
14092 exp, target, false);
14093 case ALTIVEC_BUILTIN_LVLX:
14094 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
14095 exp, target, true);
14096 case ALTIVEC_BUILTIN_LVLXL:
14097 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
14098 exp, target, true);
14099 case ALTIVEC_BUILTIN_LVRX:
14100 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
14101 exp, target, true);
14102 case ALTIVEC_BUILTIN_LVRXL:
14103 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
14104 exp, target, true);
14105 case VSX_BUILTIN_LXVD2X_V1TI:
14106 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
14107 exp, target, false);
14108 case VSX_BUILTIN_LXVD2X_V2DF:
14109 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
14110 exp, target, false);
14111 case VSX_BUILTIN_LXVD2X_V2DI:
14112 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
14113 exp, target, false);
14114 case VSX_BUILTIN_LXVW4X_V4SF:
14115 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
14116 exp, target, false);
14117 case VSX_BUILTIN_LXVW4X_V4SI:
14118 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
14119 exp, target, false);
14120 case VSX_BUILTIN_LXVW4X_V8HI:
14121 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
14122 exp, target, false);
14123 case VSX_BUILTIN_LXVW4X_V16QI:
14124 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
14125 exp, target, false);
14127 default:
14128 break;
14130 }
14131
14132 *expandedp = false;
14133 return NULL_RTX;
14134 }
14135
14136 /* Expand the builtin in EXP and store the result in TARGET. Store
14137 true in *EXPANDEDP if we found a builtin to expand. */
14138 static rtx
14139 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
14140 {
14141 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14142 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14143 const struct builtin_description *d;
14144 size_t i;
14145
14146 *expandedp = true;
14147
14148 switch (fcode)
14149 {
14150 case PAIRED_BUILTIN_STX:
14151 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
14152 case PAIRED_BUILTIN_LX:
14153 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
14154 default:
14155 break;
14157 }
14158
14159 /* Expand the paired predicates. */
14160 d = bdesc_paired_preds;
14161 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
14162 if (d->code == fcode)
14163 return paired_expand_predicate_builtin (d->icode, exp, target);
14164
14165 *expandedp = false;
14166 return NULL_RTX;
14167 }
14168
14169 /* Binops that need to be initialized manually, but can be expanded
14170 automagically by rs6000_expand_binop_builtin. */
14171 static const struct builtin_description bdesc_2arg_spe[] =
14172 {
14173 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
14174 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
14175 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
14176 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
14177 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
14178 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
14179 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
14180 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
14181 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
14182 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
14183 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
14184 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
14185 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
14186 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
14187 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
14188 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
14189 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
14190 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
14191 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
14192 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
14193 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
14194 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
14195 };
14196
14197 /* Expand the builtin in EXP and store the result in TARGET. Store
14198 true in *EXPANDEDP if we found a builtin to expand.
14199
14200 This expands the SPE builtins that are not simple unary and binary
14201 operations. */
14202 static rtx
14203 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
14204 {
14205 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14206 tree arg1, arg0;
14207 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14208 enum insn_code icode;
14209 machine_mode tmode, mode0;
14210 rtx pat, op0;
14211 const struct builtin_description *d;
14212 size_t i;
14213
14214 *expandedp = true;
14215
14216 /* Syntax check for a 5-bit unsigned immediate. */
14217 switch (fcode)
14218 {
14219 case SPE_BUILTIN_EVSTDD:
14220 case SPE_BUILTIN_EVSTDH:
14221 case SPE_BUILTIN_EVSTDW:
14222 case SPE_BUILTIN_EVSTWHE:
14223 case SPE_BUILTIN_EVSTWHO:
14224 case SPE_BUILTIN_EVSTWWE:
14225 case SPE_BUILTIN_EVSTWWO:
14226 arg1 = CALL_EXPR_ARG (exp, 2);
14227 if (TREE_CODE (arg1) != INTEGER_CST
14228 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14229 {
14230 error ("argument 2 must be a 5-bit unsigned literal");
14231 return const0_rtx;
14232 }
14233 break;
14234 default:
14235 break;
14236 }
14237
14238 /* The evsplat*i instructions are not quite generic. */
14239 switch (fcode)
14240 {
14241 case SPE_BUILTIN_EVSPLATFI:
14242 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
14243 exp, target);
14244 case SPE_BUILTIN_EVSPLATI:
14245 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
14246 exp, target);
14247 default:
14248 break;
14249 }
14250
14251 d = bdesc_2arg_spe;
14252 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
14253 if (d->code == fcode)
14254 return rs6000_expand_binop_builtin (d->icode, exp, target);
14255
14256 d = bdesc_spe_predicates;
14257 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
14258 if (d->code == fcode)
14259 return spe_expand_predicate_builtin (d->icode, exp, target);
14260
14261 d = bdesc_spe_evsel;
14262 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
14263 if (d->code == fcode)
14264 return spe_expand_evsel_builtin (d->icode, exp, target);
14265
14266 switch (fcode)
14267 {
14268 case SPE_BUILTIN_EVSTDDX:
14269 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
14270 case SPE_BUILTIN_EVSTDHX:
14271 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
14272 case SPE_BUILTIN_EVSTDWX:
14273 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
14274 case SPE_BUILTIN_EVSTWHEX:
14275 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
14276 case SPE_BUILTIN_EVSTWHOX:
14277 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
14278 case SPE_BUILTIN_EVSTWWEX:
14279 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
14280 case SPE_BUILTIN_EVSTWWOX:
14281 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
14282 case SPE_BUILTIN_EVSTDD:
14283 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
14284 case SPE_BUILTIN_EVSTDH:
14285 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
14286 case SPE_BUILTIN_EVSTDW:
14287 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
14288 case SPE_BUILTIN_EVSTWHE:
14289 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
14290 case SPE_BUILTIN_EVSTWHO:
14291 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
14292 case SPE_BUILTIN_EVSTWWE:
14293 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
14294 case SPE_BUILTIN_EVSTWWO:
14295 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
14296 case SPE_BUILTIN_MFSPEFSCR:
14297 icode = CODE_FOR_spe_mfspefscr;
14298 tmode = insn_data[icode].operand[0].mode;
14299
14300 if (target == 0
14301 || GET_MODE (target) != tmode
14302 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14303 target = gen_reg_rtx (tmode);
14304
14305 pat = GEN_FCN (icode) (target);
14306 if (! pat)
14307 return 0;
14308 emit_insn (pat);
14309 return target;
14310 case SPE_BUILTIN_MTSPEFSCR:
14311 icode = CODE_FOR_spe_mtspefscr;
14312 arg0 = CALL_EXPR_ARG (exp, 0);
14313 op0 = expand_normal (arg0);
14314 mode0 = insn_data[icode].operand[0].mode;
14315
14316 if (arg0 == error_mark_node)
14317 return const0_rtx;
14318
14319 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14320 op0 = copy_to_mode_reg (mode0, op0);
14321
14322 pat = GEN_FCN (icode) (op0);
14323 if (pat)
14324 emit_insn (pat);
14325 return NULL_RTX;
14326 default:
14327 break;
14328 }
14329
14330 *expandedp = false;
14331 return NULL_RTX;
14332 }
14333
14334 static rtx
14335 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14336 {
14337 rtx pat, scratch, tmp;
14338 tree form = CALL_EXPR_ARG (exp, 0);
14339 tree arg0 = CALL_EXPR_ARG (exp, 1);
14340 tree arg1 = CALL_EXPR_ARG (exp, 2);
14341 rtx op0 = expand_normal (arg0);
14342 rtx op1 = expand_normal (arg1);
14343 machine_mode mode0 = insn_data[icode].operand[1].mode;
14344 machine_mode mode1 = insn_data[icode].operand[2].mode;
14345 int form_int;
14346 enum rtx_code code;
14347
14348 if (TREE_CODE (form) != INTEGER_CST)
14349 {
14350 error ("argument 1 of __builtin_paired_predicate must be a constant");
14351 return const0_rtx;
14352 }
14353 else
14354 form_int = TREE_INT_CST_LOW (form);
14355
14356 gcc_assert (mode0 == mode1);
14357
14358 if (arg0 == error_mark_node || arg1 == error_mark_node)
14359 return const0_rtx;
14360
14361 if (target == 0
14362 || GET_MODE (target) != SImode
14363 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
14364 target = gen_reg_rtx (SImode);
14365 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
14366 op0 = copy_to_mode_reg (mode0, op0);
14367 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
14368 op1 = copy_to_mode_reg (mode1, op1);
14369
14370 scratch = gen_reg_rtx (CCFPmode);
14371
14372 pat = GEN_FCN (icode) (scratch, op0, op1);
14373 if (!pat)
14374 return const0_rtx;
14375
14376 emit_insn (pat);
14377
14378 switch (form_int)
14379 {
14380 /* LT bit. */
14381 case 0:
14382 code = LT;
14383 break;
14384 /* GT bit. */
14385 case 1:
14386 code = GT;
14387 break;
14388 /* EQ bit. */
14389 case 2:
14390 code = EQ;
14391 break;
14392 /* UN bit. */
14393 case 3:
14394 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
14395 return target;
14396 default:
14397 error ("argument 1 of __builtin_paired_predicate is out of range");
14398 return const0_rtx;
14399 }
14400
14401 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
14402 emit_move_insn (target, tmp);
14403 return target;
14404 }
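/* Form-argument sketch for the paired predicates above (illustrative;
   "__builtin_paired_PRED" is a placeholder for a real name taken from
   bdesc_paired_preds):

     r = __builtin_paired_PRED (0, a, b);   // form 0: LT bit
     r = __builtin_paired_PRED (1, a, b);   // form 1: GT bit
     r = __builtin_paired_PRED (2, a, b);   // form 2: EQ bit
     r = __builtin_paired_PRED (3, a, b);   // form 3: UN bit (via the OV move)
*/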
14405
14406 static rtx
14407 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14408 {
14409 rtx pat, scratch, tmp;
14410 tree form = CALL_EXPR_ARG (exp, 0);
14411 tree arg0 = CALL_EXPR_ARG (exp, 1);
14412 tree arg1 = CALL_EXPR_ARG (exp, 2);
14413 rtx op0 = expand_normal (arg0);
14414 rtx op1 = expand_normal (arg1);
14415 machine_mode mode0 = insn_data[icode].operand[1].mode;
14416 machine_mode mode1 = insn_data[icode].operand[2].mode;
14417 int form_int;
14418 enum rtx_code code;
14419
14420 if (TREE_CODE (form) != INTEGER_CST)
14421 {
14422 error ("argument 1 of __builtin_spe_predicate must be a constant");
14423 return const0_rtx;
14424 }
14425 else
14426 form_int = TREE_INT_CST_LOW (form);
14427
14428 gcc_assert (mode0 == mode1);
14429
14430 if (arg0 == error_mark_node || arg1 == error_mark_node)
14431 return const0_rtx;
14432
14433 if (target == 0
14434 || GET_MODE (target) != SImode
14435 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
14436 target = gen_reg_rtx (SImode);
14437
14438 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14439 op0 = copy_to_mode_reg (mode0, op0);
14440 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14441 op1 = copy_to_mode_reg (mode1, op1);
14442
14443 scratch = gen_reg_rtx (CCmode);
14444
14445 pat = GEN_FCN (icode) (scratch, op0, op1);
14446 if (! pat)
14447 return const0_rtx;
14448 emit_insn (pat);
14449
14450 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
14451 _lower_. We use one compare, but look in different bits of the
14452 CR for each variant.
14453
14454 There are 2 elements in each SPE SIMD type (upper/lower). The CR
14455 bits are set as follows:
14456
14457 BIT0 | BIT 1 | BIT 2 | BIT 3
14458 U | L | (U | L) | (U & L)
14459
14460 So, for an "all" relationship, BIT 3 would be set.
14461 For an "any" relationship, BIT 2 would be set. Etc.
14462
14463 Following traditional nomenclature, these bits map to:
14464
14465 BIT0 | BIT 1 | BIT 2 | BIT 3
14466 LT | GT | EQ | OV
14467
14468 Later, we will generate rtl to look in the OV, EQ, LT or GT bit, as appropriate for the requested form.
14469 */
14470
14471 switch (form_int)
14472 {
14473 /* All variant. OV bit. */
14474 case 0:
14475 /* We need to get to the OV bit, which is the ORDERED bit. We
14476 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
14477 that's ugly and will make validate_condition_mode die.
14478 So let's just use another pattern. */
14479 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
14480 return target;
14481 /* Any variant. EQ bit. */
14482 case 1:
14483 code = EQ;
14484 break;
14485 /* Upper variant. LT bit. */
14486 case 2:
14487 code = LT;
14488 break;
14489 /* Lower variant. GT bit. */
14490 case 3:
14491 code = GT;
14492 break;
14493 default:
14494 error ("argument 1 of __builtin_spe_predicate is out of range");
14495 return const0_rtx;
14496 }
14497
14498 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
14499 emit_move_insn (target, tmp);
14500
14501 return target;
14502 }
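/* Form-argument sketch (illustrative; "__builtin_spe_PRED" is a
   placeholder for a real predicate name from bdesc_spe_predicates):

     r = __builtin_spe_PRED (0, a, b);   // form 0: "all"   -> OV bit
     r = __builtin_spe_PRED (1, a, b);   // form 1: "any"   -> EQ bit
     r = __builtin_spe_PRED (2, a, b);   // form 2: "upper" -> LT bit
     r = __builtin_spe_PRED (3, a, b);   // form 3: "lower" -> GT bit
*/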
14503
14504 /* The evsel builtins look like this:
14505
14506 e = __builtin_spe_evsel_OP (a, b, c, d);
14507
14508 and work like this:
14509
14510 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
14511 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
14512 */
14513
14514 static rtx
14515 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
14516 {
14517 rtx pat, scratch;
14518 tree arg0 = CALL_EXPR_ARG (exp, 0);
14519 tree arg1 = CALL_EXPR_ARG (exp, 1);
14520 tree arg2 = CALL_EXPR_ARG (exp, 2);
14521 tree arg3 = CALL_EXPR_ARG (exp, 3);
14522 rtx op0 = expand_normal (arg0);
14523 rtx op1 = expand_normal (arg1);
14524 rtx op2 = expand_normal (arg2);
14525 rtx op3 = expand_normal (arg3);
14526 machine_mode mode0 = insn_data[icode].operand[1].mode;
14527 machine_mode mode1 = insn_data[icode].operand[2].mode;
14528
14529 gcc_assert (mode0 == mode1);
14530
14531 if (arg0 == error_mark_node || arg1 == error_mark_node
14532 || arg2 == error_mark_node || arg3 == error_mark_node)
14533 return const0_rtx;
14534
14535 if (target == 0
14536 || GET_MODE (target) != mode0
14537 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
14538 target = gen_reg_rtx (mode0);
14539
14540 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14541 op0 = copy_to_mode_reg (mode0, op0);
14542 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14543 op1 = copy_to_mode_reg (mode0, op1);
14544 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14545 op2 = copy_to_mode_reg (mode0, op2);
14546 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
14547 op3 = copy_to_mode_reg (mode0, op3);
14548
14549 /* Generate the compare. */
14550 scratch = gen_reg_rtx (CCmode);
14551 pat = GEN_FCN (icode) (scratch, op0, op1);
14552 if (! pat)
14553 return const0_rtx;
14554 emit_insn (pat);
14555
14556 if (mode0 == V2SImode)
14557 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
14558 else
14559 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
14560
14561 return target;
14562 }
14563
14564 /* Raise an error message for a builtin function that is called without the
14565 appropriate target options being set. */
14566
14567 static void
14568 rs6000_invalid_builtin (enum rs6000_builtins fncode)
14569 {
14570 size_t uns_fncode = (size_t)fncode;
14571 const char *name = rs6000_builtin_info[uns_fncode].name;
14572 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
14573
14574 gcc_assert (name != NULL);
14575 if ((fnmask & RS6000_BTM_CELL) != 0)
14576 error ("builtin function %s is only valid for the cell processor", name);
14577 else if ((fnmask & RS6000_BTM_VSX) != 0)
14578 error ("builtin function %s requires the -mvsx option", name);
14579 else if ((fnmask & RS6000_BTM_HTM) != 0)
14580 error ("builtin function %s requires the -mhtm option", name);
14581 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
14582 error ("builtin function %s requires the -maltivec option", name);
14583 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
14584 error ("builtin function %s requires the -mpaired option", name);
14585 else if ((fnmask & RS6000_BTM_SPE) != 0)
14586 error ("builtin function %s requires the -mspe option", name);
14587 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14588 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14589 error ("builtin function %s requires the -mhard-dfp and"
14590 " -mpower8-vector options", name);
14591 else if ((fnmask & RS6000_BTM_DFP) != 0)
14592 error ("builtin function %s requires the -mhard-dfp option", name);
14593 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
14594 error ("builtin function %s requires the -mpower8-vector option", name);
14595 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14596 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14597 error ("builtin function %s requires the -mhard-float and"
14598 " -mlong-double-128 options", name);
14599 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
14600 error ("builtin function %s requires the -mhard-float option", name);
14601 else
14602 error ("builtin function %s is not supported with the current options",
14603 name);
14604 }
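/* Illustrative trigger (the builtin name here is hypothetical): compiling

     vector double f (vector double a, vector double b)
     {
       return __builtin_vsx_xvadddp (a, b);
     }

   when the builtin exists but its RS6000_BTM_VSX mask is not satisfied
   routes through rs6000_invalid_builtin and emits the -mvsx diagnostic
   above.  */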
14605
14606 /* Expand an expression EXP that calls a built-in function,
14607 with result going to TARGET if that's convenient
14608 (and in mode MODE if that's convenient).
14609 SUBTARGET may be used as the target for computing one of EXP's operands.
14610 IGNORE is nonzero if the value is to be ignored. */
14611
14612 static rtx
14613 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14614 machine_mode mode ATTRIBUTE_UNUSED,
14615 int ignore ATTRIBUTE_UNUSED)
14616 {
14617 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14618 enum rs6000_builtins fcode
14619 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
14620 size_t uns_fcode = (size_t)fcode;
14621 const struct builtin_description *d;
14622 size_t i;
14623 rtx ret;
14624 bool success;
14625 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
14626 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
14627
14628 if (TARGET_DEBUG_BUILTIN)
14629 {
14630 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
14631 const char *name1 = rs6000_builtin_info[uns_fcode].name;
14632 const char *name2 = ((icode != CODE_FOR_nothing)
14633 ? get_insn_name ((int)icode)
14634 : "nothing");
14635 const char *name3;
14636
14637 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
14638 {
14639 default: name3 = "unknown"; break;
14640 case RS6000_BTC_SPECIAL: name3 = "special"; break;
14641 case RS6000_BTC_UNARY: name3 = "unary"; break;
14642 case RS6000_BTC_BINARY: name3 = "binary"; break;
14643 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
14644 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
14645 case RS6000_BTC_ABS: name3 = "abs"; break;
14646 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
14647 case RS6000_BTC_DST: name3 = "dst"; break;
14648 }
14649
14651 fprintf (stderr,
14652 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
14653 (name1) ? name1 : "---", fcode,
14654 (name2) ? name2 : "---", (int)icode,
14655 name3,
14656 func_valid_p ? "" : ", not valid");
14657 }
14658
14659 if (!func_valid_p)
14660 {
14661 rs6000_invalid_builtin (fcode);
14662
14663 /* Given it is invalid, just generate a normal call. */
14664 return expand_call (exp, target, ignore);
14665 }
14666
14667 switch (fcode)
14668 {
14669 case RS6000_BUILTIN_RECIP:
14670 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
14671
14672 case RS6000_BUILTIN_RECIPF:
14673 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
14674
14675 case RS6000_BUILTIN_RSQRTF:
14676 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
14677
14678 case RS6000_BUILTIN_RSQRT:
14679 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
14680
14681 case POWER7_BUILTIN_BPERMD:
14682 return rs6000_expand_binop_builtin (((TARGET_64BIT)
14683 ? CODE_FOR_bpermd_di
14684 : CODE_FOR_bpermd_si), exp, target);
14685
14686 case RS6000_BUILTIN_GET_TB:
14687 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
14688 target);
14689
14690 case RS6000_BUILTIN_MFTB:
14691 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
14692 ? CODE_FOR_rs6000_mftb_di
14693 : CODE_FOR_rs6000_mftb_si),
14694 target);
14695
14696 case RS6000_BUILTIN_MFFS:
14697 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
14698
14699 case RS6000_BUILTIN_MTFSF:
14700 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
14701
14702 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
14703 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
14704 {
14705 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
14706 : (int) CODE_FOR_altivec_lvsl_direct);
14707 machine_mode tmode = insn_data[icode].operand[0].mode;
14708 machine_mode mode = insn_data[icode].operand[1].mode;
14709 tree arg;
14710 rtx op, addr, pat;
14711
14712 gcc_assert (TARGET_ALTIVEC);
14713
14714 arg = CALL_EXPR_ARG (exp, 0);
14715 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
14716 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
14717 addr = memory_address (mode, op);
14718 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
14719 op = addr;
14720 else
14721 {
14722 /* For the load case we need to negate the address. */
14723 op = gen_reg_rtx (GET_MODE (addr));
14724 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
14725 }
14726 op = gen_rtx_MEM (mode, op);
14727
14728 if (target == 0
14729 || GET_MODE (target) != tmode
14730 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14731 target = gen_reg_rtx (tmode);
14732
14733 pat = GEN_FCN (icode) (target, op);
14734 if (!pat)
14735 return 0;
14736 emit_insn (pat);
14737
14738 return target;
14739 }
14740
14741 case ALTIVEC_BUILTIN_VCFUX:
14742 case ALTIVEC_BUILTIN_VCFSX:
14743 case ALTIVEC_BUILTIN_VCTUXS:
14744 case ALTIVEC_BUILTIN_VCTSXS:
14745 /* FIXME: There's got to be a nicer way to handle this case than
14746 constructing a new CALL_EXPR. */
14747 if (call_expr_nargs (exp) == 1)
14748 {
14749 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
14750 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
14751 }
14752 break;
14753
14754 default:
14755 break;
14756 }
14757
14758 if (TARGET_ALTIVEC)
14759 {
14760 ret = altivec_expand_builtin (exp, target, &success);
14761
14762 if (success)
14763 return ret;
14764 }
14765 if (TARGET_SPE)
14766 {
14767 ret = spe_expand_builtin (exp, target, &success);
14768
14769 if (success)
14770 return ret;
14771 }
14772 if (TARGET_PAIRED_FLOAT)
14773 {
14774 ret = paired_expand_builtin (exp, target, &success);
14775
14776 if (success)
14777 return ret;
14778 }
14779 if (TARGET_HTM)
14780 {
14781 ret = htm_expand_builtin (exp, target, &success);
14782
14783 if (success)
14784 return ret;
14785 }
14786
14787 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
14788 gcc_assert (attr == RS6000_BTC_UNARY
14789 || attr == RS6000_BTC_BINARY
14790 || attr == RS6000_BTC_TERNARY);
14791
14792 /* Handle simple unary operations. */
14793 d = bdesc_1arg;
14794 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14795 if (d->code == fcode)
14796 return rs6000_expand_unop_builtin (d->icode, exp, target);
14797
14798 /* Handle simple binary operations. */
14799 d = bdesc_2arg;
14800 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14801 if (d->code == fcode)
14802 return rs6000_expand_binop_builtin (d->icode, exp, target);
14803
14804 /* Handle simple ternary operations. */
14805 d = bdesc_3arg;
14806 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14807 if (d->code == fcode)
14808 return rs6000_expand_ternop_builtin (d->icode, exp, target);
14809
14810 gcc_unreachable ();
14811 }
14812
14813 static void
14814 rs6000_init_builtins (void)
14815 {
14816 tree tdecl;
14817 tree ftype;
14818 machine_mode mode;
14819
14820 if (TARGET_DEBUG_BUILTIN)
14821 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
14822 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
14823 (TARGET_SPE) ? ", spe" : "",
14824 (TARGET_ALTIVEC) ? ", altivec" : "",
14825 (TARGET_VSX) ? ", vsx" : "");
14826
14827 V2SI_type_node = build_vector_type (intSI_type_node, 2);
14828 V2SF_type_node = build_vector_type (float_type_node, 2);
14829 V2DI_type_node = build_vector_type (intDI_type_node, 2);
14830 V2DF_type_node = build_vector_type (double_type_node, 2);
14831 V4HI_type_node = build_vector_type (intHI_type_node, 4);
14832 V4SI_type_node = build_vector_type (intSI_type_node, 4);
14833 V4SF_type_node = build_vector_type (float_type_node, 4);
14834 V8HI_type_node = build_vector_type (intHI_type_node, 8);
14835 V16QI_type_node = build_vector_type (intQI_type_node, 16);
14836
14837 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
14838 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
14839 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
14840 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
14841
14842 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
14843 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
14844 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
14845 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
14846
14847 /* We use V1TI mode as a special container to hold __int128_t items that
14848 must live in VSX registers. */
14849 if (intTI_type_node)
14850 {
14851 V1TI_type_node = build_vector_type (intTI_type_node, 1);
14852 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
14853 }
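/* Sketch: on targets where intTI_type_node exists, the type registered
   later as "__vector __int128" is a one-element vector, e.g.

     __vector __int128 v = { ((__int128) 1) << 100 };

   which keeps the 128-bit value in a VSX register (illustrative).  */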
14854
14855 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
14856 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
14857 'vector unsigned short'. */
14858
14859 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14860 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14861 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14862 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14863 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14864
14865 long_integer_type_internal_node = long_integer_type_node;
14866 long_unsigned_type_internal_node = long_unsigned_type_node;
14867 long_long_integer_type_internal_node = long_long_integer_type_node;
14868 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14869 intQI_type_internal_node = intQI_type_node;
14870 uintQI_type_internal_node = unsigned_intQI_type_node;
14871 intHI_type_internal_node = intHI_type_node;
14872 uintHI_type_internal_node = unsigned_intHI_type_node;
14873 intSI_type_internal_node = intSI_type_node;
14874 uintSI_type_internal_node = unsigned_intSI_type_node;
14875 intDI_type_internal_node = intDI_type_node;
14876 uintDI_type_internal_node = unsigned_intDI_type_node;
14877 intTI_type_internal_node = intTI_type_node;
14878 uintTI_type_internal_node = unsigned_intTI_type_node;
14879 float_type_internal_node = float_type_node;
14880 double_type_internal_node = double_type_node;
14881 long_double_type_internal_node = long_double_type_node;
14882 dfloat64_type_internal_node = dfloat64_type_node;
14883 dfloat128_type_internal_node = dfloat128_type_node;
14884 void_type_internal_node = void_type_node;
14885
14886 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
14887 IFmode is the IBM extended 128-bit format that is a pair of doubles.
14888 TFmode will be either IEEE 128-bit floating point or the IBM double-double
14889 format that uses a pair of doubles, depending on the switches and
14890 defaults. */
14891 if (TARGET_FLOAT128)
14892 {
14893 ibm128_float_type_node = make_node (REAL_TYPE);
14894 TYPE_PRECISION (ibm128_float_type_node) = 128;
14895 layout_type (ibm128_float_type_node);
14896 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
14897
14898 ieee128_float_type_node = make_node (REAL_TYPE);
14899 TYPE_PRECISION (ieee128_float_type_node) = 128;
14900 layout_type (ieee128_float_type_node);
14901 SET_TYPE_MODE (ieee128_float_type_node, KFmode);
14902
14903 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
14904 "__float128");
14905
14906 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
14907 "__ibm128");
14908 }
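/* Usage sketch for the two types registered above (illustrative; literal
   suffix support is target-dependent):

     __float128 q = 1.0q;   // IEEE binary128, KFmode
     __ibm128   d = 1.0;    // IBM double-double, IFmode
*/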
14909
14910 /* Initialize the modes for builtin_function_type, mapping a machine mode to
14911 tree type node. */
14912 builtin_mode_to_type[QImode][0] = integer_type_node;
14913 builtin_mode_to_type[HImode][0] = integer_type_node;
14914 builtin_mode_to_type[SImode][0] = intSI_type_node;
14915 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14916 builtin_mode_to_type[DImode][0] = intDI_type_node;
14917 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14918 builtin_mode_to_type[TImode][0] = intTI_type_node;
14919 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14920 builtin_mode_to_type[SFmode][0] = float_type_node;
14921 builtin_mode_to_type[DFmode][0] = double_type_node;
14922 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
14923 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
14924 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14925 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14926 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14927 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14928 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14929 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14930 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14931 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14932 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14933 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14934 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14935 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14936 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14937 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14938 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14939 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14940 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14941 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
14942
14943 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
14944 TYPE_NAME (bool_char_type_node) = tdecl;
14945
14946 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
14947 TYPE_NAME (bool_short_type_node) = tdecl;
14948
14949 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
14950 TYPE_NAME (bool_int_type_node) = tdecl;
14951
14952 tdecl = add_builtin_type ("__pixel", pixel_type_node);
14953 TYPE_NAME (pixel_type_node) = tdecl;
14954
14955 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
14956 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
14957 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
14958 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
14959 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
14960
14961 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
14962 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
14963
14964 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
14965 TYPE_NAME (V16QI_type_node) = tdecl;
14966
14967 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
14968 TYPE_NAME (bool_V16QI_type_node) = tdecl;
14969
14970 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
14971 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
14972
14973 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
14974 TYPE_NAME (V8HI_type_node) = tdecl;
14975
14976 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
14977 TYPE_NAME (bool_V8HI_type_node) = tdecl;
14978
14979 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
14980 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
14981
14982 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
14983 TYPE_NAME (V4SI_type_node) = tdecl;
14984
14985 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
14986 TYPE_NAME (bool_V4SI_type_node) = tdecl;
14987
14988 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
14989 TYPE_NAME (V4SF_type_node) = tdecl;
14990
14991 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
14992 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
14993
14994 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
14995 TYPE_NAME (V2DF_type_node) = tdecl;
14996
14997 if (TARGET_POWERPC64)
14998 {
14999 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
15000 TYPE_NAME (V2DI_type_node) = tdecl;
15001
15002 tdecl = add_builtin_type ("__vector unsigned long",
15003 unsigned_V2DI_type_node);
15004 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
15005
15006 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
15007 TYPE_NAME (bool_V2DI_type_node) = tdecl;
15008 }
15009 else
15010 {
15011 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
15012 TYPE_NAME (V2DI_type_node) = tdecl;
15013
15014 tdecl = add_builtin_type ("__vector unsigned long long",
15015 unsigned_V2DI_type_node);
15016 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
15017
15018 tdecl = add_builtin_type ("__vector __bool long long",
15019 bool_V2DI_type_node);
15020 TYPE_NAME (bool_V2DI_type_node) = tdecl;
15021 }
15022
15023 if (V1TI_type_node)
15024 {
15025 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
15026 TYPE_NAME (V1TI_type_node) = tdecl;
15027
15028 tdecl = add_builtin_type ("__vector unsigned __int128",
15029 unsigned_V1TI_type_node);
15030 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
15031 }
15032
15033 /* Paired and SPE builtins are only available if the compiler was built
15034 with the corresponding options, so create them only when those options
15035 are enabled. Create AltiVec and VSX builtins on machines with at least
15036 the general purpose extensions (970 and newer) to allow the use of the
15037 target attribute. */
15038 if (TARGET_PAIRED_FLOAT)
15039 paired_init_builtins ();
15040 if (TARGET_SPE)
15041 spe_init_builtins ();
15042 if (TARGET_EXTRA_BUILTINS)
15043 altivec_init_builtins ();
15044 if (TARGET_HTM)
15045 htm_init_builtins ();
15046
15047 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
15048 rs6000_common_init_builtins ();
15049
15050 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
15051 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
15052 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
15053
15054 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
15055 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
15056 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
15057
15058 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
15059 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
15060 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
15061
15062 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
15063 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
15064 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
15065
15066 mode = (TARGET_64BIT) ? DImode : SImode;
15067 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
15068 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
15069 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
15070
15071 ftype = build_function_type_list (unsigned_intDI_type_node,
15072 NULL_TREE);
15073 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
15074
15075 if (TARGET_64BIT)
15076 ftype = build_function_type_list (unsigned_intDI_type_node,
15077 NULL_TREE);
15078 else
15079 ftype = build_function_type_list (unsigned_intSI_type_node,
15080 NULL_TREE);
15081 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
15082
15083 ftype = build_function_type_list (double_type_node, NULL_TREE);
15084 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
15085
15086 ftype = build_function_type_list (void_type_node,
15087 intSI_type_node, double_type_node,
15088 NULL_TREE);
15089 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
15090
15091 #if TARGET_XCOFF
15092 /* AIX libm provides clog as __clog. */
15093 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
15094 set_user_assembler_name (tdecl, "__clog");
15095 #endif
15096
15097 #ifdef SUBTARGET_INIT_BUILTINS
15098 SUBTARGET_INIT_BUILTINS;
15099 #endif
15100 }
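/* Usage sketch for some of the generic builtins registered above
   (illustrative fragments, not compiled here):

     double r  = __builtin_recipdiv (x, y);    // x/y via reciprocal estimate
     float  rs = __builtin_rsqrtf (x);         // reciprocal square root estimate
     unsigned long long tb = __builtin_ppc_get_timebase ();
*/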
15101
15102 /* Returns the rs6000 builtin decl for CODE. */
15103
15104 static tree
15105 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
15106 {
15107 HOST_WIDE_INT fnmask;
15108
15109 if (code >= RS6000_BUILTIN_COUNT)
15110 return error_mark_node;
15111
15112 fnmask = rs6000_builtin_info[code].mask;
15113 if ((fnmask & rs6000_builtin_mask) != fnmask)
15114 {
15115 rs6000_invalid_builtin ((enum rs6000_builtins)code);
15116 return error_mark_node;
15117 }
15118
15119 return rs6000_builtin_decls[code];
15120 }
15121
15122 static void
15123 spe_init_builtins (void)
15124 {
15125 tree puint_type_node = build_pointer_type (unsigned_type_node);
15126 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
15127 const struct builtin_description *d;
15128 size_t i;
15129
15130 tree v2si_ftype_4_v2si
15131 = build_function_type_list (opaque_V2SI_type_node,
15132 opaque_V2SI_type_node,
15133 opaque_V2SI_type_node,
15134 opaque_V2SI_type_node,
15135 opaque_V2SI_type_node,
15136 NULL_TREE);
15137
15138 tree v2sf_ftype_4_v2sf
15139 = build_function_type_list (opaque_V2SF_type_node,
15140 opaque_V2SF_type_node,
15141 opaque_V2SF_type_node,
15142 opaque_V2SF_type_node,
15143 opaque_V2SF_type_node,
15144 NULL_TREE);
15145
15146 tree int_ftype_int_v2si_v2si
15147 = build_function_type_list (integer_type_node,
15148 integer_type_node,
15149 opaque_V2SI_type_node,
15150 opaque_V2SI_type_node,
15151 NULL_TREE);
15152
15153 tree int_ftype_int_v2sf_v2sf
15154 = build_function_type_list (integer_type_node,
15155 integer_type_node,
15156 opaque_V2SF_type_node,
15157 opaque_V2SF_type_node,
15158 NULL_TREE);
15159
15160 tree void_ftype_v2si_puint_int
15161 = build_function_type_list (void_type_node,
15162 opaque_V2SI_type_node,
15163 puint_type_node,
15164 integer_type_node,
15165 NULL_TREE);
15166
15167 tree void_ftype_v2si_puint_char
15168 = build_function_type_list (void_type_node,
15169 opaque_V2SI_type_node,
15170 puint_type_node,
15171 char_type_node,
15172 NULL_TREE);
15173
15174 tree void_ftype_v2si_pv2si_int
15175 = build_function_type_list (void_type_node,
15176 opaque_V2SI_type_node,
15177 opaque_p_V2SI_type_node,
15178 integer_type_node,
15179 NULL_TREE);
15180
15181 tree void_ftype_v2si_pv2si_char
15182 = build_function_type_list (void_type_node,
15183 opaque_V2SI_type_node,
15184 opaque_p_V2SI_type_node,
15185 char_type_node,
15186 NULL_TREE);
15187
15188 tree void_ftype_int
15189 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
15190
15191 tree int_ftype_void
15192 = build_function_type_list (integer_type_node, NULL_TREE);
15193
15194 tree v2si_ftype_pv2si_int
15195 = build_function_type_list (opaque_V2SI_type_node,
15196 opaque_p_V2SI_type_node,
15197 integer_type_node,
15198 NULL_TREE);
15199
15200 tree v2si_ftype_puint_int
15201 = build_function_type_list (opaque_V2SI_type_node,
15202 puint_type_node,
15203 integer_type_node,
15204 NULL_TREE);
15205
15206 tree v2si_ftype_pushort_int
15207 = build_function_type_list (opaque_V2SI_type_node,
15208 pushort_type_node,
15209 integer_type_node,
15210 NULL_TREE);
15211
15212 tree v2si_ftype_signed_char
15213 = build_function_type_list (opaque_V2SI_type_node,
15214 signed_char_type_node,
15215 NULL_TREE);
15216
15217 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
15218
15219 /* Initialize irregular SPE builtins. */
15220
15221 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
15222 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
15223 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
15224 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
15225 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
15226 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
15227 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
15228 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
15229 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
15230 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
15231 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
15232 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
15233 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
15234 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
15235 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
15236 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
15237 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
15238 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
15239
15240 /* Loads. */
15241 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
15242 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
15243 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
15244 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
15245 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
15246 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
15247 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
15248 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
15249 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
15250 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
15251 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
15252 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
15253 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
15254 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
15255 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
15256 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
15257 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
15258 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
15259 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
15260 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
15261 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
15262 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
15263
15264 /* Predicates. */
15265 d = bdesc_spe_predicates;
15266 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
15267 {
15268 tree type;
15269
15270 switch (insn_data[d->icode].operand[1].mode)
15271 {
15272 case V2SImode:
15273 type = int_ftype_int_v2si_v2si;
15274 break;
15275 case V2SFmode:
15276 type = int_ftype_int_v2sf_v2sf;
15277 break;
15278 default:
15279 gcc_unreachable ();
15280 }
15281
15282 def_builtin (d->name, type, d->code);
15283 }
15284
15285 /* Evsel predicates. */
15286 d = bdesc_spe_evsel;
15287 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
15288 {
15289 tree type;
15290
15291 switch (insn_data[d->icode].operand[1].mode)
15292 {
15293 case V2SImode:
15294 type = v2si_ftype_4_v2si;
15295 break;
15296 case V2SFmode:
15297 type = v2sf_ftype_4_v2sf;
15298 break;
15299 default:
15300 gcc_unreachable ();
15301 }
15302
15303 def_builtin (d->name, type, d->code);
15304 }
15305 }
15306
15307 static void
15308 paired_init_builtins (void)
15309 {
15310 const struct builtin_description *d;
15311 size_t i;
15312
15313 tree int_ftype_int_v2sf_v2sf
15314 = build_function_type_list (integer_type_node,
15315 integer_type_node,
15316 V2SF_type_node,
15317 V2SF_type_node,
15318 NULL_TREE);
15319 tree pcfloat_type_node =
15320 build_pointer_type (build_qualified_type
15321 (float_type_node, TYPE_QUAL_CONST));
15322
15323 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
15324 long_integer_type_node,
15325 pcfloat_type_node,
15326 NULL_TREE);
15327 tree void_ftype_v2sf_long_pcfloat =
15328 build_function_type_list (void_type_node,
15329 V2SF_type_node,
15330 long_integer_type_node,
15331 pcfloat_type_node,
15332 NULL_TREE);
15333
15334
15335 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
15336 PAIRED_BUILTIN_LX);
15337
15338
15339 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
15340 PAIRED_BUILTIN_STX);
15341
15342 /* Predicates. */
15343 d = bdesc_paired_preds;
15344 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
15345 {
15346 tree type;
15347
15348 if (TARGET_DEBUG_BUILTIN)
15349 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
15350 (int)i, get_insn_name (d->icode), (int)d->icode,
15351 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
15352
15353 switch (insn_data[d->icode].operand[1].mode)
15354 {
15355 case V2SFmode:
15356 type = int_ftype_int_v2sf_v2sf;
15357 break;
15358 default:
15359 gcc_unreachable ();
15360 }
15361
15362 def_builtin (d->name, type, d->code);
15363 }
15364 }
15365
15366 static void
15367 altivec_init_builtins (void)
15368 {
15369 const struct builtin_description *d;
15370 size_t i;
15371 tree ftype;
15372 tree decl;
15373
15374 tree pvoid_type_node = build_pointer_type (void_type_node);
15375
15376 tree pcvoid_type_node
15377 = build_pointer_type (build_qualified_type (void_type_node,
15378 TYPE_QUAL_CONST));
15379
15380 tree int_ftype_opaque
15381 = build_function_type_list (integer_type_node,
15382 opaque_V4SI_type_node, NULL_TREE);
15383 tree opaque_ftype_opaque /* dummy type; resolved by overloading */
15384 = build_function_type_list (integer_type_node, NULL_TREE);
15385 tree opaque_ftype_opaque_int
15386 = build_function_type_list (opaque_V4SI_type_node,
15387 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
15388 tree opaque_ftype_opaque_opaque_int
15389 = build_function_type_list (opaque_V4SI_type_node,
15390 opaque_V4SI_type_node, opaque_V4SI_type_node,
15391 integer_type_node, NULL_TREE);
15392 tree int_ftype_int_opaque_opaque
15393 = build_function_type_list (integer_type_node,
15394 integer_type_node, opaque_V4SI_type_node,
15395 opaque_V4SI_type_node, NULL_TREE);
15396 tree int_ftype_int_v4si_v4si
15397 = build_function_type_list (integer_type_node,
15398 integer_type_node, V4SI_type_node,
15399 V4SI_type_node, NULL_TREE);
15400 tree int_ftype_int_v2di_v2di
15401 = build_function_type_list (integer_type_node,
15402 integer_type_node, V2DI_type_node,
15403 V2DI_type_node, NULL_TREE);
15404 tree void_ftype_v4si
15405 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
15406 tree v8hi_ftype_void
15407 = build_function_type_list (V8HI_type_node, NULL_TREE);
15408 tree void_ftype_void
15409 = build_function_type_list (void_type_node, NULL_TREE);
15410 tree void_ftype_int
15411 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
15412
15413 tree opaque_ftype_long_pcvoid
15414 = build_function_type_list (opaque_V4SI_type_node,
15415 long_integer_type_node, pcvoid_type_node,
15416 NULL_TREE);
15417 tree v16qi_ftype_long_pcvoid
15418 = build_function_type_list (V16QI_type_node,
15419 long_integer_type_node, pcvoid_type_node,
15420 NULL_TREE);
15421 tree v8hi_ftype_long_pcvoid
15422 = build_function_type_list (V8HI_type_node,
15423 long_integer_type_node, pcvoid_type_node,
15424 NULL_TREE);
15425 tree v4si_ftype_long_pcvoid
15426 = build_function_type_list (V4SI_type_node,
15427 long_integer_type_node, pcvoid_type_node,
15428 NULL_TREE);
15429 tree v4sf_ftype_long_pcvoid
15430 = build_function_type_list (V4SF_type_node,
15431 long_integer_type_node, pcvoid_type_node,
15432 NULL_TREE);
15433 tree v2df_ftype_long_pcvoid
15434 = build_function_type_list (V2DF_type_node,
15435 long_integer_type_node, pcvoid_type_node,
15436 NULL_TREE);
15437 tree v2di_ftype_long_pcvoid
15438 = build_function_type_list (V2DI_type_node,
15439 long_integer_type_node, pcvoid_type_node,
15440 NULL_TREE);
15441
15442 tree void_ftype_opaque_long_pvoid
15443 = build_function_type_list (void_type_node,
15444 opaque_V4SI_type_node, long_integer_type_node,
15445 pvoid_type_node, NULL_TREE);
15446 tree void_ftype_v4si_long_pvoid
15447 = build_function_type_list (void_type_node,
15448 V4SI_type_node, long_integer_type_node,
15449 pvoid_type_node, NULL_TREE);
15450 tree void_ftype_v16qi_long_pvoid
15451 = build_function_type_list (void_type_node,
15452 V16QI_type_node, long_integer_type_node,
15453 pvoid_type_node, NULL_TREE);
15454 tree void_ftype_v8hi_long_pvoid
15455 = build_function_type_list (void_type_node,
15456 V8HI_type_node, long_integer_type_node,
15457 pvoid_type_node, NULL_TREE);
15458 tree void_ftype_v4sf_long_pvoid
15459 = build_function_type_list (void_type_node,
15460 V4SF_type_node, long_integer_type_node,
15461 pvoid_type_node, NULL_TREE);
15462 tree void_ftype_v2df_long_pvoid
15463 = build_function_type_list (void_type_node,
15464 V2DF_type_node, long_integer_type_node,
15465 pvoid_type_node, NULL_TREE);
15466 tree void_ftype_v2di_long_pvoid
15467 = build_function_type_list (void_type_node,
15468 V2DI_type_node, long_integer_type_node,
15469 pvoid_type_node, NULL_TREE);
15470 tree int_ftype_int_v8hi_v8hi
15471 = build_function_type_list (integer_type_node,
15472 integer_type_node, V8HI_type_node,
15473 V8HI_type_node, NULL_TREE);
15474 tree int_ftype_int_v16qi_v16qi
15475 = build_function_type_list (integer_type_node,
15476 integer_type_node, V16QI_type_node,
15477 V16QI_type_node, NULL_TREE);
15478 tree int_ftype_int_v4sf_v4sf
15479 = build_function_type_list (integer_type_node,
15480 integer_type_node, V4SF_type_node,
15481 V4SF_type_node, NULL_TREE);
15482 tree int_ftype_int_v2df_v2df
15483 = build_function_type_list (integer_type_node,
15484 integer_type_node, V2DF_type_node,
15485 V2DF_type_node, NULL_TREE);
15486 tree v2di_ftype_v2di
15487 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
15488 tree v4si_ftype_v4si
15489 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15490 tree v8hi_ftype_v8hi
15491 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15492 tree v16qi_ftype_v16qi
15493 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15494 tree v4sf_ftype_v4sf
15495 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15496 tree v2df_ftype_v2df
15497 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15498 tree void_ftype_pcvoid_int_int
15499 = build_function_type_list (void_type_node,
15500 pcvoid_type_node, integer_type_node,
15501 integer_type_node, NULL_TREE);
15502
15503 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
15504 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
15505 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
15506 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
15507 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
15508 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
15509 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
15510 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
15511 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
15512 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
15513 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
15514 ALTIVEC_BUILTIN_LVXL_V2DF);
15515 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
15516 ALTIVEC_BUILTIN_LVXL_V2DI);
15517 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
15518 ALTIVEC_BUILTIN_LVXL_V4SF);
15519 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
15520 ALTIVEC_BUILTIN_LVXL_V4SI);
15521 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
15522 ALTIVEC_BUILTIN_LVXL_V8HI);
15523 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
15524 ALTIVEC_BUILTIN_LVXL_V16QI);
15525 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
15526 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
15527 ALTIVEC_BUILTIN_LVX_V2DF);
15528 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
15529 ALTIVEC_BUILTIN_LVX_V2DI);
15530 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
15531 ALTIVEC_BUILTIN_LVX_V4SF);
15532 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
15533 ALTIVEC_BUILTIN_LVX_V4SI);
15534 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
15535 ALTIVEC_BUILTIN_LVX_V8HI);
15536 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
15537 ALTIVEC_BUILTIN_LVX_V16QI);
15538 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
15539 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
15540 ALTIVEC_BUILTIN_STVX_V2DF);
15541 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
15542 ALTIVEC_BUILTIN_STVX_V2DI);
15543 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
15544 ALTIVEC_BUILTIN_STVX_V4SF);
15545 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
15546 ALTIVEC_BUILTIN_STVX_V4SI);
15547 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
15548 ALTIVEC_BUILTIN_STVX_V8HI);
15549 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
15550 ALTIVEC_BUILTIN_STVX_V16QI);
15551 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
15552 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
15553 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
15554 ALTIVEC_BUILTIN_STVXL_V2DF);
15555 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
15556 ALTIVEC_BUILTIN_STVXL_V2DI);
15557 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
15558 ALTIVEC_BUILTIN_STVXL_V4SF);
15559 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
15560 ALTIVEC_BUILTIN_STVXL_V4SI);
15561 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
15562 ALTIVEC_BUILTIN_STVXL_V8HI);
15563 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
15564 ALTIVEC_BUILTIN_STVXL_V16QI);
15565 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
15566 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
15567 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
15568 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
15569 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
15570 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
15571 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
15572 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
15573 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
15574 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
15575 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
15576 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
15577 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
15578 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
15579 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
15580 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
15581
15582 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
15583 VSX_BUILTIN_LXVD2X_V2DF);
15584 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
15585 VSX_BUILTIN_LXVD2X_V2DI);
15586 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
15587 VSX_BUILTIN_LXVW4X_V4SF);
15588 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
15589 VSX_BUILTIN_LXVW4X_V4SI);
15590 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
15591 VSX_BUILTIN_LXVW4X_V8HI);
15592 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
15593 VSX_BUILTIN_LXVW4X_V16QI);
15594 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
15595 VSX_BUILTIN_STXVD2X_V2DF);
15596 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
15597 VSX_BUILTIN_STXVD2X_V2DI);
15598 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
15599 VSX_BUILTIN_STXVW4X_V4SF);
15600 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
15601 VSX_BUILTIN_STXVW4X_V4SI);
15602 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
15603 VSX_BUILTIN_STXVW4X_V8HI);
15604 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
15605 VSX_BUILTIN_STXVW4X_V16QI);
15606 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
15607 VSX_BUILTIN_VEC_LD);
15608 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
15609 VSX_BUILTIN_VEC_ST);
15610
15611 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
15612 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
15613 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
15614
15615 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
15616 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
15617 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
15618 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
15619 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
15620 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
15621 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
15622 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
15623 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
15624 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
15625 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
15626 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
15627
15628 /* Cell builtins. */
15629 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
15630 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
15631 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
15632 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
15633
15634 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
15635 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
15636 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
15637 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
15638
15639 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
15640 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
15641 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
15642 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
15643
15644 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
15645 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
15646 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
15647 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
15648
15649 /* Add the DST variants. */
15650 d = bdesc_dst;
15651 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15652 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
15653
15654 /* Initialize the predicates. */
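  /* Each predicate builtin takes a CR6 selector constant plus two
     vectors and returns an int, so the function type is chosen from
     the vector mode of insn operand 1; VOIDmode marks the overloaded
     forms, which take opaque arguments instead.  */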
15655 d = bdesc_altivec_preds;
15656 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15657 {
15658 machine_mode mode1;
15659 tree type;
15660
15661 if (rs6000_overloaded_builtin_p (d->code))
15662 mode1 = VOIDmode;
15663 else
15664 mode1 = insn_data[d->icode].operand[1].mode;
15665
15666 switch (mode1)
15667 {
15668 case VOIDmode:
15669 type = int_ftype_int_opaque_opaque;
15670 break;
15671 case V2DImode:
15672 type = int_ftype_int_v2di_v2di;
15673 break;
15674 case V4SImode:
15675 type = int_ftype_int_v4si_v4si;
15676 break;
15677 case V8HImode:
15678 type = int_ftype_int_v8hi_v8hi;
15679 break;
15680 case V16QImode:
15681 type = int_ftype_int_v16qi_v16qi;
15682 break;
15683 case V4SFmode:
15684 type = int_ftype_int_v4sf_v4sf;
15685 break;
15686 case V2DFmode:
15687 type = int_ftype_int_v2df_v2df;
15688 break;
15689 default:
15690 gcc_unreachable ();
15691 }
15692
15693 def_builtin (d->name, type, d->code);
15694 }
15695
15696 /* Initialize the abs* operators. */
15697 d = bdesc_abs;
15698 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15699 {
15700 machine_mode mode0;
15701 tree type;
15702
15703 mode0 = insn_data[d->icode].operand[0].mode;
15704
15705 switch (mode0)
15706 {
15707 case V2DImode:
15708 type = v2di_ftype_v2di;
15709 break;
15710 case V4SImode:
15711 type = v4si_ftype_v4si;
15712 break;
15713 case V8HImode:
15714 type = v8hi_ftype_v8hi;
15715 break;
15716 case V16QImode:
15717 type = v16qi_ftype_v16qi;
15718 break;
15719 case V4SFmode:
15720 type = v4sf_ftype_v4sf;
15721 break;
15722 case V2DFmode:
15723 type = v2df_ftype_v2df;
15724 break;
15725 default:
15726 gcc_unreachable ();
15727 }
15728
15729 def_builtin (d->name, type, d->code);
15730 }
15731
15732 /* Initialize target builtin that implements
15733 targetm.vectorize.builtin_mask_for_load. */
15734
15735 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
15736 v16qi_ftype_long_pcvoid,
15737 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
15738 BUILT_IN_MD, NULL, NULL_TREE);
15739 TREE_READONLY (decl) = 1;
15740 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
15741 altivec_builtin_mask_for_load = decl;
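  /* The vectorizer calls this decl (via the builtin_mask_for_load
     hook) to build the permute control vector used when realigning a
     misaligned vector load.  */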
15742
15743 /* Access to the vec_init patterns. */
15744 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
15745 integer_type_node, integer_type_node,
15746 integer_type_node, NULL_TREE);
15747 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
15748
15749 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
15750 short_integer_type_node,
15751 short_integer_type_node,
15752 short_integer_type_node,
15753 short_integer_type_node,
15754 short_integer_type_node,
15755 short_integer_type_node,
15756 short_integer_type_node, NULL_TREE);
15757 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
15758
15759 ftype = build_function_type_list (V16QI_type_node, char_type_node,
15760 char_type_node, char_type_node,
15761 char_type_node, char_type_node,
15762 char_type_node, char_type_node,
15763 char_type_node, char_type_node,
15764 char_type_node, char_type_node,
15765 char_type_node, char_type_node,
15766 char_type_node, char_type_node,
15767 char_type_node, NULL_TREE);
15768 def_builtin ("__builtin_vec_init_v16qi", ftype,
15769 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
15770
15771 ftype = build_function_type_list (V4SF_type_node, float_type_node,
15772 float_type_node, float_type_node,
15773 float_type_node, NULL_TREE);
15774 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
15775
15776 /* VSX builtins. */
15777 ftype = build_function_type_list (V2DF_type_node, double_type_node,
15778 double_type_node, NULL_TREE);
15779 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
15780
15781 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
15782 intDI_type_node, NULL_TREE);
15783 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
15784
15785 /* Access to the vec_set patterns. */
15786 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
15787 intSI_type_node,
15788 integer_type_node, NULL_TREE);
15789 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
15790
15791 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15792 intHI_type_node,
15793 integer_type_node, NULL_TREE);
15794 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
15795
15796 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
15797 intQI_type_node,
15798 integer_type_node, NULL_TREE);
15799 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
15800
15801 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
15802 float_type_node,
15803 integer_type_node, NULL_TREE);
15804 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
15805
15806 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
15807 double_type_node,
15808 integer_type_node, NULL_TREE);
15809 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
15810
15811 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
15812 intDI_type_node,
15813 integer_type_node, NULL_TREE);
15814 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
15815
15816 /* Access to the vec_extract patterns. */
15817 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15818 integer_type_node, NULL_TREE);
15819 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
15820
15821 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15822 integer_type_node, NULL_TREE);
15823 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
15824
15825 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15826 integer_type_node, NULL_TREE);
15827 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
15828
15829 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15830 integer_type_node, NULL_TREE);
15831 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
15832
15833 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15834 integer_type_node, NULL_TREE);
15835 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
15836
15837 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
15838 integer_type_node, NULL_TREE);
15839 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
15840
15841
15842 if (V1TI_type_node)
15843 {
15844 tree v1ti_ftype_long_pcvoid
15845 = build_function_type_list (V1TI_type_node,
15846 long_integer_type_node, pcvoid_type_node,
15847 NULL_TREE);
15848 tree void_ftype_v1ti_long_pvoid
15849 = build_function_type_list (void_type_node,
15850 V1TI_type_node, long_integer_type_node,
15851 pvoid_type_node, NULL_TREE);
15852 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
15853 VSX_BUILTIN_LXVD2X_V1TI);
15854 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
15855 VSX_BUILTIN_STXVD2X_V1TI);
15856 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
15857 NULL_TREE, NULL_TREE);
15858 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
15859 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
15860 intTI_type_node,
15861 integer_type_node, NULL_TREE);
15862 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
15863 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
15864 integer_type_node, NULL_TREE);
15865 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
15866 }
15867
15868 }
15869
15870 static void
15871 htm_init_builtins (void)
15872 {
15873 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15874 const struct builtin_description *d;
15875 size_t i;
15876
15877 d = bdesc_htm;
15878 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15879 {
15880 tree op[MAX_HTM_OPERANDS], type;
15881 HOST_WIDE_INT mask = d->mask;
15882 unsigned attr = rs6000_builtin_info[d->code].attr;
15883 bool void_func = (attr & RS6000_BTC_VOID);
15884 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
15885 int nopnds = 0;
15886 tree gpr_type_node;
15887 tree rettype;
15888 tree argtype;
15889
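      /* HTM builtins pass SPR values and abort codes in a full-width
         GPR; with -m32 -mpowerpc64 the registers are wider than long,
         so use long long there.  */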
15890 if (TARGET_32BIT && TARGET_POWERPC64)
15891 gpr_type_node = long_long_unsigned_type_node;
15892 else
15893 gpr_type_node = long_unsigned_type_node;
15894
15895 if (attr & RS6000_BTC_SPR)
15896 {
15897 rettype = gpr_type_node;
15898 argtype = gpr_type_node;
15899 }
15900 else if (d->code == HTM_BUILTIN_TABORTDC
15901 || d->code == HTM_BUILTIN_TABORTDCI)
15902 {
15903 rettype = unsigned_type_node;
15904 argtype = gpr_type_node;
15905 }
15906 else
15907 {
15908 rettype = unsigned_type_node;
15909 argtype = unsigned_type_node;
15910 }
15911
15912 if ((mask & builtin_mask) != mask)
15913 {
15914 if (TARGET_DEBUG_BUILTIN)
15915 fprintf (stderr, "htm_builtin, skip %s\n", d->name);
15916 continue;
15917 }
15918
15919 if (d->name == 0)
15920 {
15921 if (TARGET_DEBUG_BUILTIN)
15922 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
15923 (long unsigned) i);
15924 continue;
15925 }
15926
15927 op[nopnds++] = (void_func) ? void_type_node : rettype;
15928
15929 if (attr_args == RS6000_BTC_UNARY)
15930 op[nopnds++] = argtype;
15931 else if (attr_args == RS6000_BTC_BINARY)
15932 {
15933 op[nopnds++] = argtype;
15934 op[nopnds++] = argtype;
15935 }
15936 else if (attr_args == RS6000_BTC_TERNARY)
15937 {
15938 op[nopnds++] = argtype;
15939 op[nopnds++] = argtype;
15940 op[nopnds++] = argtype;
15941 }
15942
15943 switch (nopnds)
15944 {
15945 case 1:
15946 type = build_function_type_list (op[0], NULL_TREE);
15947 break;
15948 case 2:
15949 type = build_function_type_list (op[0], op[1], NULL_TREE);
15950 break;
15951 case 3:
15952 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
15953 break;
15954 case 4:
15955 type = build_function_type_list (op[0], op[1], op[2], op[3],
15956 NULL_TREE);
15957 break;
15958 default:
15959 gcc_unreachable ();
15960 }
15961
15962 def_builtin (d->name, type, d->code);
15963 }
15964 }
15965
15966 /* Hash function for builtin functions with up to 3 arguments and a return
15967 type. */
15968 hashval_t
15969 builtin_hasher::hash (builtin_hash_struct *bh)
15970 {
15971 unsigned ret = 0;
15972 int i;
15973
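  /* Accumulate the four (mode, uns_p) pairs as digits of a mixed-radix
     number, radix MAX_MACHINE_MODE then 2; distinct signatures hash to
     distinct values up to unsigned overflow.  */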
15974 for (i = 0; i < 4; i++)
15975 {
15976 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
15977 ret = (ret * 2) + bh->uns_p[i];
15978 }
15979
15980 return ret;
15981 }
15982
15983 /* Compare builtin hash entries H1 and H2 for equivalence. */
15984 bool
15985 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
15986 {
15987 return ((p1->mode[0] == p2->mode[0])
15988 && (p1->mode[1] == p2->mode[1])
15989 && (p1->mode[2] == p2->mode[2])
15990 && (p1->mode[3] == p2->mode[3])
15991 && (p1->uns_p[0] == p2->uns_p[0])
15992 && (p1->uns_p[1] == p2->uns_p[1])
15993 && (p1->uns_p[2] == p2->uns_p[2])
15994 && (p1->uns_p[3] == p2->uns_p[3]));
15995 }
15996
15997 /* Map types for builtin functions with an explicit return type and up to 3
15998 arguments. Functions with fewer than 3 arguments pass VOIDmode as the
15999 mode of each unused trailing argument. */
16000 static tree
16001 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
16002 machine_mode mode_arg1, machine_mode mode_arg2,
16003 enum rs6000_builtins builtin, const char *name)
16004 {
16005 struct builtin_hash_struct h;
16006 struct builtin_hash_struct *h2;
16007 int num_args = 3;
16008 int i;
16009 tree ret_type = NULL_TREE;
16010 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
16011
16012 /* Create builtin_hash_table. */
16013 if (builtin_hash_table == NULL)
16014 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
16015
16016 h.type = NULL_TREE;
16017 h.mode[0] = mode_ret;
16018 h.mode[1] = mode_arg0;
16019 h.mode[2] = mode_arg1;
16020 h.mode[3] = mode_arg2;
16021 h.uns_p[0] = 0;
16022 h.uns_p[1] = 0;
16023 h.uns_p[2] = 0;
16024 h.uns_p[3] = 0;
16025
16026 /* If the builtin produces unsigned results or takes unsigned arguments,
16027 and it is returned as a decl for the vectorizer (such as widening
16028 multiplies, permute), make sure the arguments and return value
16029 are type correct. */
16030 switch (builtin)
16031 {
16032 /* unsigned 1 argument functions. */
16033 case CRYPTO_BUILTIN_VSBOX:
16034 case P8V_BUILTIN_VGBBD:
16035 case MISC_BUILTIN_CDTBCD:
16036 case MISC_BUILTIN_CBCDTD:
16037 h.uns_p[0] = 1;
16038 h.uns_p[1] = 1;
16039 break;
16040
16041 /* unsigned 2 argument functions. */
16042 case ALTIVEC_BUILTIN_VMULEUB_UNS:
16043 case ALTIVEC_BUILTIN_VMULEUH_UNS:
16044 case ALTIVEC_BUILTIN_VMULOUB_UNS:
16045 case ALTIVEC_BUILTIN_VMULOUH_UNS:
16046 case CRYPTO_BUILTIN_VCIPHER:
16047 case CRYPTO_BUILTIN_VCIPHERLAST:
16048 case CRYPTO_BUILTIN_VNCIPHER:
16049 case CRYPTO_BUILTIN_VNCIPHERLAST:
16050 case CRYPTO_BUILTIN_VPMSUMB:
16051 case CRYPTO_BUILTIN_VPMSUMH:
16052 case CRYPTO_BUILTIN_VPMSUMW:
16053 case CRYPTO_BUILTIN_VPMSUMD:
16054 case CRYPTO_BUILTIN_VPMSUM:
16055 case MISC_BUILTIN_ADDG6S:
16056 case MISC_BUILTIN_DIVWEU:
16057 case MISC_BUILTIN_DIVWEUO:
16058 case MISC_BUILTIN_DIVDEU:
16059 case MISC_BUILTIN_DIVDEUO:
16060 h.uns_p[0] = 1;
16061 h.uns_p[1] = 1;
16062 h.uns_p[2] = 1;
16063 break;
16064
16065 /* unsigned 3 argument functions. */
16066 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
16067 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
16068 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
16069 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
16070 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
16071 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
16072 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
16073 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
16074 case VSX_BUILTIN_VPERM_16QI_UNS:
16075 case VSX_BUILTIN_VPERM_8HI_UNS:
16076 case VSX_BUILTIN_VPERM_4SI_UNS:
16077 case VSX_BUILTIN_VPERM_2DI_UNS:
16078 case VSX_BUILTIN_XXSEL_16QI_UNS:
16079 case VSX_BUILTIN_XXSEL_8HI_UNS:
16080 case VSX_BUILTIN_XXSEL_4SI_UNS:
16081 case VSX_BUILTIN_XXSEL_2DI_UNS:
16082 case CRYPTO_BUILTIN_VPERMXOR:
16083 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
16084 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
16085 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
16086 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
16087 case CRYPTO_BUILTIN_VSHASIGMAW:
16088 case CRYPTO_BUILTIN_VSHASIGMAD:
16089 case CRYPTO_BUILTIN_VSHASIGMA:
16090 h.uns_p[0] = 1;
16091 h.uns_p[1] = 1;
16092 h.uns_p[2] = 1;
16093 h.uns_p[3] = 1;
16094 break;
16095
16096 /* signed permute functions with unsigned char mask. */
16097 case ALTIVEC_BUILTIN_VPERM_16QI:
16098 case ALTIVEC_BUILTIN_VPERM_8HI:
16099 case ALTIVEC_BUILTIN_VPERM_4SI:
16100 case ALTIVEC_BUILTIN_VPERM_4SF:
16101 case ALTIVEC_BUILTIN_VPERM_2DI:
16102 case ALTIVEC_BUILTIN_VPERM_2DF:
16103 case VSX_BUILTIN_VPERM_16QI:
16104 case VSX_BUILTIN_VPERM_8HI:
16105 case VSX_BUILTIN_VPERM_4SI:
16106 case VSX_BUILTIN_VPERM_4SF:
16107 case VSX_BUILTIN_VPERM_2DI:
16108 case VSX_BUILTIN_VPERM_2DF:
16109 h.uns_p[3] = 1;
16110 break;
16111
16112 /* unsigned args, signed return. */
16113 case VSX_BUILTIN_XVCVUXDDP_UNS:
16114 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
16115 h.uns_p[1] = 1;
16116 break;
16117
16118 /* signed args, unsigned return. */
16119 case VSX_BUILTIN_XVCVDPUXDS_UNS:
16120 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
16121 case MISC_BUILTIN_UNPACK_TD:
16122 case MISC_BUILTIN_UNPACK_V1TI:
16123 h.uns_p[0] = 1;
16124 break;
16125
16126 /* unsigned arguments for 128-bit pack instructions. */
16127 case MISC_BUILTIN_PACK_TD:
16128 case MISC_BUILTIN_PACK_V1TI:
16129 h.uns_p[1] = 1;
16130 h.uns_p[2] = 1;
16131 break;
16132
16133 default:
16134 break;
16135 }
16136
16137 /* Figure out how many args are present. */
16138 while (num_args > 0 && h.mode[num_args] == VOIDmode)
16139 num_args--;
16140
16141 if (num_args == 0)
16142 fatal_error (input_location,
16143 "internal error: builtin function %s had no type", name);
16144
16145 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
16146 if (!ret_type && h.uns_p[0])
16147 ret_type = builtin_mode_to_type[h.mode[0]][0];
16148
16149 if (!ret_type)
16150 fatal_error (input_location,
16151 "internal error: builtin function %s had an unexpected "
16152 "return type %s", name, GET_MODE_NAME (h.mode[0]));
16153
16154 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
16155 arg_type[i] = NULL_TREE;
16156
16157 for (i = 0; i < num_args; i++)
16158 {
16159 int m = (int) h.mode[i+1];
16160 int uns_p = h.uns_p[i+1];
16161
16162 arg_type[i] = builtin_mode_to_type[m][uns_p];
16163 if (!arg_type[i] && uns_p)
16164 arg_type[i] = builtin_mode_to_type[m][0];
16165
16166 if (!arg_type[i])
16167 fatal_error (input_location,
16168 "internal error: builtin function %s, argument %d "
16169 "had unexpected argument type %s", name, i,
16170 GET_MODE_NAME (m));
16171 }
16172
16173 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
16174 if (*found == NULL)
16175 {
16176 h2 = ggc_alloc<builtin_hash_struct> ();
16177 *h2 = h;
16178 *found = h2;
16179
16180 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
16181 arg_type[2], NULL_TREE);
16182 }
16183
16184 return (*found)->type;
16185 }
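
/* A worked example (illustrative only): for an insn whose operands are
   all V16QImode, such as the vaddubm pattern,

     builtin_function_type (V16QImode, V16QImode, V16QImode, VOIDmode,
                            ALTIVEC_BUILTIN_VADDUBM,
                            "__builtin_altivec_vaddubm");

   trims num_args from 3 to 2 (dropping the trailing VOIDmode), maps
   each mode through builtin_mode_to_type, and returns a cached
   "v16qi f (v16qi, v16qi)" function type, built only on first use.  */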
16186
16187 static void
16188 rs6000_common_init_builtins (void)
16189 {
16190 const struct builtin_description *d;
16191 size_t i;
16192
16193 tree opaque_ftype_opaque = NULL_TREE;
16194 tree opaque_ftype_opaque_opaque = NULL_TREE;
16195 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
16196 tree v2si_ftype_qi = NULL_TREE;
16197 tree v2si_ftype_v2si_qi = NULL_TREE;
16198 tree v2si_ftype_int_qi = NULL_TREE;
16199 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16200
16201 if (!TARGET_PAIRED_FLOAT)
16202 {
16203 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
16204 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
16205 }
16206
16207 /* Paired and SPE builtins are only available if you build a compiler with
16208 the appropriate options, so only create them when the corresponding
16209 option is enabled. Create AltiVec and VSX builtins on machines with at
16210 least the general purpose extensions (970 and newer) to allow use of
16211 the target attribute. */
16212
16213 if (TARGET_EXTRA_BUILTINS)
16214 builtin_mask |= RS6000_BTM_COMMON;
16215
16216 /* Add the ternary operators. */
16217 d = bdesc_3arg;
16218 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16219 {
16220 tree type;
16221 HOST_WIDE_INT mask = d->mask;
16222
16223 if ((mask & builtin_mask) != mask)
16224 {
16225 if (TARGET_DEBUG_BUILTIN)
16226 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
16227 continue;
16228 }
16229
16230 if (rs6000_overloaded_builtin_p (d->code))
16231 {
16232 if (! (type = opaque_ftype_opaque_opaque_opaque))
16233 type = opaque_ftype_opaque_opaque_opaque
16234 = build_function_type_list (opaque_V4SI_type_node,
16235 opaque_V4SI_type_node,
16236 opaque_V4SI_type_node,
16237 opaque_V4SI_type_node,
16238 NULL_TREE);
16239 }
16240 else
16241 {
16242 enum insn_code icode = d->icode;
16243 if (d->name == 0)
16244 {
16245 if (TARGET_DEBUG_BUILTIN)
16246 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
16247 (long unsigned)i);
16248
16249 continue;
16250 }
16251
16252 if (icode == CODE_FOR_nothing)
16253 {
16254 if (TARGET_DEBUG_BUILTIN)
16255 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
16256 d->name);
16257
16258 continue;
16259 }
16260
16261 type = builtin_function_type (insn_data[icode].operand[0].mode,
16262 insn_data[icode].operand[1].mode,
16263 insn_data[icode].operand[2].mode,
16264 insn_data[icode].operand[3].mode,
16265 d->code, d->name);
16266 }
16267
16268 def_builtin (d->name, type, d->code);
16269 }
16270
16271 /* Add the binary operators. */
16272 d = bdesc_2arg;
16273 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16274 {
16275 machine_mode mode0, mode1, mode2;
16276 tree type;
16277 HOST_WIDE_INT mask = d->mask;
16278
16279 if ((mask & builtin_mask) != mask)
16280 {
16281 if (TARGET_DEBUG_BUILTIN)
16282 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
16283 continue;
16284 }
16285
16286 if (rs6000_overloaded_builtin_p (d->code))
16287 {
16288 if (! (type = opaque_ftype_opaque_opaque))
16289 type = opaque_ftype_opaque_opaque
16290 = build_function_type_list (opaque_V4SI_type_node,
16291 opaque_V4SI_type_node,
16292 opaque_V4SI_type_node,
16293 NULL_TREE);
16294 }
16295 else
16296 {
16297 enum insn_code icode = d->icode;
16298 if (d->name == 0)
16299 {
16300 if (TARGET_DEBUG_BUILTIN)
16301 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
16302 (long unsigned)i);
16303
16304 continue;
16305 }
16306
16307 if (icode == CODE_FOR_nothing)
16308 {
16309 if (TARGET_DEBUG_BUILTIN)
16310 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
16311 d->name);
16312
16313 continue;
16314 }
16315
16316 mode0 = insn_data[icode].operand[0].mode;
16317 mode1 = insn_data[icode].operand[1].mode;
16318 mode2 = insn_data[icode].operand[2].mode;
16319
16320 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
16321 {
16322 if (! (type = v2si_ftype_v2si_qi))
16323 type = v2si_ftype_v2si_qi
16324 = build_function_type_list (opaque_V2SI_type_node,
16325 opaque_V2SI_type_node,
16326 char_type_node,
16327 NULL_TREE);
16328 }
16329
16330 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
16331 && mode2 == QImode)
16332 {
16333 if (! (type = v2si_ftype_int_qi))
16334 type = v2si_ftype_int_qi
16335 = build_function_type_list (opaque_V2SI_type_node,
16336 integer_type_node,
16337 char_type_node,
16338 NULL_TREE);
16339 }
16340
16341 else
16342 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
16343 d->code, d->name);
16344 }
16345
16346 def_builtin (d->name, type, d->code);
16347 }
16348
16349 /* Add the simple unary operators. */
16350 d = bdesc_1arg;
16351 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16352 {
16353 machine_mode mode0, mode1;
16354 tree type;
16355 HOST_WIDE_INT mask = d->mask;
16356
16357 if ((mask & builtin_mask) != mask)
16358 {
16359 if (TARGET_DEBUG_BUILTIN)
16360 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
16361 continue;
16362 }
16363
16364 if (rs6000_overloaded_builtin_p (d->code))
16365 {
16366 if (! (type = opaque_ftype_opaque))
16367 type = opaque_ftype_opaque
16368 = build_function_type_list (opaque_V4SI_type_node,
16369 opaque_V4SI_type_node,
16370 NULL_TREE);
16371 }
16372 else
16373 {
16374 enum insn_code icode = d->icode;
16375 if (d->name == 0)
16376 {
16377 if (TARGET_DEBUG_BUILTIN)
16378 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
16379 (long unsigned)i);
16380
16381 continue;
16382 }
16383
16384 if (icode == CODE_FOR_nothing)
16385 {
16386 if (TARGET_DEBUG_BUILTIN)
16387 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
16388 d->name);
16389
16390 continue;
16391 }
16392
16393 mode0 = insn_data[icode].operand[0].mode;
16394 mode1 = insn_data[icode].operand[1].mode;
16395
16396 if (mode0 == V2SImode && mode1 == QImode)
16397 {
16398 if (! (type = v2si_ftype_qi))
16399 type = v2si_ftype_qi
16400 = build_function_type_list (opaque_V2SI_type_node,
16401 char_type_node,
16402 NULL_TREE);
16403 }
16404
16405 else
16406 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
16407 d->code, d->name);
16408 }
16409
16410 def_builtin (d->name, type, d->code);
16411 }
16412 }
16413
16414 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
16415 static void
16416 init_float128_ibm (machine_mode mode)
16417 {
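  /* By default, bind the IBM long double helpers to GCC's own __gcc_q*
     routines in libgcc; with -mxl-compat, bind instead to the _xlq*
     entry points provided by the IBM XL compiler runtime.  */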
16418 if (!TARGET_XL_COMPAT)
16419 {
16420 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
16421 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
16422 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
16423 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
16424
16425 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
16426 {
16427 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
16428 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
16429 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
16430 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
16431 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
16432 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
16433 set_optab_libfunc (le_optab, mode, "__gcc_qle");
16434
16435 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
16436 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
16437 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
16438 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
16439 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
16440 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
16441 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
16442 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
16443 }
16444
16445 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
16446 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
16447 }
16448 else
16449 {
16450 set_optab_libfunc (add_optab, mode, "_xlqadd");
16451 set_optab_libfunc (sub_optab, mode, "_xlqsub");
16452 set_optab_libfunc (smul_optab, mode, "_xlqmul");
16453 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
16454 }
16455
16456 /* Add various conversions for IFmode to use the traditional TFmode
16457 names. */
16458 if (mode == IFmode)
16459 {
16460 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
16461 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
16462 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
16463 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
16464 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
16465 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
16466
16467 if (TARGET_POWERPC64)
16468 {
16469 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
16470 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
16471 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
16472 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
16473 }
16474 }
16475 }
16476
16477 /* Set up IEEE 128-bit floating point routines. Use different names if the
16478 arguments can be passed in a vector register. The historical PowerPC
16479 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
16480 continue to use that if we aren't using vector registers to pass IEEE
16481 128-bit floating point. */
16482
16483 static void
16484 init_float128_ieee (machine_mode mode)
16485 {
16486 if (FLOAT128_VECTOR_P (mode))
16487 {
16488 set_optab_libfunc (add_optab, mode, "__addkf3");
16489 set_optab_libfunc (sub_optab, mode, "__subkf3");
16490 set_optab_libfunc (neg_optab, mode, "__negkf2");
16491 set_optab_libfunc (smul_optab, mode, "__mulkf3");
16492 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
16493 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
16494 set_optab_libfunc (abs_optab, mode, "__abskf2");
16495
16496 set_optab_libfunc (eq_optab, mode, "__eqkf2");
16497 set_optab_libfunc (ne_optab, mode, "__nekf2");
16498 set_optab_libfunc (gt_optab, mode, "__gtkf2");
16499 set_optab_libfunc (ge_optab, mode, "__gekf2");
16500 set_optab_libfunc (lt_optab, mode, "__ltkf2");
16501 set_optab_libfunc (le_optab, mode, "__lekf2");
16502 set_optab_libfunc (unord_optab, mode, "__unordkf2");
16503 set_optab_libfunc (cmp_optab, mode, "__cmpokf2"); /* fcmpo */
16504 set_optab_libfunc (ucmp_optab, mode, "__cmpukf2"); /* fcmpu */
16505
16506 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
16507 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
16508 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
16509 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
16510
16511 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
16512 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
16513 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
16514
16515 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
16516 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
16517 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
16518
16519 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
16520 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
16521 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
16522 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
16523 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
16524 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
16525
16526 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
16527 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
16528 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
16529 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
16530
16531 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
16532 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
16533 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
16534 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
16535
16536 if (TARGET_POWERPC64)
16537 {
16538 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
16539 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
16540 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
16541 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
16542 }
16543 }
16544
16545 else
16546 {
16547 set_optab_libfunc (add_optab, mode, "_q_add");
16548 set_optab_libfunc (sub_optab, mode, "_q_sub");
16549 set_optab_libfunc (neg_optab, mode, "_q_neg");
16550 set_optab_libfunc (smul_optab, mode, "_q_mul");
16551 set_optab_libfunc (sdiv_optab, mode, "_q_div");
16552 if (TARGET_PPC_GPOPT)
16553 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
16554
16555 set_optab_libfunc (eq_optab, mode, "_q_feq");
16556 set_optab_libfunc (ne_optab, mode, "_q_fne");
16557 set_optab_libfunc (gt_optab, mode, "_q_fgt");
16558 set_optab_libfunc (ge_optab, mode, "_q_fge");
16559 set_optab_libfunc (lt_optab, mode, "_q_flt");
16560 set_optab_libfunc (le_optab, mode, "_q_fle");
16561
16562 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
16563 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
16564 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
16565 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
16566 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
16567 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
16568 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
16569 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
16570 }
16571 }
16572
16573 static void
16574 rs6000_init_libfuncs (void)
16575 {
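  /* IFmode is always the IBM double-double format and KFmode is always
     IEEE 128-bit; TFmode (long double) is bound below to whichever of
     the two formats -mabi={ibm,ieee}longdouble selects.  */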
16576 /* __float128 support. */
16577 if (TARGET_FLOAT128)
16578 {
16579 init_float128_ibm (IFmode);
16580 init_float128_ieee (KFmode);
16581 }
16582
16583 /* AIX/Darwin/64-bit Linux quad floating point routines. */
16584 if (TARGET_LONG_DOUBLE_128)
16585 {
16586 if (!TARGET_IEEEQUAD)
16587 init_float128_ibm (TFmode);
16588
16589 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
16590 else
16591 init_float128_ieee (TFmode);
16592 }
16593 }
16594
16595 \f
16596 /* Expand a block clear operation, and return 1 if successful. Return 0
16597 if we should let the compiler generate normal code.
16598
16599 operands[0] is the destination
16600 operands[1] is the length
16601 operands[3] is the alignment (operands[2], the byte value to store, is always zero here; the setmemsi expander FAILs otherwise) */
16602
16603 int
16604 expand_block_clear (rtx operands[])
16605 {
16606 rtx orig_dest = operands[0];
16607 rtx bytes_rtx = operands[1];
16608 rtx align_rtx = operands[3];
16609 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
16610 HOST_WIDE_INT align;
16611 HOST_WIDE_INT bytes;
16612 int offset;
16613 int clear_bytes;
16614 int clear_step;
16615
16616 /* If this is not a fixed size clear, just call memset. */
16617 if (! constp)
16618 return 0;
16619
16620 /* This must be a fixed size alignment */
16621 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
16622 align = INTVAL (align_rtx) * BITS_PER_UNIT;
16623
16624 /* Anything to clear? */
16625 bytes = INTVAL (bytes_rtx);
16626 if (bytes <= 0)
16627 return 1;
16628
16629 /* Use the builtin memset after a point, to avoid huge code bloat.
16630 When optimize_size, avoid any significant code bloat; calling
16631 memset is about 4 instructions, so allow for one instruction to
16632 load zero and three to do clearing. */
16633 if (TARGET_ALTIVEC && align >= 128)
16634 clear_step = 16;
16635 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
16636 clear_step = 8;
16637 else if (TARGET_SPE && align >= 64)
16638 clear_step = 8;
16639 else
16640 clear_step = 4;
16641
16642 if (optimize_size && bytes > 3 * clear_step)
16643 return 0;
16644 if (! optimize_size && bytes > 8 * clear_step)
16645 return 0;
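  /* For example, with Altivec stores (clear_step == 16) this clears up
     to 3*16 = 48 bytes inline at -Os and up to 8*16 = 128 bytes
     otherwise; anything larger falls back to memset.  */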
16646
16647 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
16648 {
16649 machine_mode mode = BLKmode;
16650 rtx dest;
16651
16652 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
16653 {
16654 clear_bytes = 16;
16655 mode = V4SImode;
16656 }
16657 else if (bytes >= 8 && TARGET_SPE && align >= 64)
16658 {
16659 clear_bytes = 8;
16660 mode = V2SImode;
16661 }
16662 else if (bytes >= 8 && TARGET_POWERPC64
16663 && (align >= 64 || !STRICT_ALIGNMENT))
16664 {
16665 clear_bytes = 8;
16666 mode = DImode;
16667 if (offset == 0 && align < 64)
16668 {
16669 rtx addr;
16670
16671 /* If the address form is reg+offset with offset not a
16672 multiple of four, reload into reg indirect form here
16673 rather than waiting for reload. This way we get one
16674 reload, not one per store. */
16675 addr = XEXP (orig_dest, 0);
16676 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16677 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16678 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16679 {
16680 addr = copy_addr_to_reg (addr);
16681 orig_dest = replace_equiv_address (orig_dest, addr);
16682 }
16683 }
16684 }
16685 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16686 { /* move 4 bytes */
16687 clear_bytes = 4;
16688 mode = SImode;
16689 }
16690 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16691 { /* move 2 bytes */
16692 clear_bytes = 2;
16693 mode = HImode;
16694 }
16695 else /* move 1 byte at a time */
16696 {
16697 clear_bytes = 1;
16698 mode = QImode;
16699 }
16700
16701 dest = adjust_address (orig_dest, mode, offset);
16702
16703 emit_move_insn (dest, CONST0_RTX (mode));
16704 }
16705
16706 return 1;
16707 }
16708
16709 \f
16710 /* Expand a block move operation, and return 1 if successful. Return 0
16711 if we should let the compiler generate normal code.
16712
16713 operands[0] is the destination
16714 operands[1] is the source
16715 operands[2] is the length
16716 operands[3] is the alignment */
16717
16718 #define MAX_MOVE_REG 4
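
/* In the mover below, loads are emitted immediately but the matching
   stores are queued and flushed in batches of up to MAX_MOVE_REG, so
   several loads can issue before their stores; this is a scheduling
   heuristic, not a correctness requirement.  */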
16719
16720 int
16721 expand_block_move (rtx operands[])
16722 {
16723 rtx orig_dest = operands[0];
16724 rtx orig_src = operands[1];
16725 rtx bytes_rtx = operands[2];
16726 rtx align_rtx = operands[3];
16727 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
16728 int align;
16729 int bytes;
16730 int offset;
16731 int move_bytes;
16732 rtx stores[MAX_MOVE_REG];
16733 int num_reg = 0;
16734
16735 /* If this is not a fixed size move, just call memcpy */
16736 if (! constp)
16737 return 0;
16738
16739 /* This must be a fixed size alignment */
16740 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
16741 align = INTVAL (align_rtx) * BITS_PER_UNIT;
16742
16743 /* Anything to move? */
16744 bytes = INTVAL (bytes_rtx);
16745 if (bytes <= 0)
16746 return 1;
16747
16748 if (bytes > rs6000_block_move_inline_limit)
16749 return 0;
16750
16751 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
16752 {
16753 union {
16754 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
16755 rtx (*mov) (rtx, rtx);
16756 } gen_func;
16757 machine_mode mode = BLKmode;
16758 rtx src, dest;
16759
16760 /* Altivec first, since it will be faster than a string move
16761 when it applies, and usually not significantly larger. */
16762 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
16763 {
16764 move_bytes = 16;
16765 mode = V4SImode;
16766 gen_func.mov = gen_movv4si;
16767 }
16768 else if (TARGET_SPE && bytes >= 8 && align >= 64)
16769 {
16770 move_bytes = 8;
16771 mode = V2SImode;
16772 gen_func.mov = gen_movv2si;
16773 }
16774 else if (TARGET_STRING
16775 && bytes > 24 /* move up to 32 bytes at a time */
16776 && ! fixed_regs[5]
16777 && ! fixed_regs[6]
16778 && ! fixed_regs[7]
16779 && ! fixed_regs[8]
16780 && ! fixed_regs[9]
16781 && ! fixed_regs[10]
16782 && ! fixed_regs[11]
16783 && ! fixed_regs[12])
16784 {
16785 move_bytes = (bytes > 32) ? 32 : bytes;
16786 gen_func.movmemsi = gen_movmemsi_8reg;
16787 }
16788 else if (TARGET_STRING
16789 && bytes > 16 /* move up to 24 bytes at a time */
16790 && ! fixed_regs[5]
16791 && ! fixed_regs[6]
16792 && ! fixed_regs[7]
16793 && ! fixed_regs[8]
16794 && ! fixed_regs[9]
16795 && ! fixed_regs[10])
16796 {
16797 move_bytes = (bytes > 24) ? 24 : bytes;
16798 gen_func.movmemsi = gen_movmemsi_6reg;
16799 }
16800 else if (TARGET_STRING
16801 && bytes > 8 /* move up to 16 bytes at a time */
16802 && ! fixed_regs[5]
16803 && ! fixed_regs[6]
16804 && ! fixed_regs[7]
16805 && ! fixed_regs[8])
16806 {
16807 move_bytes = (bytes > 16) ? 16 : bytes;
16808 gen_func.movmemsi = gen_movmemsi_4reg;
16809 }
16810 else if (bytes >= 8 && TARGET_POWERPC64
16811 && (align >= 64 || !STRICT_ALIGNMENT))
16812 {
16813 move_bytes = 8;
16814 mode = DImode;
16815 gen_func.mov = gen_movdi;
16816 if (offset == 0 && align < 64)
16817 {
16818 rtx addr;
16819
16820 /* If the address form is reg+offset with offset not a
16821 multiple of four, reload into reg indirect form here
16822 rather than waiting for reload. This way we get one
16823 reload, not one per load and/or store. */
16824 addr = XEXP (orig_dest, 0);
16825 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16826 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16827 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16828 {
16829 addr = copy_addr_to_reg (addr);
16830 orig_dest = replace_equiv_address (orig_dest, addr);
16831 }
16832 addr = XEXP (orig_src, 0);
16833 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16834 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16835 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16836 {
16837 addr = copy_addr_to_reg (addr);
16838 orig_src = replace_equiv_address (orig_src, addr);
16839 }
16840 }
16841 }
16842 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
16843 { /* move up to 8 bytes at a time */
16844 move_bytes = (bytes > 8) ? 8 : bytes;
16845 gen_func.movmemsi = gen_movmemsi_2reg;
16846 }
16847 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16848 { /* move 4 bytes */
16849 move_bytes = 4;
16850 mode = SImode;
16851 gen_func.mov = gen_movsi;
16852 }
16853 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16854 { /* move 2 bytes */
16855 move_bytes = 2;
16856 mode = HImode;
16857 gen_func.mov = gen_movhi;
16858 }
16859 else if (TARGET_STRING && bytes > 1)
16860 { /* move up to 4 bytes at a time */
16861 move_bytes = (bytes > 4) ? 4 : bytes;
16862 gen_func.movmemsi = gen_movmemsi_1reg;
16863 }
16864 else /* move 1 byte at a time */
16865 {
16866 move_bytes = 1;
16867 mode = QImode;
16868 gen_func.mov = gen_movqi;
16869 }
16870
16871 src = adjust_address (orig_src, mode, offset);
16872 dest = adjust_address (orig_dest, mode, offset);
16873
16874 if (mode != BLKmode)
16875 {
16876 rtx tmp_reg = gen_reg_rtx (mode);
16877
16878 emit_insn ((*gen_func.mov) (tmp_reg, src));
16879 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
16880 }
16881
16882 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
16883 {
16884 int i;
16885 for (i = 0; i < num_reg; i++)
16886 emit_insn (stores[i]);
16887 num_reg = 0;
16888 }
16889
16890 if (mode == BLKmode)
16891 {
16892 /* Move the address into scratch registers. The movmemsi
16893 patterns require zero offset. */
16894 if (!REG_P (XEXP (src, 0)))
16895 {
16896 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
16897 src = replace_equiv_address (src, src_reg);
16898 }
16899 set_mem_size (src, move_bytes);
16900
16901 if (!REG_P (XEXP (dest, 0)))
16902 {
16903 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
16904 dest = replace_equiv_address (dest, dest_reg);
16905 }
16906 set_mem_size (dest, move_bytes);
16907
16908 emit_insn ((*gen_func.movmemsi) (dest, src,
16909 GEN_INT (move_bytes & 31),
16910 align_rtx));
16911 }
16912 }
16913
16914 return 1;
16915 }
16916
16917 \f
16918 /* Return a string to perform a load_multiple operation.
16919 operands[0] is the vector.
16920 operands[1] is the source address.
16921 operands[2] is the first destination register. */
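
/* For instance, loading three words into r4..r6 when the address register
   is r6 overlaps the last destination; the code below then emits
   "lswi 4,6,8" to fill r4 and r5 first, followed by "lwz 6,8(6)" so that
   the address register is overwritten last.  */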
16922
16923 const char *
16924 rs6000_output_load_multiple (rtx operands[3])
16925 {
16926 /* We have to handle the case where the pseudo used to contain the address
16927 is assigned to one of the output registers. */
16928 int i, j;
16929 int words = XVECLEN (operands[0], 0);
16930 rtx xop[10];
16931
16932 if (XVECLEN (operands[0], 0) == 1)
16933 return "lwz %2,0(%1)";
16934
16935 for (i = 0; i < words; i++)
16936 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
16937 {
16938 if (i == words-1)
16939 {
16940 xop[0] = GEN_INT (4 * (words-1));
16941 xop[1] = operands[1];
16942 xop[2] = operands[2];
16943 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
16944 return "";
16945 }
16946 else if (i == 0)
16947 {
16948 xop[0] = GEN_INT (4 * (words-1));
16949 xop[1] = operands[1];
16950 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16951 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
16952 return "";
16953 }
16954 else
16955 {
16956 for (j = 0; j < words; j++)
16957 if (j != i)
16958 {
16959 xop[0] = GEN_INT (j * 4);
16960 xop[1] = operands[1];
16961 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
16962 output_asm_insn ("lwz %2,%0(%1)", xop);
16963 }
16964 xop[0] = GEN_INT (i * 4);
16965 xop[1] = operands[1];
16966 output_asm_insn ("lwz %1,%0(%1)", xop);
16967 return "";
16968 }
16969 }
16970
16971 return "lswi %2,%1,%N0";
16972 }
16973
16974 \f
16975 /* A validation routine: say whether CODE, a condition code, and MODE
16976 match. The other alternatives either don't make sense or should
16977 never be generated. */
16978
16979 void
16980 validate_condition_mode (enum rtx_code code, machine_mode mode)
16981 {
16982 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
16983 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
16984 && GET_MODE_CLASS (mode) == MODE_CC);
16985
16986 /* These don't make sense. */
16987 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
16988 || mode != CCUNSmode);
16989
16990 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
16991 || mode == CCUNSmode);
16992
16993 gcc_assert (mode == CCFPmode
16994 || (code != ORDERED && code != UNORDERED
16995 && code != UNEQ && code != LTGT
16996 && code != UNGT && code != UNLT
16997 && code != UNGE && code != UNLE));
16998
16999 /* These should never be generated except for
17000 flag_finite_math_only. */
17001 gcc_assert (mode != CCFPmode
17002 || flag_finite_math_only
17003 || (code != LE && code != GE
17004 && code != UNEQ && code != LTGT
17005 && code != UNGT && code != UNLT));
17006
17007 /* These are invalid; the information is not there. */
17008 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
17009 }
17010
17011 \f
17012 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
17013 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
17014 not zero, store there the bit offset (counted from the right) where
17015 the single stretch of 1 bits begins; and similarly for B, the bit
17016 offset where it ends. */
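
/* For example, in SImode the mask 0x00fff000 is a single run of ones from
   bit 12 through bit 23, so this returns true with *e == 12 and *b == 23.
   A wrap-around run such as 0xf000000f is also accepted (*b == 3,
   *e == 28), while 0x00ff00ff has two separate runs and is rejected.  */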
17017
17018 bool
17019 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
17020 {
17021 unsigned HOST_WIDE_INT val = INTVAL (mask);
17022 unsigned HOST_WIDE_INT bit;
17023 int nb, ne;
17024 int n = GET_MODE_PRECISION (mode);
17025
17026 if (mode != DImode && mode != SImode)
17027 return false;
17028
17029 if (INTVAL (mask) >= 0)
17030 {
17031 bit = val & -val;
17032 ne = exact_log2 (bit);
17033 nb = exact_log2 (val + bit);
17034 }
17035 else if (val + 1 == 0)
17036 {
17037 nb = n;
17038 ne = 0;
17039 }
17040 else if (val & 1)
17041 {
17042 val = ~val;
17043 bit = val & -val;
17044 nb = exact_log2 (bit);
17045 ne = exact_log2 (val + bit);
17046 }
17047 else
17048 {
17049 bit = val & -val;
17050 ne = exact_log2 (bit);
17051 if (val + bit == 0)
17052 nb = n;
17053 else
17054 nb = 0;
17055 }
17056
17057 nb--;
17058
17059 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
17060 return false;
17061
17062 if (b)
17063 *b = nb;
17064 if (e)
17065 *e = ne;
17066
17067 return true;
17068 }
17069
17070 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
17071 or rldicr instruction, to implement an AND with it in mode MODE. */
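
/* For instance, in DImode the mask 0xffffffff (nb == 31, ne == 0) is
   accepted, since clearing the upper 32 bits is a single rldicl; a mask
   that wraps past the top of the register, such as 0xff000000000000ff,
   cannot be done by a single rldicl or rldicr and is rejected here.  */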
17072
17073 bool
17074 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
17075 {
17076 int nb, ne;
17077
17078 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17079 return false;
17080
17081 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
17082 does not wrap. */
17083 if (mode == DImode)
17084 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
17085
17086 /* For SImode, rlwinm can do everything. */
17087 if (mode == SImode)
17088 return (nb < 32 && ne < 32);
17089
17090 return false;
17091 }
17092
17093 /* Return the instruction template for an AND with mask in mode MODE, with
17094 operands OPERANDS. If DOT is true, make it a record-form instruction. */
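
/* For example, with operands[2] == 0xffffffff in DImode (nb == 31,
   ne == 0) this returns "rldicl %0,%1,0,32", which clears the upper
   32 bits; with DOT set the record form "rldicl." is used instead.  */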
17095
17096 const char *
17097 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
17098 {
17099 int nb, ne;
17100
17101 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
17102 gcc_unreachable ();
17103
17104 if (mode == DImode && ne == 0)
17105 {
17106 operands[3] = GEN_INT (63 - nb);
17107 if (dot)
17108 return "rldicl. %0,%1,0,%3";
17109 return "rldicl %0,%1,0,%3";
17110 }
17111
17112 if (mode == DImode && nb == 63)
17113 {
17114 operands[3] = GEN_INT (63 - ne);
17115 if (dot)
17116 return "rldicr. %0,%1,0,%3";
17117 return "rldicr %0,%1,0,%3";
17118 }
17119
17120 if (nb < 32 && ne < 32)
17121 {
17122 operands[3] = GEN_INT (31 - nb);
17123 operands[4] = GEN_INT (31 - ne);
17124 if (dot)
17125 return "rlwinm. %0,%1,0,%3,%4";
17126 return "rlwinm %0,%1,0,%3,%4";
17127 }
17128
17129 gcc_unreachable ();
17130 }
17131
17132 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
17133 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
17134 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
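
/* For instance, (x << 4) & 0xff0 in SImode is accepted: the mask has
   nb == 11 and ne == 4, and since ne >= sh the mask keeps no bits below
   the shift count, so a single rlwinm can implement the whole thing.  */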
17135
17136 bool
17137 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
17138 {
17139 int nb, ne;
17140
17141 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17142 return false;
17143
17144 int n = GET_MODE_PRECISION (mode);
17145 int sh = -1;
17146
17147 if (CONST_INT_P (XEXP (shift, 1)))
17148 {
17149 sh = INTVAL (XEXP (shift, 1));
17150 if (sh < 0 || sh >= n)
17151 return false;
17152 }
17153
17154 rtx_code code = GET_CODE (shift);
17155
17156 /* Convert any shift by 0 to a rotate, to simplify below code. */
17157 if (sh == 0)
17158 code = ROTATE;
17159
17160 /* Convert rotate to simple shift if we can, to make analysis simpler. */
17161 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
17162 code = ASHIFT;
17163 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
17164 {
17165 code = LSHIFTRT;
17166 sh = n - sh;
17167 }
17168
17169 /* DImode rotates need rld*. */
17170 if (mode == DImode && code == ROTATE)
17171 return (nb == 63 || ne == 0 || ne == sh);
17172
17173 /* SImode rotates need rlw*. */
17174 if (mode == SImode && code == ROTATE)
17175 return (nb < 32 && ne < 32 && sh < 32);
17176
17177 /* Wrap-around masks are only okay for rotates. */
17178 if (ne > nb)
17179 return false;
17180
17181 /* Variable shifts are only okay for rotates. */
17182 if (sh < 0)
17183 return false;
17184
17185 /* Don't allow ASHIFT if the mask is wrong for that. */
17186 if (code == ASHIFT && ne < sh)
17187 return false;
17188
17189 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
17190 if the mask is wrong for that. */
17191 if (nb < 32 && ne < 32 && sh < 32
17192 && !(code == LSHIFTRT && nb >= 32 - sh))
17193 return true;
17194
17195 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
17196 if the mask is wrong for that. */
17197 if (code == LSHIFTRT)
17198 sh = 64 - sh;
17199 if (nb == 63 || ne == 0 || ne == sh)
17200 return !(code == LSHIFTRT && nb >= sh);
17201
17202 return false;
17203 }
17204
17205 /* Return the instruction template for a shift with mask in mode MODE, with
17206 operands OPERANDS. If DOT is true, make it a record-form instruction. */
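
/* For example, (x >> 16) & 0xffffffffffff in DImode has ne == 0 and
   nb == 47; the LSHIFTRT count 16 is rewritten as a left rotate of 48,
   giving "rldicl %0,%1,48,16", which is just srdi %0,%1,16.  */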
17207
17208 const char *
17209 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
17210 {
17211 int nb, ne;
17212
17213 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
17214 gcc_unreachable ();
17215
17216 if (mode == DImode && ne == 0)
17217 {
17218 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17219 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
17220 operands[3] = GEN_INT (63 - nb);
17221 if (dot)
17222 return "rld%I2cl. %0,%1,%2,%3";
17223 return "rld%I2cl %0,%1,%2,%3";
17224 }
17225
17226 if (mode == DImode && nb == 63)
17227 {
17228 operands[3] = GEN_INT (63 - ne);
17229 if (dot)
17230 return "rld%I2cr. %0,%1,%2,%3";
17231 return "rld%I2cr %0,%1,%2,%3";
17232 }
17233
17234 if (mode == DImode
17235 && GET_CODE (operands[4]) != LSHIFTRT
17236 && CONST_INT_P (operands[2])
17237 && ne == INTVAL (operands[2]))
17238 {
17239 operands[3] = GEN_INT (63 - nb);
17240 if (dot)
17241 return "rld%I2c. %0,%1,%2,%3";
17242 return "rld%I2c %0,%1,%2,%3";
17243 }
17244
17245 if (nb < 32 && ne < 32)
17246 {
17247 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17248 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
17249 operands[3] = GEN_INT (31 - nb);
17250 operands[4] = GEN_INT (31 - ne);
17251 if (dot)
17252 return "rlw%I2nm. %0,%1,%2,%3,%4";
17253 return "rlw%I2nm %0,%1,%2,%3,%4";
17254 }
17255
17256 gcc_unreachable ();
17257 }
17258
17259 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
17260 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
17261 ASHIFT, or LSHIFTRT) in mode MODE. */
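
/* For instance, an insert with shift 8 and mask 0xff00 in SImode is
   accepted: nb == 15, ne == 8, and mask and shift count all fit in the
   low 32 bits, so a single rlwimi can place the byte.  */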
17262
17263 bool
17264 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
17265 {
17266 int nb, ne;
17267
17268 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
17269 return false;
17270
17271 int n = GET_MODE_PRECISION (mode);
17272
17273 int sh = INTVAL (XEXP (shift, 1));
17274 if (sh < 0 || sh >= n)
17275 return false;
17276
17277 rtx_code code = GET_CODE (shift);
17278
17279 /* Convert any shift by 0 to a rotate, to simplify below code. */
17280 if (sh == 0)
17281 code = ROTATE;
17282
17283 /* Convert rotate to simple shift if we can, to make analysis simpler. */
17284 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
17285 code = ASHIFT;
17286 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
17287 {
17288 code = LSHIFTRT;
17289 sh = n - sh;
17290 }
17291
17292 /* DImode rotates need rldimi. */
17293 if (mode == DImode && code == ROTATE)
17294 return (ne == sh);
17295
17296 /* SImode rotates need rlwimi. */
17297 if (mode == SImode && code == ROTATE)
17298 return (nb < 32 && ne < 32 && sh < 32);
17299
17300 /* Wrap-around masks are only okay for rotates. */
17301 if (ne > nb)
17302 return false;
17303
17304 /* Don't allow ASHIFT if the mask is wrong for that. */
17305 if (code == ASHIFT && ne < sh)
17306 return false;
17307
17308 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
17309 if the mask is wrong for that. */
17310 if (nb < 32 && ne < 32 && sh < 32
17311 && !(code == LSHIFTRT && nb >= 32 - sh))
17312 return true;
17313
17314 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
17315 if the mask is wrong for that. */
17316 if (code == LSHIFTRT)
17317 sh = 64 - sh;
17318 if (ne == sh)
17319 return !(code == LSHIFTRT && nb >= sh);
17320
17321 return false;
17322 }
17323
17324 /* Return the instruction template for an insert with mask in mode MODE, with
17325 operands OPERANDS. If DOT is true, make it a record-form instruction. */
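
/* For example, a DImode insert with shift 32 and mask 0xffffffff00000000
   has ne == 32 == INTVAL (operands[2]) and nb == 63, so this returns
   "rldimi %0,%1,32,0"; the SImode 0xff00 case above instead yields
   "rlwimi %0,%1,8,16,23".  */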
17326
17327 const char *
17328 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
17329 {
17330 int nb, ne;
17331
17332 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
17333 gcc_unreachable ();
17334
17335 /* Prefer rldimi because rlwimi is cracked. */
17336 if (TARGET_POWERPC64
17337 && (!dot || mode == DImode)
17338 && GET_CODE (operands[4]) != LSHIFTRT
17339 && ne == INTVAL (operands[2]))
17340 {
17341 operands[3] = GEN_INT (63 - nb);
17342 if (dot)
17343 return "rldimi. %0,%1,%2,%3";
17344 return "rldimi %0,%1,%2,%3";
17345 }
17346
17347 if (nb < 32 && ne < 32)
17348 {
17349 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
17350 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
17351 operands[3] = GEN_INT (31 - nb);
17352 operands[4] = GEN_INT (31 - ne);
17353 if (dot)
17354 return "rlwimi. %0,%1,%2,%3,%4";
17355 return "rlwimi %0,%1,%2,%3,%4";
17356 }
17357
17358 gcc_unreachable ();
17359 }
17360
17361 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
17362 using two machine instructions. */
17363
17364 bool
17365 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
17366 {
17367 /* There are two kinds of AND we can handle with two insns:
17368 1) those we can do with two rl* insn;
17369 2) ori[s];xori[s].
17370
17371 We do not handle that last case yet. */
17372
17373 /* If there is just one stretch of ones, we can do it. */
17374 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
17375 return true;
17376
17377 /* Otherwise, fill in the lowest "hole"; if we can do the result with
17378 one insn, we can do the whole thing with two. */
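
  /* For example, with val == 0xff0ff: bit1 == 0x1, bit2 == 0x100 (the
     lowest bit of the hole), bit3 == 0x1000 (the lowest bit of the run
     above the hole), so val + bit3 - bit2 == 0xfffff, a single run that
     one rlwinm can do; the original AND then takes two insns.  */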
17379 unsigned HOST_WIDE_INT val = INTVAL (c);
17380 unsigned HOST_WIDE_INT bit1 = val & -val;
17381 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
17382 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
17383 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
17384 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
17385 }
17386
17387 /* Emit a potentially record-form instruction, setting DST from SRC.
17388 If DOT is 0, that is all; otherwise, set CCREG to the result of the
17389 signed comparison of DST with zero. If DOT is 1, the generated RTL
17390 doesn't care about the DST result; if DOT is 2, it does. If CCREG
17391 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
17392 a separate COMPARE. */
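
/* For instance, with DOT == 1 and CCREG == cr0 this emits a single
   PARALLEL of the CC-setting COMPARE and a clobber of DST, which is the
   shape the record-form ("dot") patterns above expect.  */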
17393
17394 static void
17395 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
17396 {
17397 if (dot == 0)
17398 {
17399 emit_move_insn (dst, src);
17400 return;
17401 }
17402
17403 if (cc_reg_not_cr0_operand (ccreg, CCmode))
17404 {
17405 emit_move_insn (dst, src);
17406 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
17407 return;
17408 }
17409
17410 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
17411 if (dot == 1)
17412 {
17413 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
17414 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
17415 }
17416 else
17417 {
17418 rtx set = gen_rtx_SET (dst, src);
17419 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
17420 }
17421 }
17422
17423 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
17424 If EXPAND is true, split rotate-and-mask instructions we generate to
17425 their constituent parts as well (this is used during expand); if DOT
17426 is 1, make the last insn a record-form instruction clobbering the
17427 destination GPR and setting the CC reg (from operands[3]); if 2, set
17428 that GPR as well as the CC reg. */
17429
17430 void
17431 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
17432 {
17433 gcc_assert (!(expand && dot));
17434
17435 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
17436
17437 /* If it is one stretch of ones, it is DImode; shift left, mask, then
17438 shift right. This generates better code than doing the masks without
17439 shifts, or shifting first right and then left. */
17440 int nb, ne;
17441 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
17442 {
17443 gcc_assert (mode == DImode);
17444
17445 int shift = 63 - nb;
17446 if (expand)
17447 {
17448 rtx tmp1 = gen_reg_rtx (DImode);
17449 rtx tmp2 = gen_reg_rtx (DImode);
17450 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
17451 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
17452 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
17453 }
17454 else
17455 {
17456 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
17457 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
17458 emit_move_insn (operands[0], tmp);
17459 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
17460 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17461 }
17462 return;
17463 }
17464
17465 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
17466 that does the rest. */
17467 unsigned HOST_WIDE_INT bit1 = val & -val;
17468 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
17469 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
17470 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
17471
17472 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
17473 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
17474
17475 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
17476
17477 /* Two "no-rotate"-and-mask instructions, for SImode. */
17478 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
17479 {
17480 gcc_assert (mode == SImode);
17481
17482 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
17483 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
17484 emit_move_insn (reg, tmp);
17485 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
17486 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17487 return;
17488 }
17489
17490 gcc_assert (mode == DImode);
17491
17492 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
17493 insns; we have to do the first in SImode, because it wraps. */
17494 if (mask2 <= 0xffffffff
17495 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
17496 {
17497 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
17498 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
17499 GEN_INT (mask1));
17500 rtx reg_low = gen_lowpart (SImode, reg);
17501 emit_move_insn (reg_low, tmp);
17502 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
17503 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17504 return;
17505 }
17506
17507 /* Two rld* insns: rotate, clear the hole in the middle (which now is
17508 at the top end), rotate back and clear the other hole. */
17509 int right = exact_log2 (bit3);
17510 int left = 64 - right;
17511
17512 /* Rotate the mask too. */
17513 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
17514
17515 if (expand)
17516 {
17517 rtx tmp1 = gen_reg_rtx (DImode);
17518 rtx tmp2 = gen_reg_rtx (DImode);
17519 rtx tmp3 = gen_reg_rtx (DImode);
17520 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
17521 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
17522 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
17523 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
17524 }
17525 else
17526 {
17527 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
17528 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
17529 emit_move_insn (operands[0], tmp);
17530 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
17531 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
17532 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
17533 }
17534 }
17535 \f
17536 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
17537 for lfq and stfq insns iff the registers are hard registers. */
17538
17539 int
17540 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
17541 {
17542 /* We might have been passed a SUBREG. */
17543 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
17544 return 0;
17545
17546 /* We might have been passed non-floating-point registers. */
17547 if (!FP_REGNO_P (REGNO (reg1))
17548 || !FP_REGNO_P (REGNO (reg2)))
17549 return 0;
17550
17551 return (REGNO (reg1) == REGNO (reg2) - 1);
17552 }
17553
17554 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
17555 addr1 and addr2 must be in consecutive memory locations
17556 (addr2 == addr1 + 8). */
17557
17558 int
17559 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
17560 {
17561 rtx addr1, addr2;
17562 unsigned int reg1, reg2;
17563 int offset1, offset2;
17564
17565 /* The mems cannot be volatile. */
17566 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
17567 return 0;
17568
17569 addr1 = XEXP (mem1, 0);
17570 addr2 = XEXP (mem2, 0);
17571
17572 /* Extract an offset (if used) from the first addr. */
17573 if (GET_CODE (addr1) == PLUS)
17574 {
17575 /* If not a REG, return zero. */
17576 if (GET_CODE (XEXP (addr1, 0)) != REG)
17577 return 0;
17578 else
17579 {
17580 reg1 = REGNO (XEXP (addr1, 0));
17581 /* The offset must be constant! */
17582 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
17583 return 0;
17584 offset1 = INTVAL (XEXP (addr1, 1));
17585 }
17586 }
17587 else if (GET_CODE (addr1) != REG)
17588 return 0;
17589 else
17590 {
17591 reg1 = REGNO (addr1);
17592 /* This was a simple (mem (reg)) expression. Offset is 0. */
17593 offset1 = 0;
17594 }
17595
17596 /* And now for the second addr. */
17597 if (GET_CODE (addr2) == PLUS)
17598 {
17599 /* If not a REG, return zero. */
17600 if (GET_CODE (XEXP (addr2, 0)) != REG)
17601 return 0;
17602 else
17603 {
17604 reg2 = REGNO (XEXP (addr2, 0));
17605 /* The offset must be constant. */
17606 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
17607 return 0;
17608 offset2 = INTVAL (XEXP (addr2, 1));
17609 }
17610 }
17611 else if (GET_CODE (addr2) != REG)
17612 return 0;
17613 else
17614 {
17615 reg2 = REGNO (addr2);
17616 /* This was a simple (mem (reg)) expression. Offset is 0. */
17617 offset2 = 0;
17618 }
17619
17620 /* Both of these must have the same base register. */
17621 if (reg1 != reg2)
17622 return 0;
17623
17624 /* The offset for the second addr must be 8 more than the first addr. */
17625 if (offset2 != offset1 + 8)
17626 return 0;
17627
17628 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
17629 instructions. */
17630 return 1;
17631 }
17632 \f
17633
17634 rtx
17635 rs6000_secondary_memory_needed_rtx (machine_mode mode)
17636 {
17637 static bool eliminated = false;
17638 rtx ret;
17639
17640 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
17641 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
17642 else
17643 {
17644 rtx mem = cfun->machine->sdmode_stack_slot;
17645 gcc_assert (mem != NULL_RTX);
17646
17647 if (!eliminated)
17648 {
17649 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
17650 cfun->machine->sdmode_stack_slot = mem;
17651 eliminated = true;
17652 }
17653 ret = mem;
17654 }
17655
17656 if (TARGET_DEBUG_ADDR)
17657 {
17658 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
17659 GET_MODE_NAME (mode));
17660 if (!ret)
17661 fprintf (stderr, "\tNULL_RTX\n");
17662 else
17663 debug_rtx (ret);
17664 }
17665
17666 return ret;
17667 }
17668
17669 /* Return the mode to be used for memory when a secondary memory
17670 location is needed. For SDmode values we need to use DDmode, in
17671 all other cases we can use the same mode. */
17672 machine_mode
17673 rs6000_secondary_memory_needed_mode (machine_mode mode)
17674 {
17675 if (lra_in_progress && mode == SDmode)
17676 return DDmode;
17677 return mode;
17678 }
17679
17680 static tree
17681 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
17682 {
17683 /* Don't walk into types. */
17684 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
17685 {
17686 *walk_subtrees = 0;
17687 return NULL_TREE;
17688 }
17689
17690 switch (TREE_CODE (*tp))
17691 {
17692 case VAR_DECL:
17693 case PARM_DECL:
17694 case FIELD_DECL:
17695 case RESULT_DECL:
17696 case SSA_NAME:
17697 case REAL_CST:
17698 case MEM_REF:
17699 case VIEW_CONVERT_EXPR:
17700 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
17701 return *tp;
17702 break;
17703 default:
17704 break;
17705 }
17706
17707 return NULL_TREE;
17708 }
17709
17710 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
17711 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
17712 only work on the traditional altivec registers, note if an altivec register
17713 was chosen. */
17714
17715 static enum rs6000_reg_type
17716 register_to_reg_type (rtx reg, bool *is_altivec)
17717 {
17718 HOST_WIDE_INT regno;
17719 enum reg_class rclass;
17720
17721 if (GET_CODE (reg) == SUBREG)
17722 reg = SUBREG_REG (reg);
17723
17724 if (!REG_P (reg))
17725 return NO_REG_TYPE;
17726
17727 regno = REGNO (reg);
17728 if (regno >= FIRST_PSEUDO_REGISTER)
17729 {
17730 if (!lra_in_progress && !reload_in_progress && !reload_completed)
17731 return PSEUDO_REG_TYPE;
17732
17733 regno = true_regnum (reg);
17734 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
17735 return PSEUDO_REG_TYPE;
17736 }
17737
17738 gcc_assert (regno >= 0);
17739
17740 if (is_altivec && ALTIVEC_REGNO_P (regno))
17741 *is_altivec = true;
17742
17743 rclass = rs6000_regno_regclass[regno];
17744 return reg_class_to_reg_type[(int)rclass];
17745 }
17746
17747 /* Helper function to return the cost of adding a TOC entry address. */
17748
17749 static inline int
17750 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
17751 {
17752 int ret;
17753
17754 if (TARGET_CMODEL != CMODEL_SMALL)
17755 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
17756
17757 else
17758 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
17759
17760 return ret;
17761 }
17762
17763 /* Helper function for rs6000_secondary_reload to determine whether the memory
17764 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
17765 needs reloading. Return negative if the memory is not handled by the memory
17766 helper functions and a different reload method should be tried, 0 if no
17767 additional instructions are needed, and positive to give the extra cost of
17768 accessing the memory. */
17769
17770 static int
17771 rs6000_secondary_reload_memory (rtx addr,
17772 enum reg_class rclass,
17773 machine_mode mode)
17774 {
17775 int extra_cost = 0;
17776 rtx reg, and_arg, plus_arg0, plus_arg1;
17777 addr_mask_type addr_mask;
17778 const char *type = NULL;
17779 const char *fail_msg = NULL;
17780
17781 if (GPR_REG_CLASS_P (rclass))
17782 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
17783
17784 else if (rclass == FLOAT_REGS)
17785 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
17786
17787 else if (rclass == ALTIVEC_REGS)
17788 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
17789
17790 /* For the combined VSX_REGS, turn off Altivec AND -16. */
17791 else if (rclass == VSX_REGS)
17792 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
17793 & ~RELOAD_REG_AND_M16);
17794
17795 else
17796 {
17797 if (TARGET_DEBUG_ADDR)
17798 fprintf (stderr,
17799 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
17800 "class is not GPR, FPR, VMX\n",
17801 GET_MODE_NAME (mode), reg_class_names[rclass]);
17802
17803 return -1;
17804 }
17805
17806 /* If the register isn't valid in this register class, just return now. */
17807 if ((addr_mask & RELOAD_REG_VALID) == 0)
17808 {
17809 if (TARGET_DEBUG_ADDR)
17810 fprintf (stderr,
17811 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
17812 "not valid in class\n",
17813 GET_MODE_NAME (mode), reg_class_names[rclass]);
17814
17815 return -1;
17816 }
17817
17818 switch (GET_CODE (addr))
17819 {
17820 /* Does the register class support auto update forms for this mode? We
17821 don't need a scratch register, since the powerpc only supports
17822 PRE_INC, PRE_DEC, and PRE_MODIFY. */
17823 case PRE_INC:
17824 case PRE_DEC:
17825 reg = XEXP (addr, 0);
17826 if (!base_reg_operand (addr, GET_MODE (reg)))
17827 {
17828 fail_msg = "no base register #1";
17829 extra_cost = -1;
17830 }
17831
17832 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
17833 {
17834 extra_cost = 1;
17835 type = "update";
17836 }
17837 break;
17838
17839 case PRE_MODIFY:
17840 reg = XEXP (addr, 0);
17841 plus_arg1 = XEXP (addr, 1);
17842 if (!base_reg_operand (reg, GET_MODE (reg))
17843 || GET_CODE (plus_arg1) != PLUS
17844 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
17845 {
17846 fail_msg = "bad PRE_MODIFY";
17847 extra_cost = -1;
17848 }
17849
17850 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
17851 {
17852 extra_cost = 1;
17853 type = "update";
17854 }
17855 break;
17856
17857 /* Do we need to simulate AND -16 to clear the bottom address bits used
17858 in VMX load/stores? Only allow the AND for vector sizes. */
17859 case AND:
17860 and_arg = XEXP (addr, 0);
17861 if (GET_MODE_SIZE (mode) != 16
17862 || GET_CODE (XEXP (addr, 1)) != CONST_INT
17863 || INTVAL (XEXP (addr, 1)) != -16)
17864 {
17865 fail_msg = "bad Altivec AND #1";
17866 extra_cost = -1;
17867 }
17868
17869 if (rclass != ALTIVEC_REGS)
17870 {
17871 if (legitimate_indirect_address_p (and_arg, false))
17872 extra_cost = 1;
17873
17874 else if (legitimate_indexed_address_p (and_arg, false))
17875 extra_cost = 2;
17876
17877 else
17878 {
17879 fail_msg = "bad Altivec AND #2";
17880 extra_cost = -1;
17881 }
17882
17883 type = "and";
17884 }
17885 break;
17886
17887 /* If this is an indirect address, make sure it is a base register. */
17888 case REG:
17889 case SUBREG:
17890 if (!legitimate_indirect_address_p (addr, false))
17891 {
17892 extra_cost = 1;
17893 type = "move";
17894 }
17895 break;
17896
17897 /* If this is an indexed address, make sure the register class can handle
17898 indexed addresses for this mode. */
17899 case PLUS:
17900 plus_arg0 = XEXP (addr, 0);
17901 plus_arg1 = XEXP (addr, 1);
17902
17903 /* (plus (plus (reg) (constant)) (constant)) is generated during
17904 push_reload processing, so handle it now. */
17905 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
17906 {
17907 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17908 {
17909 extra_cost = 1;
17910 type = "offset";
17911 }
17912 }
17913
17914 /* (plus (plus (reg) (constant)) (reg)) is also generated during
17915 push_reload processing, so handle it now. */
17916 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
17917 {
17918 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17919 {
17920 extra_cost = 1;
17921 type = "indexed #2";
17922 }
17923 }
17924
17925 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
17926 {
17927 fail_msg = "no base register #2";
17928 extra_cost = -1;
17929 }
17930
17931 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
17932 {
17933 if ((addr_mask & RELOAD_REG_INDEXED) == 0
17934 || !legitimate_indexed_address_p (addr, false))
17935 {
17936 extra_cost = 1;
17937 type = "indexed";
17938 }
17939 }
17940
17941 /* Make sure the register class can handle offset addresses. */
17942 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
17943 {
17944 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17945 {
17946 extra_cost = 1;
17947 type = "offset";
17948 }
17949 }
17950
17951 else
17952 {
17953 fail_msg = "bad PLUS";
17954 extra_cost = -1;
17955 }
17956
17957 break;
17958
17959 case LO_SUM:
17960 if (!legitimate_lo_sum_address_p (mode, addr, false))
17961 {
17962 fail_msg = "bad LO_SUM";
17963 extra_cost = -1;
17964 }
17965
17966 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17967 {
17968 extra_cost = 1;
17969 type = "lo_sum";
17970 }
17971 break;
17972
17973 /* Static addresses need to create a TOC entry. */
17974 case CONST:
17975 case SYMBOL_REF:
17976 case LABEL_REF:
17977 type = "address";
17978 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
17979 break;
17980
17981 /* TOC references look like offsetable memory. */
17982 case UNSPEC:
17983 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
17984 {
17985 fail_msg = "bad UNSPEC";
17986 extra_cost = -1;
17987 }
17988
17989 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17990 {
17991 extra_cost = 1;
17992 type = "toc reference";
17993 }
17994 break;
17995
17996 default:
17997 {
17998 fail_msg = "bad address";
17999 extra_cost = -1;
18000 }
18001 }
18002
18003 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
18004 {
18005 if (extra_cost < 0)
18006 fprintf (stderr,
18007 "rs6000_secondary_reload_memory error: mode = %s, "
18008 "class = %s, addr_mask = '%s', %s\n",
18009 GET_MODE_NAME (mode),
18010 reg_class_names[rclass],
18011 rs6000_debug_addr_mask (addr_mask, false),
18012 (fail_msg != NULL) ? fail_msg : "<bad address>");
18013
18014 else
18015 fprintf (stderr,
18016 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18017 "addr_mask = '%s', extra cost = %d, %s\n",
18018 GET_MODE_NAME (mode),
18019 reg_class_names[rclass],
18020 rs6000_debug_addr_mask (addr_mask, false),
18021 extra_cost,
18022 (type) ? type : "<none>");
18023
18024 debug_rtx (addr);
18025 }
18026
18027 return extra_cost;
18028 }
18029
18030 /* Helper function for rs6000_secondary_reload to return true if a move to a
18031 different register class is really a simple move. */
18032
18033 static bool
18034 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
18035 enum rs6000_reg_type from_type,
18036 machine_mode mode)
18037 {
18038 int size;
18039
18040 /* Add support for various direct moves available. In this function, we only
18041 look at cases where we don't need any extra registers, and one or more
18042 simple move insns are issued. At present, 32-bit integers are not allowed
18043 in FPR/VSX registers. Single precision binary floating point is not a
18044 simple move because we need to convert to the single precision memory
18045 layout. The 4-byte SDmode can be moved. */
18046 size = GET_MODE_SIZE (mode);
18047 if (TARGET_DIRECT_MOVE
18048 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
18049 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18050 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
18051 return true;
18052
18053 else if (TARGET_DIRECT_MOVE_128 && size == 16
18054 && ((to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18055 || (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)))
18056 return true;
18057
18058 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
18059 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
18060 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
18061 return true;
18062
18063 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
18064 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
18065 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
18066 return true;
18067
18068 return false;
18069 }
18070
18071 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
18072 special direct moves that involve allocating an extra register. Return true
18073 if there is a helper function for the move, recording its insn code and
18074 extra cost in SRI; return false if not. */
18075
18076 static bool
18077 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
18078 enum rs6000_reg_type from_type,
18079 machine_mode mode,
18080 secondary_reload_info *sri,
18081 bool altivec_p)
18082 {
18083 bool ret = false;
18084 enum insn_code icode = CODE_FOR_nothing;
18085 int cost = 0;
18086 int size = GET_MODE_SIZE (mode);
18087
18088 if (TARGET_POWERPC64)
18089 {
18090 if (size == 16)
18091 {
18092 /* Handle moving 128-bit values from GPRs to VSX registers on
18093 ISA 2.07 (power8, power9) when running in 64-bit mode using
18094 XXPERMDI to glue the two 64-bit values back together. */
18095 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18096 {
18097 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
18098 icode = reg_addr[mode].reload_vsx_gpr;
18099 }
18100
18101 /* Handle moving 128-bit values from VSX registers to GPRs on
18102 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
18103 bottom 64-bit value. */
18104 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18105 {
18106 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
18107 icode = reg_addr[mode].reload_gpr_vsx;
18108 }
18109 }
18110
18111 else if (mode == SFmode)
18112 {
18113 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18114 {
18115 cost = 3; /* xscvdpspn, mfvsrd, and. */
18116 icode = reg_addr[mode].reload_gpr_vsx;
18117 }
18118
18119 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18120 {
18121 cost = 2; /* mtvsrz, xscvspdpn. */
18122 icode = reg_addr[mode].reload_vsx_gpr;
18123 }
18124 }
18125 }
18126
18127 if (TARGET_POWERPC64 && size == 16)
18128 {
18129 /* Handle moving 128-bit values from GPRs to VSX registers on
18130 ISA 2.07 when running in 64-bit mode using XXPERMDI to glue the two
18131 64-bit values back together. */
18132 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
18133 {
18134 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
18135 icode = reg_addr[mode].reload_vsx_gpr;
18136 }
18137
18138 /* Handle moving 128-bit values from VSX registers to GPRs on
18139 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
18140 bottom 64-bit value. */
18141 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
18142 {
18143 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
18144 icode = reg_addr[mode].reload_gpr_vsx;
18145 }
18146 }
18147
18148 else if (!TARGET_POWERPC64 && size == 8)
18149 {
18150 /* Handle moving 64-bit values from GPRs to floating point registers on
18151 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
18152 32-bit values back together. Altivec register classes must be handled
18153 specially since a different instruction is used, and the secondary
18154 reload support requires a single instruction class in the scratch
18155 register constraint. However, right now TFmode is not allowed in
18156 Altivec registers, so the pattern will never match. */
18157 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
18158 {
18159 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
18160 icode = reg_addr[mode].reload_fpr_gpr;
18161 }
18162 }
18163
18164 if (icode != CODE_FOR_nothing)
18165 {
18166 ret = true;
18167 if (sri)
18168 {
18169 sri->icode = icode;
18170 sri->extra_cost = cost;
18171 }
18172 }
18173
18174 return ret;
18175 }
18176
18177 /* Return whether a move between two register classes can be done either
18178 directly (simple move) or via a pattern that uses a single extra temporary
18179 (using ISA 2.07's direct move in this case). */
18180
18181 static bool
18182 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
18183 enum rs6000_reg_type from_type,
18184 machine_mode mode,
18185 secondary_reload_info *sri,
18186 bool altivec_p)
18187 {
18188 /* Fall back to load/store reloads if either type is not a register. */
18189 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
18190 return false;
18191
18192 /* If we haven't allocated registers yet, assume the move can be done for the
18193 standard register types. */
18194 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
18195 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
18196 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
18197 return true;
18198
18199 /* A move within the same set of registers is a simple move for
18200 non-specialized registers. */
18201 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
18202 return true;
18203
18204 /* Check whether a simple move can be done directly. */
18205 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
18206 {
18207 if (sri)
18208 {
18209 sri->icode = CODE_FOR_nothing;
18210 sri->extra_cost = 0;
18211 }
18212 return true;
18213 }
18214
18215 /* Now check if we can do it in a few steps. */
18216 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
18217 altivec_p);
18218 }
18219
18220 /* Inform reload about cases where moving X with a mode MODE to a register in
18221 RCLASS requires an extra scratch or immediate register. Return the class
18222 needed for the immediate register.
18223
18224 For VSX and Altivec, we may need a register to convert sp+offset into
18225 reg+sp.
18226
18227 For misaligned 64-bit gpr loads and stores we need a register to
18228 convert an offset address to indirect. */
18229
18230 static reg_class_t
18231 rs6000_secondary_reload (bool in_p,
18232 rtx x,
18233 reg_class_t rclass_i,
18234 machine_mode mode,
18235 secondary_reload_info *sri)
18236 {
18237 enum reg_class rclass = (enum reg_class) rclass_i;
18238 reg_class_t ret = ALL_REGS;
18239 enum insn_code icode;
18240 bool default_p = false;
18241 bool done_p = false;
18242
18243 /* Allow subreg of memory before/during reload. */
18244 bool memory_p = (MEM_P (x)
18245 || (!reload_completed && GET_CODE (x) == SUBREG
18246 && MEM_P (SUBREG_REG (x))));
18247
18248 sri->icode = CODE_FOR_nothing;
18249 sri->extra_cost = 0;
18250 icode = ((in_p)
18251 ? reg_addr[mode].reload_load
18252 : reg_addr[mode].reload_store);
18253
18254 if (REG_P (x) || register_operand (x, mode))
18255 {
18256 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
18257 bool altivec_p = (rclass == ALTIVEC_REGS);
18258 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
18259
18260 if (!in_p)
18261 {
18262 enum rs6000_reg_type exchange = to_type;
18263 to_type = from_type;
18264 from_type = exchange;
18265 }
18266
18267 /* Can we do a direct move of some sort? */
18268 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
18269 altivec_p))
18270 {
18271 icode = (enum insn_code)sri->icode;
18272 default_p = false;
18273 done_p = true;
18274 ret = NO_REGS;
18275 }
18276 }
18277
18278 /* Make sure 0.0 is not reloaded or forced into memory. */
18279 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
18280 {
18281 ret = NO_REGS;
18282 default_p = false;
18283 done_p = true;
18284 }
18285
18286 /* If this is a scalar floating point value and we want to load it into the
18287 traditional Altivec registers, move it via a traditional floating
18288 point register, unless we have D-form addressing. Also make sure that
18289 non-zero constants use a FPR. */
18290 if (!done_p && reg_addr[mode].scalar_in_vmx_p
18291 && !mode_supports_vmx_dform (mode)
18292 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
18293 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
18294 {
18295 ret = FLOAT_REGS;
18296 default_p = false;
18297 done_p = true;
18298 }
18299
18300 /* Handle reload of load/stores if we have reload helper functions. */
18301 if (!done_p && icode != CODE_FOR_nothing && memory_p)
18302 {
18303 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
18304 mode);
18305
18306 if (extra_cost >= 0)
18307 {
18308 done_p = true;
18309 ret = NO_REGS;
18310 if (extra_cost > 0)
18311 {
18312 sri->extra_cost = extra_cost;
18313 sri->icode = icode;
18314 }
18315 }
18316 }
18317
18318 /* Handle unaligned loads and stores of integer registers. */
18319 if (!done_p && TARGET_POWERPC64
18320 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
18321 && memory_p
18322 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
18323 {
18324 rtx addr = XEXP (x, 0);
18325 rtx off = address_offset (addr);
18326
18327 if (off != NULL_RTX)
18328 {
18329 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
18330 unsigned HOST_WIDE_INT offset = INTVAL (off);
18331
18332 /* We need a secondary reload when our legitimate_address_p
18333 says the address is good (as otherwise the entire address
18334 will be reloaded), and the offset is not a multiple of
18335 four or we have an address wrap. Address wrap will only
18336 occur for LO_SUMs since legitimate_offset_address_p
18337 rejects addresses for 16-byte mems that will wrap. */
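          /* For example, a 16-byte access at offset 0x7ff2 (extra == 8)
             passes the offset test, 0x7ff2 + 0x8000 < 0x10000 - 8, but is
             not a multiple of four, so the DS-form ld/std cannot encode it
             and a scratch register is needed.  */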
18338 if (GET_CODE (addr) == LO_SUM
18339 ? (1 /* legitimate_address_p allows any offset for lo_sum */
18340 && ((offset & 3) != 0
18341 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
18342 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
18343 && (offset & 3) != 0))
18344 {
18345 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
18346 if (in_p)
18347 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
18348 : CODE_FOR_reload_di_load);
18349 else
18350 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
18351 : CODE_FOR_reload_di_store);
18352 sri->extra_cost = 2;
18353 ret = NO_REGS;
18354 done_p = true;
18355 }
18356 else
18357 default_p = true;
18358 }
18359 else
18360 default_p = true;
18361 }
18362
18363 if (!done_p && !TARGET_POWERPC64
18364 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
18365 && memory_p
18366 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
18367 {
18368 rtx addr = XEXP (x, 0);
18369 rtx off = address_offset (addr);
18370
18371 if (off != NULL_RTX)
18372 {
18373 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
18374 unsigned HOST_WIDE_INT offset = INTVAL (off);
18375
18376 /* We need a secondary reload when our legitimate_address_p
18377 says the address is good (as otherwise the entire address
18378 will be reloaded), and we have a wrap.
18379
18380 legitimate_lo_sum_address_p allows LO_SUM addresses to
18381 have any offset so test for wrap in the low 16 bits.
18382
18383 legitimate_offset_address_p checks for the range
18384 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
18385 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
18386 [0x7ff4,0x7fff] respectively, so test for the
18387 intersection of these ranges, [0x7ffc,0x7fff] and
18388 [0x7ff4,0x7ff7] respectively.
18389
18390 Note that the address we see here may have been
18391 manipulated by legitimize_reload_address. */
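          /* For example, with UNITS_PER_WORD == 4 and an 8-byte value,
             extra == 4, so offsets 0x7ffc through 0x7fff satisfy
             offset - (0x8000 - extra) < UNITS_PER_WORD: the second word
             would then sit past the 16-bit displacement range.  */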
18392 if (GET_CODE (addr) == LO_SUM
18393 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
18394 : offset - (0x8000 - extra) < UNITS_PER_WORD)
18395 {
18396 if (in_p)
18397 sri->icode = CODE_FOR_reload_si_load;
18398 else
18399 sri->icode = CODE_FOR_reload_si_store;
18400 sri->extra_cost = 2;
18401 ret = NO_REGS;
18402 done_p = true;
18403 }
18404 else
18405 default_p = true;
18406 }
18407 else
18408 default_p = true;
18409 }
18410
18411 if (!done_p)
18412 default_p = true;
18413
18414 if (default_p)
18415 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
18416
18417 gcc_assert (ret != ALL_REGS);
18418
18419 if (TARGET_DEBUG_ADDR)
18420 {
18421 fprintf (stderr,
18422 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
18423 "mode = %s",
18424 reg_class_names[ret],
18425 in_p ? "true" : "false",
18426 reg_class_names[rclass],
18427 GET_MODE_NAME (mode));
18428
18429 if (reload_completed)
18430 fputs (", after reload", stderr);
18431
18432 if (!done_p)
18433 fputs (", done_p not set", stderr);
18434
18435 if (default_p)
18436 fputs (", default secondary reload", stderr);
18437
18438 if (sri->icode != CODE_FOR_nothing)
18439 fprintf (stderr, ", reload func = %s, extra cost = %d",
18440 insn_data[sri->icode].name, sri->extra_cost);
18441
18442 fputs ("\n", stderr);
18443 debug_rtx (x);
18444 }
18445
18446 return ret;
18447 }
18448
18449 /* Better tracing for rs6000_secondary_reload_inner. */
18450
18451 static void
18452 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
18453 bool store_p)
18454 {
18455 rtx set, clobber;
18456
18457 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
18458
18459 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
18460 store_p ? "store" : "load");
18461
18462 if (store_p)
18463 set = gen_rtx_SET (mem, reg);
18464 else
18465 set = gen_rtx_SET (reg, mem);
18466
18467 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
18468 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
18469 }
18470
18471 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
18472 ATTRIBUTE_NORETURN;
18473
18474 static void
18475 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
18476 bool store_p)
18477 {
18478 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
18479 gcc_unreachable ();
18480 }
18481
18482 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
18483 reload helper functions. These were identified in
18484 rs6000_secondary_reload_memory, and if reload decided to use the secondary
18485 reload, it calls the insns:
18486 reload_<RELOAD:mode>_<P:mptrsize>_store
18487 reload_<RELOAD:mode>_<P:mptrsize>_load
18488
18489 which in turn calls this function, to do whatever is necessary to create
18490 valid addresses. */
18491
18492 void
18493 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
18494 {
18495 int regno = true_regnum (reg);
18496 machine_mode mode = GET_MODE (reg);
18497 addr_mask_type addr_mask;
18498 rtx addr;
18499 rtx new_addr;
18500 rtx op_reg, op0, op1;
18501 rtx and_op;
18502 rtx cc_clobber;
18503 rtvec rv;
18504
18505 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
18506 || !base_reg_operand (scratch, GET_MODE (scratch)))
18507 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18508
18509 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
18510 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18511
18512 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
18513 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18514
18515 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
18516 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18517
18518 else
18519 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18520
18521 /* Make sure the mode is valid in this register class. */
18522 if ((addr_mask & RELOAD_REG_VALID) == 0)
18523 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18524
18525 if (TARGET_DEBUG_ADDR)
18526 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
18527
18528 new_addr = addr = XEXP (mem, 0);
18529 switch (GET_CODE (addr))
18530 {
18531 /* Does the register class support auto update forms for this mode? If
18532 not, do the update now. We don't need a scratch register, since the
18533 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
18534 case PRE_INC:
18535 case PRE_DEC:
18536 op_reg = XEXP (addr, 0);
18537 if (!base_reg_operand (op_reg, Pmode))
18538 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18539
18540 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18541 {
18542 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
18543 new_addr = op_reg;
18544 }
18545 break;
18546
18547 case PRE_MODIFY:
18548 op0 = XEXP (addr, 0);
18549 op1 = XEXP (addr, 1);
18550 if (!base_reg_operand (op0, Pmode)
18551 || GET_CODE (op1) != PLUS
18552 || !rtx_equal_p (op0, XEXP (op1, 0)))
18553 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18554
18555 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18556 {
18557 emit_insn (gen_rtx_SET (op0, op1));
18558 new_addr = reg;
18559 }
18560 break;
18561
18562 /* Do we need to simulate AND -16 to clear the bottom address bits used
18563 in VMX load/stores? */
18564 case AND:
18565 op0 = XEXP (addr, 0);
18566 op1 = XEXP (addr, 1);
18567 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
18568 {
18569 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
18570 op_reg = op0;
18571
18572 else if (GET_CODE (op1) == PLUS)
18573 {
18574 emit_insn (gen_rtx_SET (scratch, op1));
18575 op_reg = scratch;
18576 }
18577
18578 else
18579 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18580
18581 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
18582 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
18583 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
18584 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
18585 new_addr = scratch;
18586 }
18587 break;
18588
18589 /* If this is an indirect address, make sure it is a base register. */
18590 case REG:
18591 case SUBREG:
18592 if (!base_reg_operand (addr, GET_MODE (addr)))
18593 {
18594 emit_insn (gen_rtx_SET (scratch, addr));
18595 new_addr = scratch;
18596 }
18597 break;
18598
18599 /* If this is an indexed address, make sure the register class can handle
18600 indexed addresses for this mode. */
18601 case PLUS:
18602 op0 = XEXP (addr, 0);
18603 op1 = XEXP (addr, 1);
18604 if (!base_reg_operand (op0, Pmode))
18605 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18606
18607 else if (int_reg_operand (op1, Pmode))
18608 {
18609 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18610 {
18611 emit_insn (gen_rtx_SET (scratch, addr));
18612 new_addr = scratch;
18613 }
18614 }
18615
18616 /* Make sure the register class can handle offset addresses. */
18617 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
18618 {
18619 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18620 {
18621 emit_insn (gen_rtx_SET (scratch, addr));
18622 new_addr = scratch;
18623 }
18624 }
18625
18626 else
18627 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18628
18629 break;
18630
18631 case LO_SUM:
18632 op0 = XEXP (addr, 0);
18633 op1 = XEXP (addr, 1);
18634 if (!base_reg_operand (op0, Pmode))
18635 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18636
18637 else if (int_reg_operand (op1, Pmode))
18638 {
18639 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18640 {
18641 emit_insn (gen_rtx_SET (scratch, addr));
18642 new_addr = scratch;
18643 }
18644 }
18645
18646 /* Make sure the register class can handle offset addresses. */
18647 else if (legitimate_lo_sum_address_p (mode, addr, false))
18648 {
18649 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18650 {
18651 emit_insn (gen_rtx_SET (scratch, addr));
18652 new_addr = scratch;
18653 }
18654 }
18655
18656 else
18657 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18658
18659 break;
18660
18661 case SYMBOL_REF:
18662 case CONST:
18663 case LABEL_REF:
18664 rs6000_emit_move (scratch, addr, Pmode);
18665 new_addr = scratch;
18666 break;
18667
18668 default:
18669 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18670 }
18671
18672 /* Adjust the address if it changed. */
18673 if (addr != new_addr)
18674 {
18675 mem = replace_equiv_address_nv (mem, new_addr);
18676 if (TARGET_DEBUG_ADDR)
18677 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
18678 }
18679
18680 /* Now create the move. */
18681 if (store_p)
18682 emit_insn (gen_rtx_SET (mem, reg));
18683 else
18684 emit_insn (gen_rtx_SET (reg, mem));
18685
18686 return;
18687 }
18688
18689 /* Convert reloads involving 64-bit gprs and misaligned offset
18690 addressing, or multiple 32-bit gprs and offsets that are too large,
18691 to use indirect addressing. */
18692
18693 void
18694 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
18695 {
18696 int regno = true_regnum (reg);
18697 enum reg_class rclass;
18698 rtx addr;
18699 rtx scratch_or_premodify = scratch;
18700
18701 if (TARGET_DEBUG_ADDR)
18702 {
18703 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
18704 store_p ? "store" : "load");
18705 fprintf (stderr, "reg:\n");
18706 debug_rtx (reg);
18707 fprintf (stderr, "mem:\n");
18708 debug_rtx (mem);
18709 fprintf (stderr, "scratch:\n");
18710 debug_rtx (scratch);
18711 }
18712
18713 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
18714 gcc_assert (GET_CODE (mem) == MEM);
18715 rclass = REGNO_REG_CLASS (regno);
18716 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
18717 addr = XEXP (mem, 0);
18718
18719 if (GET_CODE (addr) == PRE_MODIFY)
18720 {
18721 gcc_assert (REG_P (XEXP (addr, 0))
18722 && GET_CODE (XEXP (addr, 1)) == PLUS
18723 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
18724 scratch_or_premodify = XEXP (addr, 0);
18725 if (!HARD_REGISTER_P (scratch_or_premodify))
18726 /* If we have a pseudo here then reload will have arranged
18727 to have it replaced, but only in the original insn.
18728 Use the replacement here too. */
18729 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
18730
18731 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
18732 expressions from the original insn, without unsharing them.
18733 Any RTL that points into the original insn will of course
18734 have register replacements applied. That is why we don't
18735 need to look for replacements under the PLUS. */
18736 addr = XEXP (addr, 1);
18737 }
18738 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
18739
18740 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
18741
18742 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
18743
18744 /* Now create the move. */
18745 if (store_p)
18746 emit_insn (gen_rtx_SET (mem, reg));
18747 else
18748 emit_insn (gen_rtx_SET (reg, mem));
18749
18750 return;
18751 }
18752
18753 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
18754 this function has any SDmode references. If we are on a power7 or later, we
18755 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
18756 can load/store the value. */
18757
18758 static void
18759 rs6000_alloc_sdmode_stack_slot (void)
18760 {
18761 tree t;
18762 basic_block bb;
18763 gimple_stmt_iterator gsi;
18764
18765 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
18766 /* We use a different approach for dealing with the secondary
18767 memory in LRA. */
18768 if (ira_use_lra_p)
18769 return;
18770
18771 if (TARGET_NO_SDMODE_STACK)
18772 return;
18773
18774 FOR_EACH_BB_FN (bb, cfun)
18775 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
18776 {
18777 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
18778 if (ret)
18779 {
18780 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
18781 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
18782 SDmode, 0);
18783 return;
18784 }
18785 }
18786
18787 /* Check for any SDmode parameters of the function. */
18788 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
18789 {
18790 if (TREE_TYPE (t) == error_mark_node)
18791 continue;
18792
18793 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
18794 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
18795 {
18796 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
18797 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
18798 SDmode, 0);
18799 return;
18800 }
18801 }
18802 }
18803
18804 static void
18805 rs6000_instantiate_decls (void)
18806 {
18807 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
18808 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
18809 }
18810
18811 /* Given an rtx X being reloaded into a reg required to be
18812 in class CLASS, return the class of reg to actually use.
18813 In general this is just CLASS; but on some machines
18814 in some cases it is preferable to use a more restrictive class.
18815
18816 On the RS/6000, we have to return NO_REGS when we want to reload a
18817 floating-point CONST_DOUBLE to force it to be copied to memory.
18818
18819 We also don't want to reload integer values into floating-point
18820 registers if we can at all help it. In fact, this can
18821 cause reload to die, if it tries to generate a reload of CTR
18822 into a FP register and discovers it doesn't have the memory location
18823 required.
18824
18825 ??? Would it be a good idea to have reload do the converse, that is
18826 try to reload floating modes into FP registers if possible?
18827 */
18828
18829 static enum reg_class
18830 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
18831 {
18832 machine_mode mode = GET_MODE (x);
18833 bool is_constant = CONSTANT_P (x);
18834
18835 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
18836 the reloading of address expressions using PLUS into floating point
18837 registers. */
18838 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
18839 {
18840 if (is_constant)
18841 {
18842 /* Zero is always allowed in all VSX registers. */
18843 if (x == CONST0_RTX (mode))
18844 return rclass;
18845
18846 /* If this is a vector constant that can be formed with a few Altivec
18847 instructions, we want altivec registers. */
18848 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
18849 return ALTIVEC_REGS;
18850
18851 /* Force constant to memory. */
18852 return NO_REGS;
18853 }
18854
18855 /* D-form addressing can easily reload the value. */
18856 if (mode_supports_vmx_dform (mode))
18857 return rclass;
18858
18859 /* If this is a scalar floating point value and we don't have D-form
18860 addressing, prefer the traditional floating point registers so that we
18861 can use D-form (register+offset) addressing. */
18862 if (GET_MODE_SIZE (mode) < 16 && rclass == VSX_REGS)
18863 return FLOAT_REGS;
18864
18865 /* Prefer the Altivec registers if Altivec is handling the vector
18866 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
18867 loads. */
18868 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
18869 || mode == V1TImode)
18870 return ALTIVEC_REGS;
18871
18872 return rclass;
18873 }
18874
18875 if (is_constant || GET_CODE (x) == PLUS)
18876 {
18877 if (reg_class_subset_p (GENERAL_REGS, rclass))
18878 return GENERAL_REGS;
18879 if (reg_class_subset_p (BASE_REGS, rclass))
18880 return BASE_REGS;
18881 return NO_REGS;
18882 }
18883
18884 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
18885 return GENERAL_REGS;
18886
18887 return rclass;
18888 }
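/* Two quick illustrations of the logic above: reloading a zero vector
 constant into VSX_REGS keeps VSX_REGS, since zero can be generated with
 xxlxor, while a vector constant that is not an easy_vector_constant
 returns NO_REGS, forcing it out to the constant pool. */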
18889
18890 /* Debug version of rs6000_preferred_reload_class. */
18891 static enum reg_class
18892 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
18893 {
18894 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
18895
18896 fprintf (stderr,
18897 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
18898 "mode = %s, x:\n",
18899 reg_class_names[ret], reg_class_names[rclass],
18900 GET_MODE_NAME (GET_MODE (x)));
18901 debug_rtx (x);
18902
18903 return ret;
18904 }
18905
18906 /* If we are copying between FP or AltiVec registers and anything else, we need
18907 a memory location. The exception is when we are targeting ppc64 and the
18908 direct move instructions between FPRs and GPRs are available. Also, under VSX, you
18909 can copy vector registers from the FP register set to the Altivec register
18910 set and vice versa. */
18911
18912 static bool
18913 rs6000_secondary_memory_needed (enum reg_class from_class,
18914 enum reg_class to_class,
18915 machine_mode mode)
18916 {
18917 enum rs6000_reg_type from_type, to_type;
18918 bool altivec_p = ((from_class == ALTIVEC_REGS)
18919 || (to_class == ALTIVEC_REGS));
18920
18921 /* If a simple/direct move is available, we don't need secondary memory. */
18922 from_type = reg_class_to_reg_type[(int)from_class];
18923 to_type = reg_class_to_reg_type[(int)to_class];
18924
18925 if (rs6000_secondary_reload_move (to_type, from_type, mode,
18926 (secondary_reload_info *)0, altivec_p))
18927 return false;
18928
18929 /* If we have a floating point or vector register class, we need to use
18930 memory to transfer the data. */
18931 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
18932 return true;
18933
18934 return false;
18935 }
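/* For example (illustrative): copying a DImode value between a GPR and
 an FPR returns true on targets without direct move, so the copy goes
 through a stack slot; on a direct-move target the
 rs6000_secondary_reload_move check above succeeds instead and the copy
 is done with mtvsrd/mfvsrd. */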
18936
18937 /* Debug version of rs6000_secondary_memory_needed. */
18938 static bool
18939 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
18940 enum reg_class to_class,
18941 machine_mode mode)
18942 {
18943 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
18944
18945 fprintf (stderr,
18946 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
18947 "to_class = %s, mode = %s\n",
18948 ret ? "true" : "false",
18949 reg_class_names[from_class],
18950 reg_class_names[to_class],
18951 GET_MODE_NAME (mode));
18952
18953 return ret;
18954 }
18955
18956 /* Return the register class of a scratch register needed to copy IN into
18957 or out of a register in RCLASS in MODE. If it can be done directly,
18958 NO_REGS is returned. */
18959
18960 static enum reg_class
18961 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
18962 rtx in)
18963 {
18964 int regno;
18965
18966 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
18967 #if TARGET_MACHO
18968 && MACHOPIC_INDIRECT
18969 #endif
18970 ))
18971 {
18972 /* We cannot copy a symbolic operand directly into anything
18973 other than BASE_REGS for TARGET_ELF. So indicate that a
18974 register from BASE_REGS is needed as an intermediate
18975 register.
18976
18977 On Darwin, pic addresses require a load from memory, which
18978 needs a base register. */
18979 if (rclass != BASE_REGS
18980 && (GET_CODE (in) == SYMBOL_REF
18981 || GET_CODE (in) == HIGH
18982 || GET_CODE (in) == LABEL_REF
18983 || GET_CODE (in) == CONST))
18984 return BASE_REGS;
18985 }
18986
18987 if (GET_CODE (in) == REG)
18988 {
18989 regno = REGNO (in);
18990 if (regno >= FIRST_PSEUDO_REGISTER)
18991 {
18992 regno = true_regnum (in);
18993 if (regno >= FIRST_PSEUDO_REGISTER)
18994 regno = -1;
18995 }
18996 }
18997 else if (GET_CODE (in) == SUBREG)
18998 {
18999 regno = true_regnum (in);
19000 if (regno >= FIRST_PSEUDO_REGISTER)
19001 regno = -1;
19002 }
19003 else
19004 regno = -1;
19005
19006 /* If we have VSX register moves, prefer moving scalar values between
19007 Altivec registers and GPR by going via an FPR (and then via memory)
19008 instead of reloading the secondary memory address for Altivec moves. */
19009 if (TARGET_VSX
19010 && GET_MODE_SIZE (mode) < 16
19011 && !mode_supports_vmx_dform (mode)
19012 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
19013 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
19014 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19015 && (regno >= 0 && INT_REGNO_P (regno)))))
19016 return FLOAT_REGS;
19017
19018 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
19019 into anything. */
19020 if (rclass == GENERAL_REGS || rclass == BASE_REGS
19021 || (regno >= 0 && INT_REGNO_P (regno)))
19022 return NO_REGS;
19023
19024 /* Constants, memory, and VSX registers can go into VSX registers (both the
19025 traditional floating point and the altivec registers). */
19026 if (rclass == VSX_REGS
19027 && (regno == -1 || VSX_REGNO_P (regno)))
19028 return NO_REGS;
19029
19030 /* Constants, memory, and FP registers can go into FP registers. */
19031 if ((regno == -1 || FP_REGNO_P (regno))
19032 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
19033 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
19034
19035 /* Memory and AltiVec registers can go into AltiVec registers. */
19036 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
19037 && rclass == ALTIVEC_REGS)
19038 return NO_REGS;
19039
19040 /* We can copy among the CR registers. */
19041 if ((rclass == CR_REGS || rclass == CR0_REGS)
19042 && regno >= 0 && CR_REGNO_P (regno))
19043 return NO_REGS;
19044
19045 /* Otherwise, we need GENERAL_REGS. */
19046 return GENERAL_REGS;
19047 }
19048
19049 /* Debug version of rs6000_secondary_reload_class. */
19050 static enum reg_class
19051 rs6000_debug_secondary_reload_class (enum reg_class rclass,
19052 machine_mode mode, rtx in)
19053 {
19054 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
19055 fprintf (stderr,
19056 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
19057 "mode = %s, input rtx:\n",
19058 reg_class_names[ret], reg_class_names[rclass],
19059 GET_MODE_NAME (mode));
19060 debug_rtx (in);
19061
19062 return ret;
19063 }
19064
19065 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
19066
19067 static bool
19068 rs6000_cannot_change_mode_class (machine_mode from,
19069 machine_mode to,
19070 enum reg_class rclass)
19071 {
19072 unsigned from_size = GET_MODE_SIZE (from);
19073 unsigned to_size = GET_MODE_SIZE (to);
19074
19075 if (from_size != to_size)
19076 {
19077 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
19078
19079 if (reg_classes_intersect_p (xclass, rclass))
19080 {
19081 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
19082 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
19083 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
19084 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
19085
19086 /* Don't allow 64-bit types to overlap with 128-bit types that take a
19087 single register under VSX because the scalar part of the register
19088 is in the upper 64-bits, and not the lower 64-bits. Types like
19089 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
19090 IEEE floating point can't overlap, and neither can small
19091 values. */
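/* For example (illustrative): (subreg:DF (reg:V2DF vs0) 0) changes from
 V2DFmode (from_size 16, one VSX register) to DFmode (to_size 8); the
 to_size == 8 test below fires because 8 * from_nregs is 8, not 16, so
 the change is rejected -- the DFmode scalar lives in the upper 64 bits
 of the register, not where subreg semantics would place it. */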
19092
19093 if (to_float128_vector_p && from_float128_vector_p)
19094 return false;
19095
19096 else if (to_float128_vector_p || from_float128_vector_p)
19097 return true;
19098
19099 /* TDmode in floating-mode registers must always go into a register
19100 pair with the most significant word in the even-numbered register
19101 to match ISA requirements. In little-endian mode, this does not
19102 match subreg numbering, so we cannot allow subregs. */
19103 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
19104 return true;
19105
19106 if (from_size < 8 || to_size < 8)
19107 return true;
19108
19109 if (from_size == 8 && (8 * to_nregs) != to_size)
19110 return true;
19111
19112 if (to_size == 8 && (8 * from_nregs) != from_size)
19113 return true;
19114
19115 return false;
19116 }
19117 else
19118 return false;
19119 }
19120
19121 if (TARGET_E500_DOUBLE
19122 && ((((to) == DFmode) + ((from) == DFmode)) == 1
19123 || (((to) == TFmode) + ((from) == TFmode)) == 1
19124 || (((to) == IFmode) + ((from) == IFmode)) == 1
19125 || (((to) == KFmode) + ((from) == KFmode)) == 1
19126 || (((to) == DDmode) + ((from) == DDmode)) == 1
19127 || (((to) == TDmode) + ((from) == TDmode)) == 1
19128 || (((to) == DImode) + ((from) == DImode)) == 1))
19129 return true;
19130
19131 /* Since the VSX register set includes traditional floating point registers
19132 and altivec registers, just check for the size being different instead of
19133 trying to check whether the modes are vector modes. Otherwise it won't
19134 allow, say, DF and DI to change classes. For types like TFmode and TDmode
19135 that take 2 64-bit registers, rather than a single 128-bit register, don't
19136 allow subregs of those types to other 128 bit types. */
19137 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
19138 {
19139 unsigned num_regs = (from_size + 15) / 16;
19140 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
19141 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
19142 return true;
19143
19144 return (from_size != 8 && from_size != 16);
19145 }
19146
19147 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
19148 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
19149 return true;
19150
19151 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
19152 && reg_classes_intersect_p (GENERAL_REGS, rclass))
19153 return true;
19154
19155 return false;
19156 }
19157
19158 /* Debug version of rs6000_cannot_change_mode_class. */
19159 static bool
19160 rs6000_debug_cannot_change_mode_class (machine_mode from,
19161 machine_mode to,
19162 enum reg_class rclass)
19163 {
19164 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
19165
19166 fprintf (stderr,
19167 "rs6000_cannot_change_mode_class, return %s, from = %s, "
19168 "to = %s, rclass = %s\n",
19169 ret ? "true" : "false",
19170 GET_MODE_NAME (from), GET_MODE_NAME (to),
19171 reg_class_names[rclass]);
19172
19173 return ret;
19174 }
19175 \f
19176 /* Return a string to do a move operation of 128 bits of data. */
19177
19178 const char *
19179 rs6000_output_move_128bit (rtx operands[])
19180 {
19181 rtx dest = operands[0];
19182 rtx src = operands[1];
19183 machine_mode mode = GET_MODE (dest);
19184 int dest_regno;
19185 int src_regno;
19186 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
19187 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
19188
19189 if (REG_P (dest))
19190 {
19191 dest_regno = REGNO (dest);
19192 dest_gpr_p = INT_REGNO_P (dest_regno);
19193 dest_fp_p = FP_REGNO_P (dest_regno);
19194 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
19195 dest_vsx_p = dest_fp_p | dest_vmx_p;
19196 }
19197 else
19198 {
19199 dest_regno = -1;
19200 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
19201 }
19202
19203 if (REG_P (src))
19204 {
19205 src_regno = REGNO (src);
19206 src_gpr_p = INT_REGNO_P (src_regno);
19207 src_fp_p = FP_REGNO_P (src_regno);
19208 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
19209 src_vsx_p = src_fp_p | src_vmx_p;
19210 }
19211 else
19212 {
19213 src_regno = -1;
19214 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
19215 }
19216
19217 /* Register moves. */
19218 if (dest_regno >= 0 && src_regno >= 0)
19219 {
19220 if (dest_gpr_p)
19221 {
19222 if (src_gpr_p)
19223 return "#";
19224
19225 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
19226 return (WORDS_BIG_ENDIAN
19227 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
19228 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
19229
19230 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
19231 return "#";
19232 }
19233
19234 else if (TARGET_VSX && dest_vsx_p)
19235 {
19236 if (src_vsx_p)
19237 return "xxlor %x0,%x1,%x1";
19238
19239 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
19240 return (WORDS_BIG_ENDIAN
19241 ? "mtvsrdd %x0,%1,%L1"
19242 : "mtvsrdd %x0,%L1,%1");
19243
19244 else if (TARGET_DIRECT_MOVE && src_gpr_p)
19245 return "#";
19246 }
19247
19248 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
19249 return "vor %0,%1,%1";
19250
19251 else if (dest_fp_p && src_fp_p)
19252 return "#";
19253 }
19254
19255 /* Loads. */
19256 else if (dest_regno >= 0 && MEM_P (src))
19257 {
19258 if (dest_gpr_p)
19259 {
19260 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
19261 return "lq %0,%1";
19262 else
19263 return "#";
19264 }
19265
19266 else if (TARGET_ALTIVEC && dest_vmx_p
19267 && altivec_indexed_or_indirect_operand (src, mode))
19268 return "lvx %0,%y1";
19269
19270 else if (TARGET_VSX && dest_vsx_p)
19271 {
19272 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
19273 return "lxvw4x %x0,%y1";
19274 else
19275 return "lxvd2x %x0,%y1";
19276 }
19277
19278 else if (TARGET_ALTIVEC && dest_vmx_p)
19279 return "lvx %0,%y1";
19280
19281 else if (dest_fp_p)
19282 return "#";
19283 }
19284
19285 /* Stores. */
19286 else if (src_regno >= 0 && MEM_P (dest))
19287 {
19288 if (src_gpr_p)
19289 {
19290 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
19291 return "stq %1,%0";
19292 else
19293 return "#";
19294 }
19295
19296 else if (TARGET_ALTIVEC && src_vmx_p
19297 && altivec_indexed_or_indirect_operand (dest, mode))
19298 return "stvx %1,%y0";
19299
19300 else if (TARGET_VSX && src_vsx_p)
19301 {
19302 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
19303 return "stxvw4x %x1,%y0";
19304 else
19305 return "stxvd2x %x1,%y0";
19306 }
19307
19308 else if (TARGET_ALTIVEC && src_vmx_p)
19309 return "stvx %1,%y0";
19310
19311 else if (src_fp_p)
19312 return "#";
19313 }
19314
19315 /* Constants. */
19316 else if (dest_regno >= 0
19317 && (GET_CODE (src) == CONST_INT
19318 || GET_CODE (src) == CONST_WIDE_INT
19319 || GET_CODE (src) == CONST_DOUBLE
19320 || GET_CODE (src) == CONST_VECTOR))
19321 {
19322 if (dest_gpr_p)
19323 return "#";
19324
19325 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
19326 return "xxlxor %x0,%x0,%x0";
19327
19328 else if (TARGET_ALTIVEC && dest_vmx_p)
19329 return output_vec_const_move (operands);
19330 }
19331
19332 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
19333 }
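/* For instance (illustrative operands): a TImode copy from a GPR pair
 into a VSX register on a big-endian TARGET_DIRECT_MOVE_128 target
 returns "mtvsrdd %x0,%1,%L1", gluing the two 64-bit halves together;
 most GPR-side cases instead return "#" so that a splitter breaks the
 move into word-sized pieces after reload. */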
19334
19335 /* Validate a 128-bit move. */
19336 bool
19337 rs6000_move_128bit_ok_p (rtx operands[])
19338 {
19339 machine_mode mode = GET_MODE (operands[0]);
19340 return (gpc_reg_operand (operands[0], mode)
19341 || gpc_reg_operand (operands[1], mode));
19342 }
19343
19344 /* Return true if a 128-bit move needs to be split. */
19345 bool
19346 rs6000_split_128bit_ok_p (rtx operands[])
19347 {
19348 if (!reload_completed)
19349 return false;
19350
19351 if (!gpr_or_gpr_p (operands[0], operands[1]))
19352 return false;
19353
19354 if (quad_load_store_p (operands[0], operands[1]))
19355 return false;
19356
19357 return true;
19358 }
19359
19360 \f
19361 /* Given a comparison operation, return the bit number in CCR to test. We
19362 know this is a valid comparison.
19363
19364 SCC_P is 1 if this is for an scc. That means that %D will have been
19365 used instead of %C, so the bits will be in different places.
19366
19367 Return -1 if OP isn't a valid comparison for some reason. */
19368
19369 int
19370 ccr_bit (rtx op, int scc_p)
19371 {
19372 enum rtx_code code = GET_CODE (op);
19373 machine_mode cc_mode;
19374 int cc_regnum;
19375 int base_bit;
19376 rtx reg;
19377
19378 if (!COMPARISON_P (op))
19379 return -1;
19380
19381 reg = XEXP (op, 0);
19382
19383 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
19384
19385 cc_mode = GET_MODE (reg);
19386 cc_regnum = REGNO (reg);
19387 base_bit = 4 * (cc_regnum - CR0_REGNO);
19388
19389 validate_condition_mode (code, cc_mode);
19390
19391 /* When generating a sCOND operation, only positive conditions are
19392 allowed. */
19393 gcc_assert (!scc_p
19394 || code == EQ || code == GT || code == LT || code == UNORDERED
19395 || code == GTU || code == LTU);
19396
19397 switch (code)
19398 {
19399 case NE:
19400 return scc_p ? base_bit + 3 : base_bit + 2;
19401 case EQ:
19402 return base_bit + 2;
19403 case GT: case GTU: case UNLE:
19404 return base_bit + 1;
19405 case LT: case LTU: case UNGE:
19406 return base_bit;
19407 case ORDERED: case UNORDERED:
19408 return base_bit + 3;
19409
19410 case GE: case GEU:
19411 /* If scc, we will have done a cror to put the bit in the
19412 unordered position. So test that bit. For integer, this is ! LT
19413 unless this is an scc insn. */
19414 return scc_p ? base_bit + 3 : base_bit;
19415
19416 case LE: case LEU:
19417 return scc_p ? base_bit + 3 : base_bit + 1;
19418
19419 default:
19420 gcc_unreachable ();
19421 }
19422 }
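/* A worked example (illustrative): for a GT test of a comparison result
 living in cr2 with scc_p == 0, base_bit is 4 * (cr2 - cr0) == 8 and the
 GT case above returns base_bit + 1 == 9; the %j output modifier in
 print_operand below hands that bit number to conditional branch
 instructions. */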
19423 \f
19424 /* Return the GOT register. */
19425
19426 rtx
19427 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
19428 {
19429 /* The second flow pass currently (June 1999) can't update
19430 regs_ever_live without disturbing other parts of the compiler, so
19431 update it here to make the prolog/epilogue code happy. */
19432 if (!can_create_pseudo_p ()
19433 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
19434 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
19435
19436 crtl->uses_pic_offset_table = 1;
19437
19438 return pic_offset_table_rtx;
19439 }
19440 \f
19441 static rs6000_stack_t stack_info;
19442
19443 /* Function to init struct machine_function.
19444 This will be called, via a pointer variable,
19445 from push_function_context. */
19446
19447 static struct machine_function *
19448 rs6000_init_machine_status (void)
19449 {
19450 stack_info.reload_completed = 0;
19451 return ggc_cleared_alloc<machine_function> ();
19452 }
19453 \f
19454 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
19455
19456 /* Write out a function code label. */
19457
19458 void
19459 rs6000_output_function_entry (FILE *file, const char *fname)
19460 {
19461 if (fname[0] != '.')
19462 {
19463 switch (DEFAULT_ABI)
19464 {
19465 default:
19466 gcc_unreachable ();
19467
19468 case ABI_AIX:
19469 if (DOT_SYMBOLS)
19470 putc ('.', file);
19471 else
19472 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
19473 break;
19474
19475 case ABI_ELFv2:
19476 case ABI_V4:
19477 case ABI_DARWIN:
19478 break;
19479 }
19480 }
19481
19482 RS6000_OUTPUT_BASENAME (file, fname);
19483 }
19484
19485 /* Print an operand. Recognize special options, documented below. */
19486
19487 #if TARGET_ELF
19488 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
19489 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
19490 #else
19491 #define SMALL_DATA_RELOC "sda21"
19492 #define SMALL_DATA_REG 0
19493 #endif
19494
19495 void
19496 print_operand (FILE *file, rtx x, int code)
19497 {
19498 int i;
19499 unsigned HOST_WIDE_INT uval;
19500
19501 switch (code)
19502 {
19503 /* %a is output_address. */
19504
19505 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
19506 output_operand. */
19507
19508 case 'D':
19509 /* Like 'J' but get to the GT bit only. */
19510 gcc_assert (REG_P (x));
19511
19512 /* Bit 1 is GT bit. */
19513 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
19514
19515 /* Add one for shift count in rlinm for scc. */
19516 fprintf (file, "%d", i + 1);
19517 return;
19518
19519 case 'e':
19520 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
19521 if (! INT_P (x))
19522 {
19523 output_operand_lossage ("invalid %%e value");
19524 return;
19525 }
19526
19527 uval = INTVAL (x);
19528 if ((uval & 0xffff) == 0 && uval != 0)
19529 putc ('s', file);
19530 return;
19531
19532 case 'E':
19533 /* X is a CR register. Print the number of the EQ bit of the CR. */
19534 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19535 output_operand_lossage ("invalid %%E value");
19536 else
19537 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
19538 return;
19539
19540 case 'f':
19541 /* X is a CR register. Print the shift count needed to move it
19542 to the high-order four bits. */
19543 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19544 output_operand_lossage ("invalid %%f value");
19545 else
19546 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
19547 return;
19548
19549 case 'F':
19550 /* Similar, but print the count for the rotate in the opposite
19551 direction. */
19552 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19553 output_operand_lossage ("invalid %%F value");
19554 else
19555 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
19556 return;
19557
19558 case 'G':
19559 /* X is a constant integer. If it is negative, print "m",
19560 otherwise print "z". This is to make an aze or ame insn. */
19561 if (GET_CODE (x) != CONST_INT)
19562 output_operand_lossage ("invalid %%G value");
19563 else if (INTVAL (x) >= 0)
19564 putc ('z', file);
19565 else
19566 putc ('m', file);
19567 return;
19568
19569 case 'h':
19570 /* If constant, output low-order five bits. Otherwise, write
19571 normally. */
19572 if (INT_P (x))
19573 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
19574 else
19575 print_operand (file, x, 0);
19576 return;
19577
19578 case 'H':
19579 /* If constant, output low-order six bits. Otherwise, write
19580 normally. */
19581 if (INT_P (x))
19582 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
19583 else
19584 print_operand (file, x, 0);
19585 return;
19586
19587 case 'I':
19588 /* Print `i' if this is a constant, else nothing. */
19589 if (INT_P (x))
19590 putc ('i', file);
19591 return;
19592
19593 case 'j':
19594 /* Write the bit number in CCR for jump. */
19595 i = ccr_bit (x, 0);
19596 if (i == -1)
19597 output_operand_lossage ("invalid %%j code");
19598 else
19599 fprintf (file, "%d", i);
19600 return;
19601
19602 case 'J':
19603 /* Similar, but add one for shift count in rlinm for scc and pass
19604 scc flag to `ccr_bit'. */
19605 i = ccr_bit (x, 1);
19606 if (i == -1)
19607 output_operand_lossage ("invalid %%J code");
19608 else
19609 /* If we want bit 31, write a shift count of zero, not 32. */
19610 fprintf (file, "%d", i == 31 ? 0 : i + 1);
19611 return;
19612
19613 case 'k':
19614 /* X must be a constant. Write the 1's complement of the
19615 constant. */
19616 if (! INT_P (x))
19617 output_operand_lossage ("invalid %%k value");
19618 else
19619 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
19620 return;
19621
19622 case 'K':
19623 /* X must be a symbolic constant on ELF. Write an
19624 expression suitable for an 'addi' that adds in the low 16
19625 bits of the MEM. */
19626 if (GET_CODE (x) == CONST)
19627 {
19628 if (GET_CODE (XEXP (x, 0)) != PLUS
19629 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
19630 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
19631 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
19632 output_operand_lossage ("invalid %%K value");
19633 }
19634 print_operand_address (file, x);
19635 fputs ("@l", file);
19636 return;
19637
19638 /* %l is output_asm_label. */
19639
19640 case 'L':
19641 /* Write second word of DImode or DFmode reference. Works on register
19642 or non-indexed memory only. */
19643 if (REG_P (x))
19644 fputs (reg_names[REGNO (x) + 1], file);
19645 else if (MEM_P (x))
19646 {
19647 machine_mode mode = GET_MODE (x);
19648 /* Handle possible auto-increment. Since it is pre-increment and
19649 we have already done it, we can just use an offset of word. */
19650 if (GET_CODE (XEXP (x, 0)) == PRE_INC
19651 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
19652 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
19653 UNITS_PER_WORD));
19654 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
19655 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
19656 UNITS_PER_WORD));
19657 else
19658 output_address (mode, XEXP (adjust_address_nv (x, SImode,
19659 UNITS_PER_WORD),
19660 0));
19661
19662 if (small_data_operand (x, GET_MODE (x)))
19663 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19664 reg_names[SMALL_DATA_REG]);
19665 }
19666 return;
19667
19668 case 'N':
19669 /* Write the number of elements in the vector times 4. */
19670 if (GET_CODE (x) != PARALLEL)
19671 output_operand_lossage ("invalid %%N value");
19672 else
19673 fprintf (file, "%d", XVECLEN (x, 0) * 4);
19674 return;
19675
19676 case 'O':
19677 /* Similar, but subtract 1 first. */
19678 if (GET_CODE (x) != PARALLEL)
19679 output_operand_lossage ("invalid %%O value");
19680 else
19681 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
19682 return;
19683
19684 case 'p':
19685 /* X is a CONST_INT that is a power of two. Output the logarithm. */
19686 if (! INT_P (x)
19687 || INTVAL (x) < 0
19688 || (i = exact_log2 (INTVAL (x))) < 0)
19689 output_operand_lossage ("invalid %%p value");
19690 else
19691 fprintf (file, "%d", i);
19692 return;
19693
19694 case 'P':
19695 /* The operand must be an indirect memory reference. The result
19696 is the register name. */
19697 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
19698 || REGNO (XEXP (x, 0)) >= 32)
19699 output_operand_lossage ("invalid %%P value");
19700 else
19701 fputs (reg_names[REGNO (XEXP (x, 0))], file);
19702 return;
19703
19704 case 'q':
19705 /* This outputs the logical code corresponding to a boolean
19706 expression. The expression may have one or both operands
19707 negated (if one, only the first one). For condition register
19708 logical operations, it will also treat the negated
19709 CR codes as NOTs, but not handle NOTs of them. */
19710 {
19711 const char *const *t = 0;
19712 const char *s;
19713 enum rtx_code code = GET_CODE (x);
19714 static const char * const tbl[3][3] = {
19715 { "and", "andc", "nor" },
19716 { "or", "orc", "nand" },
19717 { "xor", "eqv", "xor" } };
19718
19719 if (code == AND)
19720 t = tbl[0];
19721 else if (code == IOR)
19722 t = tbl[1];
19723 else if (code == XOR)
19724 t = tbl[2];
19725 else
19726 output_operand_lossage ("invalid %%q value");
19727
19728 if (GET_CODE (XEXP (x, 0)) != NOT)
19729 s = t[0];
19730 else
19731 {
19732 if (GET_CODE (XEXP (x, 1)) == NOT)
19733 s = t[2];
19734 else
19735 s = t[1];
19736 }
19737
19738 fputs (s, file);
19739 }
19740 return;
19741
19742 case 'Q':
19743 if (! TARGET_MFCRF)
19744 return;
19745 fputc (',', file);
19746 /* FALLTHRU */
19747
19748 case 'R':
19749 /* X is a CR register. Print the mask for `mtcrf'. */
19750 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19751 output_operand_lossage ("invalid %%R value");
19752 else
19753 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
19754 return;
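/* E.g. (illustrative): cr0 prints 128 and cr7 prints 1, the one-hot
 FXM field value that mtcrf expects for that CR field. */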
19755
19756 case 't':
19757 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
19758 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
19759
19760 /* Bit 3 is OV bit. */
19761 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
19762
19763 /* If we want bit 31, write a shift count of zero, not 32. */
19764 fprintf (file, "%d", i == 31 ? 0 : i + 1);
19765 return;
19766
19767 case 'T':
19768 /* Print the symbolic name of a branch target register. */
19769 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
19770 && REGNO (x) != CTR_REGNO))
19771 output_operand_lossage ("invalid %%T value");
19772 else if (REGNO (x) == LR_REGNO)
19773 fputs ("lr", file);
19774 else
19775 fputs ("ctr", file);
19776 return;
19777
19778 case 'u':
19779 /* High-order or low-order 16 bits of constant, whichever is non-zero,
19780 for use in unsigned operand. */
19781 if (! INT_P (x))
19782 {
19783 output_operand_lossage ("invalid %%u value");
19784 return;
19785 }
19786
19787 uval = INTVAL (x);
19788 if ((uval & 0xffff) == 0)
19789 uval >>= 16;
19790
19791 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
19792 return;
19793
19794 case 'v':
19795 /* High-order 16 bits of constant for use in signed operand. */
19796 if (! INT_P (x))
19797 output_operand_lossage ("invalid %%v value");
19798 else
19799 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
19800 (INTVAL (x) >> 16) & 0xffff);
19801 return;
19802
19803 case 'U':
19804 /* Print `u' if this has an auto-increment or auto-decrement. */
19805 if (MEM_P (x)
19806 && (GET_CODE (XEXP (x, 0)) == PRE_INC
19807 || GET_CODE (XEXP (x, 0)) == PRE_DEC
19808 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
19809 putc ('u', file);
19810 return;
19811
19812 case 'V':
19813 /* Print the trap code for this operand. */
19814 switch (GET_CODE (x))
19815 {
19816 case EQ:
19817 fputs ("eq", file); /* 4 */
19818 break;
19819 case NE:
19820 fputs ("ne", file); /* 24 */
19821 break;
19822 case LT:
19823 fputs ("lt", file); /* 16 */
19824 break;
19825 case LE:
19826 fputs ("le", file); /* 20 */
19827 break;
19828 case GT:
19829 fputs ("gt", file); /* 8 */
19830 break;
19831 case GE:
19832 fputs ("ge", file); /* 12 */
19833 break;
19834 case LTU:
19835 fputs ("llt", file); /* 2 */
19836 break;
19837 case LEU:
19838 fputs ("lle", file); /* 6 */
19839 break;
19840 case GTU:
19841 fputs ("lgt", file); /* 1 */
19842 break;
19843 case GEU:
19844 fputs ("lge", file); /* 5 */
19845 break;
19846 default:
19847 gcc_unreachable ();
19848 }
19849 break;
19850
19851 case 'w':
19852 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
19853 normally. */
19854 if (INT_P (x))
19855 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
19856 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
19857 else
19858 print_operand (file, x, 0);
19859 return;
19860
19861 case 'x':
19862 /* X is a FPR or Altivec register used in a VSX context. */
19863 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
19864 output_operand_lossage ("invalid %%x value");
19865 else
19866 {
19867 int reg = REGNO (x);
19868 int vsx_reg = (FP_REGNO_P (reg)
19869 ? reg - 32
19870 : reg - FIRST_ALTIVEC_REGNO + 32);
19871
19872 #ifdef TARGET_REGNAMES
19873 if (TARGET_REGNAMES)
19874 fprintf (file, "%%vs%d", vsx_reg);
19875 else
19876 #endif
19877 fprintf (file, "%d", vsx_reg);
19878 }
19879 return;
19880
19881 case 'X':
19882 if (MEM_P (x)
19883 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
19884 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
19885 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
19886 putc ('x', file);
19887 return;
19888
19889 case 'Y':
19890 /* Like 'L', for third word of TImode/PTImode. */
19891 if (REG_P (x))
19892 fputs (reg_names[REGNO (x) + 2], file);
19893 else if (MEM_P (x))
19894 {
19895 machine_mode mode = GET_MODE (x);
19896 if (GET_CODE (XEXP (x, 0)) == PRE_INC
19897 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
19898 output_address (mode, plus_constant (Pmode,
19899 XEXP (XEXP (x, 0), 0), 8));
19900 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
19901 output_address (mode, plus_constant (Pmode,
19902 XEXP (XEXP (x, 0), 0), 8));
19903 else
19904 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
19905 if (small_data_operand (x, GET_MODE (x)))
19906 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19907 reg_names[SMALL_DATA_REG]);
19908 }
19909 return;
19910
19911 case 'z':
19912 /* X is a SYMBOL_REF. Write out the name preceded by a
19913 period and without any trailing data in brackets. Used for function
19914 names. If we are configured for System V (or the embedded ABI) on
19915 the PowerPC, do not emit the period, since those systems do not use
19916 TOCs and the like. */
19917 gcc_assert (GET_CODE (x) == SYMBOL_REF);
19918
19919 /* For macho, check to see if we need a stub. */
19920 if (TARGET_MACHO)
19921 {
19922 const char *name = XSTR (x, 0);
19923 #if TARGET_MACHO
19924 if (darwin_emit_branch_islands
19925 && MACHOPIC_INDIRECT
19926 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
19927 name = machopic_indirection_name (x, /*stub_p=*/true);
19928 #endif
19929 assemble_name (file, name);
19930 }
19931 else if (!DOT_SYMBOLS)
19932 assemble_name (file, XSTR (x, 0));
19933 else
19934 rs6000_output_function_entry (file, XSTR (x, 0));
19935 return;
19936
19937 case 'Z':
19938 /* Like 'L', for last word of TImode/PTImode. */
19939 if (REG_P (x))
19940 fputs (reg_names[REGNO (x) + 3], file);
19941 else if (MEM_P (x))
19942 {
19943 machine_mode mode = GET_MODE (x);
19944 if (GET_CODE (XEXP (x, 0)) == PRE_INC
19945 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
19946 output_address (mode, plus_constant (Pmode,
19947 XEXP (XEXP (x, 0), 0), 12));
19948 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
19949 output_address (mode, plus_constant (Pmode,
19950 XEXP (XEXP (x, 0), 0), 12));
19951 else
19952 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
19953 if (small_data_operand (x, GET_MODE (x)))
19954 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19955 reg_names[SMALL_DATA_REG]);
19956 }
19957 return;
19958
19959 /* Print AltiVec or SPE memory operand. */
19960 case 'y':
19961 {
19962 rtx tmp;
19963
19964 gcc_assert (MEM_P (x));
19965
19966 tmp = XEXP (x, 0);
19967
19968 /* Ugly hack because %y is overloaded. */
19969 if ((TARGET_SPE || TARGET_E500_DOUBLE)
19970 && (GET_MODE_SIZE (GET_MODE (x)) == 8
19971 || FLOAT128_2REG_P (GET_MODE (x))
19972 || GET_MODE (x) == TImode
19973 || GET_MODE (x) == PTImode))
19974 {
19975 /* Handle [reg]. */
19976 if (REG_P (tmp))
19977 {
19978 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
19979 break;
19980 }
19981 /* Handle [reg+UIMM]. */
19982 else if (GET_CODE (tmp) == PLUS
19983 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
19984 {
19985 int x;
19986
19987 gcc_assert (REG_P (XEXP (tmp, 0)));
19988
19989 x = INTVAL (XEXP (tmp, 1));
19990 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
19991 break;
19992 }
19993
19994 /* Fall through. Must be [reg+reg]. */
19995 }
19996 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
19997 && GET_CODE (tmp) == AND
19998 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
19999 && INTVAL (XEXP (tmp, 1)) == -16)
20000 tmp = XEXP (tmp, 0);
20001 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
20002 && GET_CODE (tmp) == PRE_MODIFY)
20003 tmp = XEXP (tmp, 1);
20004 if (REG_P (tmp))
20005 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
20006 else
20007 {
20008 if (GET_CODE (tmp) != PLUS
20009 || !REG_P (XEXP (tmp, 0))
20010 || !REG_P (XEXP (tmp, 1)))
20011 {
20012 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
20013 break;
20014 }
20015
20016 if (REGNO (XEXP (tmp, 0)) == 0)
20017 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
20018 reg_names[ REGNO (XEXP (tmp, 0)) ]);
20019 else
20020 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
20021 reg_names[ REGNO (XEXP (tmp, 1)) ]);
20022 }
20023 break;
20024 }
20025
20026 case 0:
20027 if (REG_P (x))
20028 fprintf (file, "%s", reg_names[REGNO (x)]);
20029 else if (MEM_P (x))
20030 {
20031 /* We need to handle PRE_INC and PRE_DEC here, since we need to
20032 know the width from the mode. */
20033 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
20034 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
20035 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20036 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
20037 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
20038 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
20039 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20040 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
20041 else
20042 output_address (GET_MODE (x), XEXP (x, 0));
20043 }
20044 else
20045 {
20046 if (toc_relative_expr_p (x, false))
20047 /* This hack along with a corresponding hack in
20048 rs6000_output_addr_const_extra arranges to output addends
20049 where the assembler expects to find them. E.g.
20050 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
20051 without this hack would be output as "x@toc+4". We
20052 want "x+4@toc". */
20053 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
20054 else
20055 output_addr_const (file, x);
20056 }
20057 return;
20058
20059 case '&':
20060 if (const char *name = get_some_local_dynamic_name ())
20061 assemble_name (file, name);
20062 else
20063 output_operand_lossage ("'%%&' used without any "
20064 "local dynamic TLS references");
20065 return;
20066
20067 default:
20068 output_operand_lossage ("invalid %%xn code");
20069 }
20070 }
20071 \f
20072 /* Print the address of an operand. */
20073
20074 void
20075 print_operand_address (FILE *file, rtx x)
20076 {
20077 if (REG_P (x))
20078 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
20079 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
20080 || GET_CODE (x) == LABEL_REF)
20081 {
20082 output_addr_const (file, x);
20083 if (small_data_operand (x, GET_MODE (x)))
20084 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20085 reg_names[SMALL_DATA_REG]);
20086 else
20087 gcc_assert (!TARGET_TOC);
20088 }
20089 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20090 && REG_P (XEXP (x, 1)))
20091 {
20092 if (REGNO (XEXP (x, 0)) == 0)
20093 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
20094 reg_names[ REGNO (XEXP (x, 0)) ]);
20095 else
20096 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
20097 reg_names[ REGNO (XEXP (x, 1)) ]);
20098 }
20099 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
20100 && GET_CODE (XEXP (x, 1)) == CONST_INT)
20101 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
20102 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
20103 #if TARGET_MACHO
20104 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20105 && CONSTANT_P (XEXP (x, 1)))
20106 {
20107 fprintf (file, "lo16(");
20108 output_addr_const (file, XEXP (x, 1));
20109 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20110 }
20111 #endif
20112 #if TARGET_ELF
20113 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
20114 && CONSTANT_P (XEXP (x, 1)))
20115 {
20116 output_addr_const (file, XEXP (x, 1));
20117 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
20118 }
20119 #endif
20120 else if (toc_relative_expr_p (x, false))
20121 {
20122 /* This hack along with a corresponding hack in
20123 rs6000_output_addr_const_extra arranges to output addends
20124 where the assembler expects to find them. E.g.
20125 (lo_sum (reg 9)
20126 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
20127 without this hack would be output as "x@toc+8@l(9)". We
20128 want "x+8@toc@l(9)". */
20129 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
20130 if (GET_CODE (x) == LO_SUM)
20131 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
20132 else
20133 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
20134 }
20135 else
20136 gcc_unreachable ();
20137 }
20138 \f
20139 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
20140
20141 static bool
20142 rs6000_output_addr_const_extra (FILE *file, rtx x)
20143 {
20144 if (GET_CODE (x) == UNSPEC)
20145 switch (XINT (x, 1))
20146 {
20147 case UNSPEC_TOCREL:
20148 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
20149 && REG_P (XVECEXP (x, 0, 1))
20150 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
20151 output_addr_const (file, XVECEXP (x, 0, 0));
20152 if (x == tocrel_base && tocrel_offset != const0_rtx)
20153 {
20154 if (INTVAL (tocrel_offset) >= 0)
20155 fprintf (file, "+");
20156 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
20157 }
20158 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
20159 {
20160 putc ('-', file);
20161 assemble_name (file, toc_label_name);
20162 }
20163 else if (TARGET_ELF)
20164 fputs ("@toc", file);
20165 return true;
20166
20167 #if TARGET_MACHO
20168 case UNSPEC_MACHOPIC_OFFSET:
20169 output_addr_const (file, XVECEXP (x, 0, 0));
20170 putc ('-', file);
20171 machopic_output_function_base_name (file);
20172 return true;
20173 #endif
20174 }
20175 return false;
20176 }
20177 \f
20178 /* Target hook for assembling integer objects. The PowerPC version has
20179 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
20180 is defined. It also needs to handle DI-mode objects on 64-bit
20181 targets. */
20182
20183 static bool
20184 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
20185 {
20186 #ifdef RELOCATABLE_NEEDS_FIXUP
20187 /* Special handling for SI values. */
20188 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
20189 {
20190 static int recurse = 0;
20191
20192 /* For -mrelocatable, we mark all addresses that need to be fixed up in
20193 the .fixup section. Since the TOC section is already relocated, we
20194 don't need to mark it here. We used to skip the text section, but it
20195 should never be valid for relocated addresses to be placed in the text
20196 section. */
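/* The branch below then emits assembly of roughly this shape (label
 number and symbol are illustrative):

 .LCP1:
 .long (sym)@fixup
 .section ".fixup","aw"
 .align 2
 .long .LCP1
 .previous
 */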
20197 if (TARGET_RELOCATABLE
20198 && in_section != toc_section
20199 && !recurse
20200 && !CONST_SCALAR_INT_P (x)
20201 && CONSTANT_P (x))
20202 {
20203 char buf[256];
20204
20205 recurse = 1;
20206 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
20207 fixuplabelno++;
20208 ASM_OUTPUT_LABEL (asm_out_file, buf);
20209 fprintf (asm_out_file, "\t.long\t(");
20210 output_addr_const (asm_out_file, x);
20211 fprintf (asm_out_file, ")@fixup\n");
20212 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
20213 ASM_OUTPUT_ALIGN (asm_out_file, 2);
20214 fprintf (asm_out_file, "\t.long\t");
20215 assemble_name (asm_out_file, buf);
20216 fprintf (asm_out_file, "\n\t.previous\n");
20217 recurse = 0;
20218 return true;
20219 }
20220 /* Remove initial .'s to turn a -mcall-aixdesc function
20221 address into the address of the descriptor, not the function
20222 itself. */
20223 else if (GET_CODE (x) == SYMBOL_REF
20224 && XSTR (x, 0)[0] == '.'
20225 && DEFAULT_ABI == ABI_AIX)
20226 {
20227 const char *name = XSTR (x, 0);
20228 while (*name == '.')
20229 name++;
20230
20231 fprintf (asm_out_file, "\t.long\t%s\n", name);
20232 return true;
20233 }
20234 }
20235 #endif /* RELOCATABLE_NEEDS_FIXUP */
20236 return default_assemble_integer (x, size, aligned_p);
20237 }
20238
20239 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
20240 /* Emit an assembler directive to set symbol visibility for DECL to
20241 VISIBILITY_TYPE. */
20242
20243 static void
20244 rs6000_assemble_visibility (tree decl, int vis)
20245 {
20246 if (TARGET_XCOFF)
20247 return;
20248
20249 /* Functions need to have their entry point symbol visibility set as
20250 well as their descriptor symbol visibility. */
20251 if (DEFAULT_ABI == ABI_AIX
20252 && DOT_SYMBOLS
20253 && TREE_CODE (decl) == FUNCTION_DECL)
20254 {
20255 static const char * const visibility_types[] = {
20256 NULL, "internal", "hidden", "protected"
20257 };
20258
20259 const char *name, *type;
20260
20261 name = ((* targetm.strip_name_encoding)
20262 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
20263 type = visibility_types[vis];
20264
20265 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
20266 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
20267 }
20268 else
20269 default_assemble_visibility (decl, vis);
20270 }
20271 #endif
20272 \f
20273 enum rtx_code
20274 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
20275 {
20276 /* Reversal of FP compares takes care -- an ordered compare
20277 becomes an unordered compare and vice versa. */
20278 if (mode == CCFPmode
20279 && (!flag_finite_math_only
20280 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
20281 || code == UNEQ || code == LTGT))
20282 return reverse_condition_maybe_unordered (code);
20283 else
20284 return reverse_condition (code);
20285 }
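/* For instance (illustrative): reversing LT on a CCFPmode comparison
 without flag_finite_math_only yields UNGE, so NaN operands still take
 the reversed branch; with finite math the plain reversal to GE is used
 unless the original code was already one of the unordered-aware
 codes. */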
20286
20287 /* Generate a compare for CODE. Return a brand-new rtx that
20288 represents the result of the compare. */
20289
20290 static rtx
20291 rs6000_generate_compare (rtx cmp, machine_mode mode)
20292 {
20293 machine_mode comp_mode;
20294 rtx compare_result;
20295 enum rtx_code code = GET_CODE (cmp);
20296 rtx op0 = XEXP (cmp, 0);
20297 rtx op1 = XEXP (cmp, 1);
20298
20299 if (FLOAT_MODE_P (mode))
20300 comp_mode = CCFPmode;
20301 else if (code == GTU || code == LTU
20302 || code == GEU || code == LEU)
20303 comp_mode = CCUNSmode;
20304 else if ((code == EQ || code == NE)
20305 && unsigned_reg_p (op0)
20306 && (unsigned_reg_p (op1)
20307 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
20308 /* These are unsigned values; perhaps there will be a later
20309 ordering compare that can be shared with this one. */
20310 comp_mode = CCUNSmode;
20311 else
20312 comp_mode = CCmode;
20313
20314 /* If we have an unsigned compare, make sure we don't have a signed value as
20315 an immediate. */
20316 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
20317 && INTVAL (op1) < 0)
20318 {
20319 op0 = copy_rtx_if_shared (op0);
20320 op1 = force_reg (GET_MODE (op0), op1);
20321 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
20322 }
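/* Example (illustrative): a GTU comparison of a register against
 (const_int -1) cannot put the -1 in the unsigned 16-bit immediate
 field of cmplwi, so the constant is forced into a register and a
 register-register cmplw is generated instead. */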
20323
20324 /* First, the compare. */
20325 compare_result = gen_reg_rtx (comp_mode);
20326
20327 /* E500 FP compare instructions on the GPRs. Yuck! */
20328 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
20329 && FLOAT_MODE_P (mode))
20330 {
20331 rtx cmp, or_result, compare_result2;
20332 machine_mode op_mode = GET_MODE (op0);
20333 bool reverse_p;
20334
20335 if (op_mode == VOIDmode)
20336 op_mode = GET_MODE (op1);
20337
20338 /* First reverse the condition codes that aren't directly supported. */
20339 switch (code)
20340 {
20341 case NE:
20342 case UNLT:
20343 case UNLE:
20344 case UNGT:
20345 case UNGE:
20346 code = reverse_condition_maybe_unordered (code);
20347 reverse_p = true;
20348 break;
20349
20350 case EQ:
20351 case LT:
20352 case LE:
20353 case GT:
20354 case GE:
20355 reverse_p = false;
20356 break;
20357
20358 default:
20359 gcc_unreachable ();
20360 }
20361
20362 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
20363 This explains the following mess. */
20364
20365 switch (code)
20366 {
20367 case EQ:
20368 switch (op_mode)
20369 {
20370 case SFmode:
20371 cmp = (flag_finite_math_only && !flag_trapping_math)
20372 ? gen_tstsfeq_gpr (compare_result, op0, op1)
20373 : gen_cmpsfeq_gpr (compare_result, op0, op1);
20374 break;
20375
20376 case DFmode:
20377 cmp = (flag_finite_math_only && !flag_trapping_math)
20378 ? gen_tstdfeq_gpr (compare_result, op0, op1)
20379 : gen_cmpdfeq_gpr (compare_result, op0, op1);
20380 break;
20381
20382 case TFmode:
20383 case IFmode:
20384 case KFmode:
20385 cmp = (flag_finite_math_only && !flag_trapping_math)
20386 ? gen_tsttfeq_gpr (compare_result, op0, op1)
20387 : gen_cmptfeq_gpr (compare_result, op0, op1);
20388 break;
20389
20390 default:
20391 gcc_unreachable ();
20392 }
20393 break;
20394
20395 case GT:
20396 case GE:
20397 switch (op_mode)
20398 {
20399 case SFmode:
20400 cmp = (flag_finite_math_only && !flag_trapping_math)
20401 ? gen_tstsfgt_gpr (compare_result, op0, op1)
20402 : gen_cmpsfgt_gpr (compare_result, op0, op1);
20403 break;
20404
20405 case DFmode:
20406 cmp = (flag_finite_math_only && !flag_trapping_math)
20407 ? gen_tstdfgt_gpr (compare_result, op0, op1)
20408 : gen_cmpdfgt_gpr (compare_result, op0, op1);
20409 break;
20410
20411 case TFmode:
20412 case IFmode:
20413 case KFmode:
20414 cmp = (flag_finite_math_only && !flag_trapping_math)
20415 ? gen_tsttfgt_gpr (compare_result, op0, op1)
20416 : gen_cmptfgt_gpr (compare_result, op0, op1);
20417 break;
20418
20419 default:
20420 gcc_unreachable ();
20421 }
20422 break;
20423
20424 case LT:
20425 case LE:
20426 switch (op_mode)
20427 {
20428 case SFmode:
20429 cmp = (flag_finite_math_only && !flag_trapping_math)
20430 ? gen_tstsflt_gpr (compare_result, op0, op1)
20431 : gen_cmpsflt_gpr (compare_result, op0, op1);
20432 break;
20433
20434 case DFmode:
20435 cmp = (flag_finite_math_only && !flag_trapping_math)
20436 ? gen_tstdflt_gpr (compare_result, op0, op1)
20437 : gen_cmpdflt_gpr (compare_result, op0, op1);
20438 break;
20439
20440 case TFmode:
20441 case IFmode:
20442 case KFmode:
20443 cmp = (flag_finite_math_only && !flag_trapping_math)
20444 ? gen_tsttflt_gpr (compare_result, op0, op1)
20445 : gen_cmptflt_gpr (compare_result, op0, op1);
20446 break;
20447
20448 default:
20449 gcc_unreachable ();
20450 }
20451 break;
20452
20453 default:
20454 gcc_unreachable ();
20455 }
20456
20457 /* Synthesize LE and GE from LT/GT || EQ. */
20458 if (code == LE || code == GE)
20459 {
20460 emit_insn (cmp);
20461
20462 compare_result2 = gen_reg_rtx (CCFPmode);
20463
20464 /* Do the EQ. */
20465 switch (op_mode)
20466 {
20467 case SFmode:
20468 cmp = (flag_finite_math_only && !flag_trapping_math)
20469 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
20470 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
20471 break;
20472
20473 case DFmode:
20474 cmp = (flag_finite_math_only && !flag_trapping_math)
20475 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
20476 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
20477 break;
20478
20479 case TFmode:
20480 case IFmode:
20481 case KFmode:
20482 cmp = (flag_finite_math_only && !flag_trapping_math)
20483 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
20484 : gen_cmptfeq_gpr (compare_result2, op0, op1);
20485 break;
20486
20487 default:
20488 gcc_unreachable ();
20489 }
20490
20491 emit_insn (cmp);
20492
20493 /* OR them together. */
20494 or_result = gen_reg_rtx (CCFPmode);
20495 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
20496 compare_result2);
20497 compare_result = or_result;
20498 }
20499
20500 code = reverse_p ? NE : EQ;
20501
20502 emit_insn (cmp);
20503 }
20504
20505 /* IEEE 128-bit support in VSX registers. If we do not have IEEE 128-bit
20506 hardware, the comparison functions (__cmpokf2 and __cmpukf2) return a value
20507 in the range 0..15 laid out the same way the PowerPC CR register would be
20508 for a normal floating point comparison by the fcmpo and fcmpu
20509 instructions. */
20510 else if (!TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))
20511 {
20512 rtx and_reg = gen_reg_rtx (SImode);
20513 rtx dest = gen_reg_rtx (SImode);
20514 rtx libfunc = optab_libfunc (ucmp_optab, mode);
20515 HOST_WIDE_INT mask_value = 0;
20516
20517 /* Values that __cmpokf2/__cmpukf2 returns. */
20518 #define PPC_CMP_UNORDERED 0x1 /* isnan (a) || isnan (b). */
20519 #define PPC_CMP_EQUAL 0x2 /* a == b. */
20520 #define PPC_CMP_GREATER_THEN 0x4 /* a > b. */
20521 #define PPC_CMP_LESS_THEN 0x8 /* a < b. */
20522
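/* A worked example (illustrative): a GE comparison becomes a call to
 __cmpukf2, an AND of its 0..15 result with
 (PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL) == 0x6, and a NE test of that
 AND against zero. An unordered result (0x1) leaves the masked bits
 clear, so GE is correctly false for NaN operands. */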
20523 switch (code)
20524 {
20525 case EQ:
20526 mask_value = PPC_CMP_EQUAL;
20527 code = NE;
20528 break;
20529
20530 case NE:
20531 mask_value = PPC_CMP_EQUAL;
20532 code = EQ;
20533 break;
20534
20535 case GT:
20536 mask_value = PPC_CMP_GREATER_THEN;
20537 code = NE;
20538 break;
20539
20540 case GE:
20541 mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
20542 code = NE;
20543 break;
20544
20545 case LT:
20546 mask_value = PPC_CMP_LESS_THEN;
20547 code = NE;
20548 break;
20549
20550 case LE:
20551 mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
20552 code = NE;
20553 break;
20554
20555 case UNLE:
20556 mask_value = PPC_CMP_GREATER_THEN;
20557 code = EQ;
20558 break;
20559
20560 case UNLT:
20561 mask_value = PPC_CMP_GREATER_THAN | PPC_CMP_EQUAL;
20562 code = EQ;
20563 break;
20564
20565 case UNGE:
20566 mask_value = PPC_CMP_LESS_THAN;
20567 code = EQ;
20568 break;
20569
20570 case UNGT:
20571 mask_value = PPC_CMP_LESS_THAN | PPC_CMP_EQUAL;
20572 code = EQ;
20573 break;
20574
20575 case UNEQ:
20576 mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
20577 code = NE;
      break;
20578
20579 case LTGT:
20580 mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
20581 code = EQ;
20582 break;
20583
20584 case UNORDERED:
20585 mask_value = PPC_CMP_UNORDERED;
20586 code = NE;
20587 break;
20588
20589 case ORDERED:
20590 mask_value = PPC_CMP_UNORDERED;
20591 code = EQ;
20592 break;
20593
20594 default:
20595 gcc_unreachable ();
20596 }
20597
20598 gcc_assert (mask_value != 0);
20599 and_reg = emit_library_call_value (libfunc, and_reg, LCT_CONST, SImode, 2,
20600 op0, mode, op1, mode);
20601
20602 emit_insn (gen_andsi3 (dest, and_reg, GEN_INT (mask_value)));
20603 compare_result = gen_reg_rtx (CCmode);
20604 comp_mode = CCmode;
20605
20606 emit_insn (gen_rtx_SET (compare_result,
20607 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
20608 }
20609
20610 else
20611 {
20612 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
20613 CLOBBERs to match cmptf_internal2 pattern. */
20614 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
20615 && FLOAT128_IBM_P (GET_MODE (op0))
20616 && TARGET_HARD_FLOAT && TARGET_FPRS)
20617 emit_insn (gen_rtx_PARALLEL (VOIDmode,
20618 gen_rtvec (10,
20619 gen_rtx_SET (compare_result,
20620 gen_rtx_COMPARE (comp_mode, op0, op1)),
20621 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20622 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20623 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20624 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20625 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20626 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20627 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20628 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20629 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
20630 else if (GET_CODE (op1) == UNSPEC
20631 && XINT (op1, 1) == UNSPEC_SP_TEST)
20632 {
20633 rtx op1b = XVECEXP (op1, 0, 0);
20634 comp_mode = CCEQmode;
20635 compare_result = gen_reg_rtx (CCEQmode);
20636 if (TARGET_64BIT)
20637 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
20638 else
20639 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
20640 }
20641 else
20642 emit_insn (gen_rtx_SET (compare_result,
20643 gen_rtx_COMPARE (comp_mode, op0, op1)));
20644 }
20645
20646 /* Some kinds of FP comparisons need an OR operation;
20647 under flag_finite_math_only we don't bother. */
20648 if (FLOAT_MODE_P (mode)
20649 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
20650 && !flag_finite_math_only
20651 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
20652 && (code == LE || code == GE
20653 || code == UNEQ || code == LTGT
20654 || code == UNGT || code == UNLT))
20655 {
20656 enum rtx_code or1, or2;
20657 rtx or1_rtx, or2_rtx, compare2_rtx;
20658 rtx or_result = gen_reg_rtx (CCEQmode);
20659
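      /* Split the compound test into its two primitive conditions; the IOR
	 of the two CR bits below is matched by a cror instruction, leaving
	 the result in the EQ bit of OR_RESULT (hence CCEQmode).  */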
20660 switch (code)
20661 {
20662 case LE: or1 = LT; or2 = EQ; break;
20663 case GE: or1 = GT; or2 = EQ; break;
20664 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
20665 case LTGT: or1 = LT; or2 = GT; break;
20666 case UNGT: or1 = UNORDERED; or2 = GT; break;
20667 case UNLT: or1 = UNORDERED; or2 = LT; break;
20668 default: gcc_unreachable ();
20669 }
20670 validate_condition_mode (or1, comp_mode);
20671 validate_condition_mode (or2, comp_mode);
20672 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
20673 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
20674 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
20675 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
20676 const_true_rtx);
20677 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
20678
20679 compare_result = or_result;
20680 code = EQ;
20681 }
20682
20683 validate_condition_mode (code, GET_MODE (compare_result));
20684
20685 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
20686 }
20687
20688 \f
20689 /* Return the diagnostic message string if the binary operation OP is
20690 not permitted on TYPE1 and TYPE2, NULL otherwise. */
20691
20692 static const char*
20693 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
20694 const_tree type1,
20695 const_tree type2)
20696 {
20697 machine_mode mode1 = TYPE_MODE (type1);
20698 machine_mode mode2 = TYPE_MODE (type2);
20699
20700 /* For complex modes, use the inner type. */
20701 if (COMPLEX_MODE_P (mode1))
20702 mode1 = GET_MODE_INNER (mode1);
20703
20704 if (COMPLEX_MODE_P (mode2))
20705 mode2 = GET_MODE_INNER (mode2);
20706
20707 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
20708 double to intermix. */
20709 if (mode1 == mode2)
20710 return NULL;
20711
20712 if ((mode1 == KFmode && mode2 == IFmode)
20713 || (mode1 == IFmode && mode2 == KFmode))
20714 return N_("__float128 and __ibm128 cannot be used in the same expression");
20715
20716 if (TARGET_IEEEQUAD
20717 && ((mode1 == IFmode && mode2 == TFmode)
20718 || (mode1 == TFmode && mode2 == IFmode)))
20719 return N_("__ibm128 and long double cannot be used in the same expression");
20720
20721 if (!TARGET_IEEEQUAD
20722 && ((mode1 == KFmode && mode2 == TFmode)
20723 || (mode1 == TFmode && mode2 == KFmode)))
20724 return N_("__float128 and long double cannot be used in the same "
20725 "expression");
20726
20727 return NULL;
20728 }
20729
20730 \f
20731 /* Expand floating point conversion to/from __float128 and __ibm128. */
20732
20733 void
20734 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
20735 {
20736 machine_mode dest_mode = GET_MODE (dest);
20737 machine_mode src_mode = GET_MODE (src);
20738 convert_optab cvt = unknown_optab;
20739 bool do_move = false;
20740 rtx libfunc = NULL_RTX;
20741 rtx dest2;
20742 typedef rtx (*rtx_2func_t) (rtx, rtx);
20743 rtx_2func_t hw_convert = (rtx_2func_t)0;
20744 size_t kf_or_tf;
20745
20746 struct hw_conv_t {
20747 rtx_2func_t from_df;
20748 rtx_2func_t from_sf;
20749 rtx_2func_t from_si_sign;
20750 rtx_2func_t from_si_uns;
20751 rtx_2func_t from_di_sign;
20752 rtx_2func_t from_di_uns;
20753 rtx_2func_t to_df;
20754 rtx_2func_t to_sf;
20755 rtx_2func_t to_si_sign;
20756 rtx_2func_t to_si_uns;
20757 rtx_2func_t to_di_sign;
20758 rtx_2func_t to_di_uns;
20759 } hw_conversions[2] = {
20760 /* conversions to/from KFmode */
20761 {
20762 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
20763 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
20764 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
20765 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
20766 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
20767 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
20768 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
20769 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
20770 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
20771 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
20772 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
20773 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
20774 },
20775
20776 /* conversions to/from TFmode */
20777 {
20778 gen_extenddftf2_hw, /* TFmode <- DFmode. */
20779 gen_extendsftf2_hw, /* TFmode <- SFmode. */
20780 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
20781 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
20782 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
20783 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
20784 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
20785 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
20786 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
20787 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
20788 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
20789 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
20790 },
20791 };
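  /* Row 0 of the table above handles KFmode, row 1 TFmode (when long double
     is IEEE 128-bit); KF_OR_TF below selects the row.  */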
20792
20793 if (dest_mode == src_mode)
20794 gcc_unreachable ();
20795
20796 /* Eliminate memory operations. */
20797 if (MEM_P (src))
20798 src = force_reg (src_mode, src);
20799
20800 if (MEM_P (dest))
20801 {
20802 rtx tmp = gen_reg_rtx (dest_mode);
20803 rs6000_expand_float128_convert (tmp, src, unsigned_p);
20804 rs6000_emit_move (dest, tmp, dest_mode);
20805 return;
20806 }
20807
20808 /* Convert to IEEE 128-bit floating point. */
20809 if (FLOAT128_IEEE_P (dest_mode))
20810 {
20811 if (dest_mode == KFmode)
20812 kf_or_tf = 0;
20813 else if (dest_mode == TFmode)
20814 kf_or_tf = 1;
20815 else
20816 gcc_unreachable ();
20817
20818 switch (src_mode)
20819 {
20820 case DFmode:
20821 cvt = sext_optab;
20822 hw_convert = hw_conversions[kf_or_tf].from_df;
20823 break;
20824
20825 case SFmode:
20826 cvt = sext_optab;
20827 hw_convert = hw_conversions[kf_or_tf].from_sf;
20828 break;
20829
20830 case KFmode:
20831 case IFmode:
20832 case TFmode:
20833 if (FLOAT128_IBM_P (src_mode))
20834 cvt = sext_optab;
20835 else
20836 do_move = true;
20837 break;
20838
20839 case SImode:
20840 if (unsigned_p)
20841 {
20842 cvt = ufloat_optab;
20843 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
20844 }
20845 else
20846 {
20847 cvt = sfloat_optab;
20848 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
20849 }
20850 break;
20851
20852 case DImode:
20853 if (unsigned_p)
20854 {
20855 cvt = ufloat_optab;
20856 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
20857 }
20858 else
20859 {
20860 cvt = sfloat_optab;
20861 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
20862 }
20863 break;
20864
20865 default:
20866 gcc_unreachable ();
20867 }
20868 }
20869
20870 /* Convert from IEEE 128-bit floating point. */
20871 else if (FLOAT128_IEEE_P (src_mode))
20872 {
20873 if (src_mode == KFmode)
20874 kf_or_tf = 0;
20875 else if (src_mode == TFmode)
20876 kf_or_tf = 1;
20877 else
20878 gcc_unreachable ();
20879
20880 switch (dest_mode)
20881 {
20882 case DFmode:
20883 cvt = trunc_optab;
20884 hw_convert = hw_conversions[kf_or_tf].to_df;
20885 break;
20886
20887 case SFmode:
20888 cvt = trunc_optab;
20889 hw_convert = hw_conversions[kf_or_tf].to_sf;
20890 break;
20891
20892 case KFmode:
20893 case IFmode:
20894 case TFmode:
20895 if (FLOAT128_IBM_P (dest_mode))
20896 cvt = trunc_optab;
20897 else
20898 do_move = true;
20899 break;
20900
20901 case SImode:
20902 if (unsigned_p)
20903 {
20904 cvt = ufix_optab;
20905 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
20906 }
20907 else
20908 {
20909 cvt = sfix_optab;
20910 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
20911 }
20912 break;
20913
20914 case DImode:
20915 if (unsigned_p)
20916 {
20917 cvt = ufix_optab;
20918 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
20919 }
20920 else
20921 {
20922 cvt = sfix_optab;
20923 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
20924 }
20925 break;
20926
20927 default:
20928 gcc_unreachable ();
20929 }
20930 }
20931
20932 /* Both IBM format. */
20933 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
20934 do_move = true;
20935
20936 else
20937 gcc_unreachable ();
20938
20939 /* Handle conversion between TFmode/KFmode. */
20940 if (do_move)
20941 emit_move_insn (dest, gen_lowpart (dest_mode, src));
20942
20943 /* Handle conversion if we have hardware support. */
20944 else if (TARGET_FLOAT128_HW && hw_convert)
20945 emit_insn ((hw_convert) (dest, src));
20946
20947 /* Call an external function to do the conversion. */
20948 else if (cvt != unknown_optab)
20949 {
20950 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
20951 gcc_assert (libfunc != NULL_RTX);
20952
20953 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
20954 src_mode);
20955
20956 gcc_assert (dest2 != NULL_RTX);
20957 if (!rtx_equal_p (dest, dest2))
20958 emit_move_insn (dest, dest2);
20959 }
20960
20961 else
20962 gcc_unreachable ();
20963
20964 return;
20965 }
20966
20967 /* Split a conversion from __float128 to an integer type into separate insns.
20968 OPERANDS points to the destination, source, and V2DI temporary
20969 register. CODE is either FIX or UNSIGNED_FIX. */
20970
20971 void
20972 convert_float128_to_int (rtx *operands, enum rtx_code code)
20973 {
20974 rtx dest = operands[0];
20975 rtx src = operands[1];
20976 rtx tmp = operands[2];
20977 rtx cvt;
20978 rtvec cvt_vec;
20979 rtx cvt_unspec;
20980 rtvec move_vec;
20981 rtx move_unspec;
20982
20983 if (GET_CODE (tmp) == SCRATCH)
20984 tmp = gen_reg_rtx (V2DImode);
20985
20986 if (MEM_P (dest))
20987 dest = rs6000_address_for_fpconvert (dest);
20988
20989 /* Generate the actual convert insn of the form:
20990 (set (tmp) (unspec:V2DI [(fix:SI (reg:KF))] UNSPEC_IEEE128_CONVERT)). */
20991 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), src);
20992 cvt_vec = gen_rtvec (1, cvt);
20993 cvt_unspec = gen_rtx_UNSPEC (V2DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
20994 emit_insn (gen_rtx_SET (tmp, cvt_unspec));
20995
20996 /* Generate the move insn of the form:
20997 (set (dest:SI) (unspec:SI [(tmp:V2DI)] UNSPEC_IEEE128_MOVE)).  */
20998 move_vec = gen_rtvec (1, tmp);
20999 move_unspec = gen_rtx_UNSPEC (GET_MODE (dest), move_vec, UNSPEC_IEEE128_MOVE);
21000 emit_insn (gen_rtx_SET (dest, move_unspec));
21001 }
21002
21003 /* Split a conversion from an integer type to __float128 into separate insns.
21004 OPERANDS points to the destination, source, and V2DI temporary
21005 register. CODE is either FLOAT or UNSIGNED_FLOAT. */
21006
21007 void
21008 convert_int_to_float128 (rtx *operands, enum rtx_code code)
21009 {
21010 rtx dest = operands[0];
21011 rtx src = operands[1];
21012 rtx tmp = operands[2];
21013 rtx cvt;
21014 rtvec cvt_vec;
21015 rtx cvt_unspec;
21016 rtvec move_vec;
21017 rtx move_unspec;
21018 rtx unsigned_flag;
21019
21020 if (GET_CODE (tmp) == SCRATCH)
21021 tmp = gen_reg_rtx (V2DImode);
21022
21023 if (MEM_P (src))
21024 src = rs6000_address_for_fpconvert (src);
21025
21026 /* Generate the move of the integer into the Altivec register of the form:
21027 (set (tmp:V2DI) (unspec:V2DI [(src:SI)
21028 (const_int 0)] UNSPEC_IEEE128_MOVE)).
21029
21030 or:
21031 (set (tmp:V2DI) (unspec:V2DI [(src:DI)] UNSPEC_IEEE128_MOVE)). */
21032
21033 if (GET_MODE (src) == SImode)
21034 {
21035 unsigned_flag = (code == UNSIGNED_FLOAT) ? const1_rtx : const0_rtx;
21036 move_vec = gen_rtvec (2, src, unsigned_flag);
21037 }
21038 else
21039 move_vec = gen_rtvec (1, src);
21040
21041 move_unspec = gen_rtx_UNSPEC (V2DImode, move_vec, UNSPEC_IEEE128_MOVE);
21042 emit_insn (gen_rtx_SET (tmp, move_unspec));
21043
21044 /* Generate the actual convert insn of the form:
21045 (set (dest:KF) (float:KF (unspec:DI [(tmp:V2DI)]
21046 UNSPEC_IEEE128_CONVERT))). */
21047 cvt_vec = gen_rtvec (1, tmp);
21048 cvt_unspec = gen_rtx_UNSPEC (DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
21049 cvt = gen_rtx_fmt_e (code, GET_MODE (dest), cvt_unspec);
21050 emit_insn (gen_rtx_SET (dest, cvt));
21051 }
21052
21053 \f
21054 /* Emit the RTL for an sISEL pattern. */
21055
21056 void
21057 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
21058 {
21059 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
21060 }
21061
21062 /* Emit RTL that computes a value that is zero iff OP1 and OP2 are equal.
21063    SCRATCH may be used as the destination register.  Return that register.  */
21064
21065 rtx
21066 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
21067 {
21068 if (op2 == const0_rtx)
21069 return op1;
21070
21071 if (GET_CODE (scratch) == SCRATCH)
21072 scratch = gen_reg_rtx (mode);
21073
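  /* If OP2 is usable as a logical-op operand (e.g. fits xori/xoris), a
     single XOR yields a value that is zero exactly when OP1 == OP2;
     otherwise compute OP1 - OP2, which is likewise zero iff the operands
     are equal.  */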
21074 if (logical_operand (op2, mode))
21075 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
21076 else
21077 emit_insn (gen_rtx_SET (scratch,
21078 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
21079
21080 return scratch;
21081 }
21082
21083 void
21084 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
21085 {
21086 rtx condition_rtx;
21087 machine_mode op_mode;
21088 enum rtx_code cond_code;
21089 rtx result = operands[0];
21090
21091 condition_rtx = rs6000_generate_compare (operands[1], mode);
21092 cond_code = GET_CODE (condition_rtx);
21093
21094 if (FLOAT_MODE_P (mode)
21095 && !TARGET_FPRS && TARGET_HARD_FLOAT)
21096 {
21097 rtx t;
21098
21099 PUT_MODE (condition_rtx, SImode);
21100 t = XEXP (condition_rtx, 0);
21101
21102 gcc_assert (cond_code == NE || cond_code == EQ);
21103
21104 if (cond_code == NE)
21105 emit_insn (gen_e500_flip_gt_bit (t, t));
21106
21107 emit_insn (gen_move_from_CR_gt_bit (result, t));
21108 return;
21109 }
21110
21111 if (cond_code == NE
21112 || cond_code == GE || cond_code == LE
21113 || cond_code == GEU || cond_code == LEU
21114 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
21115 {
21116 rtx not_result = gen_reg_rtx (CCEQmode);
21117 rtx not_op, rev_cond_rtx;
21118 machine_mode cc_mode;
21119
21120 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
21121
21122 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
21123 SImode, XEXP (condition_rtx, 0), const0_rtx);
21124 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
21125 emit_insn (gen_rtx_SET (not_result, not_op));
21126 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
21127 }
21128
21129 op_mode = GET_MODE (XEXP (operands[1], 0));
21130 if (op_mode == VOIDmode)
21131 op_mode = GET_MODE (XEXP (operands[1], 1));
21132
21133 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
21134 {
21135 PUT_MODE (condition_rtx, DImode);
21136 convert_move (result, condition_rtx, 0);
21137 }
21138 else
21139 {
21140 PUT_MODE (condition_rtx, SImode);
21141 emit_insn (gen_rtx_SET (result, condition_rtx));
21142 }
21143 }
21144
21145 /* Emit a conditional branch: OPERANDS[0] holds the comparison (in mode MODE) and OPERANDS[3] the target label.  */
21146
21147 void
21148 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
21149 {
21150 rtx condition_rtx, loc_ref;
21151
21152 condition_rtx = rs6000_generate_compare (operands[0], mode);
21153 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
21154 emit_jump_insn (gen_rtx_SET (pc_rtx,
21155 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
21156 loc_ref, pc_rtx)));
21157 }
21158
21159 /* Return the string to output a conditional branch to LABEL, which is
21160 the operand template of the label, or NULL if the branch is really a
21161 conditional return.
21162
21163 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
21164 condition code register and its mode specifies what kind of
21165 comparison we made.
21166
21167 REVERSED is nonzero if we should reverse the sense of the comparison.
21168
21169 INSN is the insn. */
21170
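/* For example, a branch to .L42 on CR field 6 equality, predicted taken,
   comes out as "beq+ 6,.L42" or "beq+ cr6,.L42" depending on the target's
   reg_names.  When the target is out of range, the sense is inverted
   around an unconditional branch: "bne 6,$+8" followed by "b .L42".  */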
21171 char *
21172 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
21173 {
21174 static char string[64];
21175 enum rtx_code code = GET_CODE (op);
21176 rtx cc_reg = XEXP (op, 0);
21177 machine_mode mode = GET_MODE (cc_reg);
21178 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
21179 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
21180 int really_reversed = reversed ^ need_longbranch;
21181 char *s = string;
21182 const char *ccode;
21183 const char *pred;
21184 rtx note;
21185
21186 validate_condition_mode (code, mode);
21187
21188 /* Work out which way this really branches. We could use
21189 reverse_condition_maybe_unordered here always but this
21190 makes the resulting assembler clearer. */
21191 if (really_reversed)
21192 {
21193 /* Reversal of FP compares needs care -- an ordered compare
21194 becomes an unordered compare and vice versa. */
21195 if (mode == CCFPmode)
21196 code = reverse_condition_maybe_unordered (code);
21197 else
21198 code = reverse_condition (code);
21199 }
21200
21201 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
21202 {
21203 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
21204 to the GT bit. */
21205 switch (code)
21206 {
21207 case EQ:
21208 /* Opposite of GT. */
21209 code = GT;
21210 break;
21211
21212 case NE:
21213 code = UNLE;
21214 break;
21215
21216 default:
21217 gcc_unreachable ();
21218 }
21219 }
21220
21221 switch (code)
21222 {
21223 /* Not all of these are actually distinct opcodes, but
21224 we distinguish them for clarity of the resulting assembler. */
21225 case NE: case LTGT:
21226 ccode = "ne"; break;
21227 case EQ: case UNEQ:
21228 ccode = "eq"; break;
21229 case GE: case GEU:
21230 ccode = "ge"; break;
21231 case GT: case GTU: case UNGT:
21232 ccode = "gt"; break;
21233 case LE: case LEU:
21234 ccode = "le"; break;
21235 case LT: case LTU: case UNLT:
21236 ccode = "lt"; break;
21237 case UNORDERED: ccode = "un"; break;
21238 case ORDERED: ccode = "nu"; break;
21239 case UNGE: ccode = "nl"; break;
21240 case UNLE: ccode = "ng"; break;
21241 default:
21242 gcc_unreachable ();
21243 }
21244
21245 /* Maybe we have a guess as to how likely the branch is. */
21246 pred = "";
21247 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
21248 if (note != NULL_RTX)
21249 {
21250 /* PROB is the difference from 50%. */
21251 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
21252
21253 /* Only hint for highly probable/improbable branches on newer
21254 cpus as static prediction overrides processor dynamic
21255 prediction. For older cpus we may as well always hint, but
21256 assume not taken for branches that are very close to 50% as a
21257 mispredicted taken branch is more expensive than a
21258 mispredicted not-taken branch. */
21259 if (rs6000_always_hint
21260 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
21261 && br_prob_note_reliable_p (note)))
21262 {
21263 if (abs (prob) > REG_BR_PROB_BASE / 20
21264 && ((prob > 0) ^ need_longbranch))
21265 pred = "+";
21266 else
21267 pred = "-";
21268 }
21269 }
21270
21271 if (label == NULL)
21272 s += sprintf (s, "b%slr%s ", ccode, pred);
21273 else
21274 s += sprintf (s, "b%s%s ", ccode, pred);
21275
21276 /* We need to escape any '%' characters in the reg_names string.
21277 Assume they'd only be the first character.... */
21278 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
21279 *s++ = '%';
21280 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
21281
21282 if (label != NULL)
21283 {
21284 /* If the branch distance was too far, we may have to use an
21285 unconditional branch to go the distance. */
21286 if (need_longbranch)
21287 s += sprintf (s, ",$+8\n\tb %s", label);
21288 else
21289 s += sprintf (s, ",%s", label);
21290 }
21291
21292 return string;
21293 }
21294
21295 /* Return the string to flip the GT bit on a CR. */
21296 char *
21297 output_e500_flip_gt_bit (rtx dst, rtx src)
21298 {
21299 static char string[64];
21300 int a, b;
21301
21302 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
21303 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
21304
21305 /* GT bit. */
21306 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
21307 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
21308
21309 sprintf (string, "crnot %d,%d", a, b);
21310 return string;
21311 }
21312
21313 /* Return insn for VSX or Altivec comparisons. */
21314
21315 static rtx
21316 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
21317 {
21318 rtx mask;
21319 machine_mode mode = GET_MODE (op0);
21320
21321 switch (code)
21322 {
21323 default:
21324 break;
21325
21326 case GE:
21327 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21328 return NULL_RTX;
21329
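      /* FP vector GE is supported directly; fall through to emit it.  */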
21330 case EQ:
21331 case GT:
21332 case GTU:
21333 case ORDERED:
21334 case UNORDERED:
21335 case UNEQ:
21336 case LTGT:
21337 mask = gen_reg_rtx (mode);
21338 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
21339 return mask;
21340 }
21341
21342 return NULL_RTX;
21343 }
21344
21345 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
21346 DMODE is expected destination mode. This is a recursive function. */
21347
21348 static rtx
21349 rs6000_emit_vector_compare (enum rtx_code rcode,
21350 rtx op0, rtx op1,
21351 machine_mode dmode)
21352 {
21353 rtx mask;
21354 bool swap_operands = false;
21355 bool try_again = false;
21356
21357 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
21358 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
21359
21360 /* See if the comparison works as is. */
21361 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
21362 if (mask)
21363 return mask;
21364
21365 switch (rcode)
21366 {
21367 case LT:
21368 rcode = GT;
21369 swap_operands = true;
21370 try_again = true;
21371 break;
21372 case LTU:
21373 rcode = GTU;
21374 swap_operands = true;
21375 try_again = true;
21376 break;
21377 case NE:
21378 case UNLE:
21379 case UNLT:
21380 case UNGE:
21381 case UNGT:
21382 /* Invert condition and try again.
21383 e.g., A != B becomes ~(A==B). */
21384 {
21385 enum rtx_code rev_code;
21386 enum insn_code nor_code;
21387 rtx mask2;
21388
21389 rev_code = reverse_condition_maybe_unordered (rcode);
21390 if (rev_code == UNKNOWN)
21391 return NULL_RTX;
21392
21393 nor_code = optab_handler (one_cmpl_optab, dmode);
21394 if (nor_code == CODE_FOR_nothing)
21395 return NULL_RTX;
21396
21397 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
21398 if (!mask2)
21399 return NULL_RTX;
21400
21401 mask = gen_reg_rtx (dmode);
21402 emit_insn (GEN_FCN (nor_code) (mask, mask2));
21403 return mask;
21404 }
21405 break;
21406 case GE:
21407 case GEU:
21408 case LE:
21409 case LEU:
21410 /* Try GT/GTU/LT/LTU OR EQ */
21411 {
21412 rtx c_rtx, eq_rtx;
21413 enum insn_code ior_code;
21414 enum rtx_code new_code;
21415
21416 switch (rcode)
21417 {
21418 case GE:
21419 new_code = GT;
21420 break;
21421
21422 case GEU:
21423 new_code = GTU;
21424 break;
21425
21426 case LE:
21427 new_code = LT;
21428 break;
21429
21430 case LEU:
21431 new_code = LTU;
21432 break;
21433
21434 default:
21435 gcc_unreachable ();
21436 }
21437
21438 ior_code = optab_handler (ior_optab, dmode);
21439 if (ior_code == CODE_FOR_nothing)
21440 return NULL_RTX;
21441
21442 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
21443 if (!c_rtx)
21444 return NULL_RTX;
21445
21446 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
21447 if (!eq_rtx)
21448 return NULL_RTX;
21449
21450 mask = gen_reg_rtx (dmode);
21451 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
21452 return mask;
21453 }
21454 break;
21455 default:
21456 return NULL_RTX;
21457 }
21458
21459 if (try_again)
21460 {
21461 if (swap_operands)
21462 std::swap (op0, op1);
21463
21464 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
21465 if (mask)
21466 return mask;
21467 }
21468
21469 /* You only get two chances. */
21470 return NULL_RTX;
21471 }
21472
21473 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
21474 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
21475 operands for the relation operation COND. */
21476
21477 int
21478 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
21479 rtx cond, rtx cc_op0, rtx cc_op1)
21480 {
21481 machine_mode dest_mode = GET_MODE (dest);
21482 machine_mode mask_mode = GET_MODE (cc_op0);
21483 enum rtx_code rcode = GET_CODE (cond);
21484 machine_mode cc_mode = CCmode;
21485 rtx mask;
21486 rtx cond2;
21488 bool invert_move = false;
21489
21490 if (VECTOR_UNIT_NONE_P (dest_mode))
21491 return 0;
21492
21493 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
21494 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
21495
21496 switch (rcode)
21497 {
21498     /* For conditions we cannot test directly, invert the test and
21499        swap the THEN/ELSE arms of the conditional move instead.  */
21500 case NE:
21501 case UNLE:
21502 case UNLT:
21503 case UNGE:
21504 case UNGT:
21505 /* Invert condition and try again.
21506 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
21507 invert_move = true;
21508 rcode = reverse_condition_maybe_unordered (rcode);
21509 if (rcode == UNKNOWN)
21510 return 0;
21511 break;
21512
21513 /* Mark unsigned tests with CCUNSmode. */
21514 case GTU:
21515 case GEU:
21516 case LTU:
21517 case LEU:
21518 cc_mode = CCUNSmode;
21519 break;
21520
21521 default:
21522 break;
21523 }
21524
21525 /* Get the vector mask for the given relational operations. */
21526 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
21527
21528 if (!mask)
21529 return 0;
21530
21531   if (invert_move)
21532     std::swap (op_true, op_false);
21537
21538 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
21539 CONST0_RTX (dest_mode));
21540 emit_insn (gen_rtx_SET (dest,
21541 gen_rtx_IF_THEN_ELSE (dest_mode,
21542 cond2,
21543 op_true,
21544 op_false)));
21545 return 1;
21546 }
21547
21548 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
21549 operands of the last comparison is nonzero/true, FALSE_COND if it
21550 is zero/false. Return 0 if the hardware has no such operation. */
21551
21552 int
21553 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
21554 {
21555 enum rtx_code code = GET_CODE (op);
21556 rtx op0 = XEXP (op, 0);
21557 rtx op1 = XEXP (op, 1);
21558 machine_mode compare_mode = GET_MODE (op0);
21559 machine_mode result_mode = GET_MODE (dest);
21560 rtx temp;
21561 bool is_against_zero;
21562
21563 /* These modes should always match. */
21564 if (GET_MODE (op1) != compare_mode
21565 /* In the isel case however, we can use a compare immediate, so
21566 op1 may be a small constant. */
21567 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
21568 return 0;
21569 if (GET_MODE (true_cond) != result_mode)
21570 return 0;
21571 if (GET_MODE (false_cond) != result_mode)
21572 return 0;
21573
21574 /* Don't allow using floating point comparisons for integer results for
21575 now. */
21576 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
21577 return 0;
21578
21579 /* First, work out if the hardware can do this at all, or
21580 if it's too slow.... */
21581 if (!FLOAT_MODE_P (compare_mode))
21582 {
21583 if (TARGET_ISEL)
21584 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
21585 return 0;
21586 }
21587 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
21588 && SCALAR_FLOAT_MODE_P (compare_mode))
21589 return 0;
21590
21591 is_against_zero = op1 == CONST0_RTX (compare_mode);
21592
21593 /* A floating-point subtract might overflow, underflow, or produce
21594 an inexact result, thus changing the floating-point flags, so it
21595 can't be generated if we care about that. It's safe if one side
21596 of the construct is zero, since then no subtract will be
21597 generated. */
21598 if (SCALAR_FLOAT_MODE_P (compare_mode)
21599 && flag_trapping_math && ! is_against_zero)
21600 return 0;
21601
21602 /* Eliminate half of the comparisons by switching operands, this
21603 makes the remaining code simpler. */
21604 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
21605 || code == LTGT || code == LT || code == UNLE)
21606 {
21607 code = reverse_condition_maybe_unordered (code);
21608       std::swap (true_cond, false_cond);
21611 }
21612
21613   /* UNEQ and LTGT take four instructions for a comparison with zero;
21614      it'll probably be faster to use a branch here too.  */
21615 if (code == UNEQ && HONOR_NANS (compare_mode))
21616 return 0;
21617
21618 /* We're going to try to implement comparisons by performing
21619 a subtract, then comparing against zero. Unfortunately,
21620 Inf - Inf is NaN which is not zero, and so if we don't
21621 know that the operand is finite and the comparison
21622 would treat EQ different to UNORDERED, we can't do it. */
21623 if (HONOR_INFINITIES (compare_mode)
21624 && code != GT && code != UNGE
21625 && (GET_CODE (op1) != CONST_DOUBLE
21626 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
21627 /* Constructs of the form (a OP b ? a : b) are safe. */
21628 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
21629 || (! rtx_equal_p (op0, true_cond)
21630 && ! rtx_equal_p (op1, true_cond))))
21631 return 0;
21632
21633 /* At this point we know we can use fsel. */
21634
21635 /* Reduce the comparison to a comparison against zero. */
21636 if (! is_against_zero)
21637 {
21638 temp = gen_reg_rtx (compare_mode);
21639 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
21640 op0 = temp;
21641 op1 = CONST0_RTX (compare_mode);
21642 }
21643
21644 /* If we don't care about NaNs we can reduce some of the comparisons
21645 down to faster ones. */
21646 if (! HONOR_NANS (compare_mode))
21647 switch (code)
21648 {
21649 case GT:
21650 code = LE;
21651 	std::swap (true_cond, false_cond);
21654 break;
21655 case UNGE:
21656 code = GE;
21657 break;
21658 case UNEQ:
21659 code = EQ;
21660 break;
21661 default:
21662 break;
21663 }
21664
21665 /* Now, reduce everything down to a GE. */
21666 switch (code)
21667 {
21668 case GE:
21669 break;
21670
21671 case LE:
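      /* a <= 0 iff -a >= 0, so negate OP0 and use the common GE test.  */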
21672 temp = gen_reg_rtx (compare_mode);
21673 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
21674 op0 = temp;
21675 break;
21676
21677 case ORDERED:
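      /* a is ordered iff fabs (a) >= 0; only NaNs fail, since fabs of a
	 NaN is still a NaN.  */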
21678 temp = gen_reg_rtx (compare_mode);
21679 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
21680 op0 = temp;
21681 break;
21682
21683 case EQ:
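      /* a == 0 iff -fabs (a) >= 0; any nonzero or NaN value fails.  */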
21684 temp = gen_reg_rtx (compare_mode);
21685 emit_insn (gen_rtx_SET (temp,
21686 gen_rtx_NEG (compare_mode,
21687 gen_rtx_ABS (compare_mode, op0))));
21688 op0 = temp;
21689 break;
21690
21691 case UNGE:
21692 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
21693 temp = gen_reg_rtx (result_mode);
21694 emit_insn (gen_rtx_SET (temp,
21695 gen_rtx_IF_THEN_ELSE (result_mode,
21696 gen_rtx_GE (VOIDmode,
21697 op0, op1),
21698 true_cond, false_cond)));
21699 false_cond = true_cond;
21700 true_cond = temp;
21701
21702 temp = gen_reg_rtx (compare_mode);
21703 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
21704 op0 = temp;
21705 break;
21706
21707 case GT:
21708 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
21709 temp = gen_reg_rtx (result_mode);
21710 emit_insn (gen_rtx_SET (temp,
21711 gen_rtx_IF_THEN_ELSE (result_mode,
21712 gen_rtx_GE (VOIDmode,
21713 op0, op1),
21714 true_cond, false_cond)));
21715 true_cond = false_cond;
21716 false_cond = temp;
21717
21718 temp = gen_reg_rtx (compare_mode);
21719 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
21720 op0 = temp;
21721 break;
21722
21723 default:
21724 gcc_unreachable ();
21725 }
21726
21727 emit_insn (gen_rtx_SET (dest,
21728 gen_rtx_IF_THEN_ELSE (result_mode,
21729 gen_rtx_GE (VOIDmode,
21730 op0, op1),
21731 true_cond, false_cond)));
21732 return 1;
21733 }
21734
21735 /* Same as above, but for ints (isel). */
21736
21737 static int
21738 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
21739 {
21740 rtx condition_rtx, cr;
21741 machine_mode mode = GET_MODE (dest);
21742 enum rtx_code cond_code;
21743 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
21744 bool signedp;
21745
21746 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
21747 return 0;
21748
21749 /* We still have to do the compare, because isel doesn't do a
21750 compare; it just looks at the CRx bits set by a previous compare
21751 instruction. */
21752 condition_rtx = rs6000_generate_compare (op, mode);
21753 cond_code = GET_CODE (condition_rtx);
21754 cr = XEXP (condition_rtx, 0);
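  /* The unsigned codes (GTU/LTU and friends) produce CCUNSmode compares;
     the rest use the signed CCmode, which selects the signed isel forms.  */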
21755 signedp = GET_MODE (cr) == CCmode;
21756
21757 isel_func = (mode == SImode
21758 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
21759 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
21760
21761 switch (cond_code)
21762 {
21763 case LT: case GT: case LTU: case GTU: case EQ:
21764 /* isel handles these directly. */
21765 break;
21766
21767 default:
21768 /* We need to swap the sense of the comparison. */
21769 {
21770 std::swap (false_cond, true_cond);
21771 PUT_CODE (condition_rtx, reverse_condition (cond_code));
21772 }
21773 break;
21774 }
21775
21776 false_cond = force_reg (mode, false_cond);
21777 if (true_cond != const0_rtx)
21778 true_cond = force_reg (mode, true_cond);
21779
21780 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
21781
21782 return 1;
21783 }
21784
21785 const char *
21786 output_isel (rtx *operands)
21787 {
21788 enum rtx_code code;
21789
21790 code = GET_CODE (operands[1]);
21791
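  /* isel can use only the LT, GT and EQ CR bits directly; for the other
     codes, reverse the condition and swap the two source operands in the
     output template.  */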
21792 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
21793 {
21794 gcc_assert (GET_CODE (operands[2]) == REG
21795 && GET_CODE (operands[3]) == REG);
21796 PUT_CODE (operands[1], reverse_condition (code));
21797 return "isel %0,%3,%2,%j1";
21798 }
21799
21800 return "isel %0,%2,%3,%j1";
21801 }
21802
21803 void
21804 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
21805 {
21806 machine_mode mode = GET_MODE (op0);
21807 enum rtx_code c;
21808 rtx target;
21809
21810 /* VSX/altivec have direct min/max insns. */
21811 if ((code == SMAX || code == SMIN)
21812 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
21813 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
21814 {
21815 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
21816 return;
21817 }
21818
21819 if (code == SMAX || code == SMIN)
21820 c = GE;
21821 else
21822 c = GEU;
21823
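  /* SMAX/UMAX is "op0 >= op1 ? op0 : op1"; SMIN/UMIN reuses the same
     comparison with the move arms swapped.  */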
21824 if (code == SMAX || code == UMAX)
21825 target = emit_conditional_move (dest, c, op0, op1, mode,
21826 op0, op1, mode, 0);
21827 else
21828 target = emit_conditional_move (dest, c, op0, op1, mode,
21829 op1, op0, mode, 0);
21830 gcc_assert (target);
21831 if (target != dest)
21832 emit_move_insn (dest, target);
21833 }
21834
21835 /* A subroutine of the atomic operation splitters. Jump to LABEL if
21836 COND is true. Mark the jump as unlikely to be taken. */
21837
21838 static void
21839 emit_unlikely_jump (rtx cond, rtx label)
21840 {
21841 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
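  /* I.e. just under a 1% predicted probability of being taken.  */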
21842 rtx x;
21843
21844 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
21845 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
21846 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
21847 }
21848
21849 /* A subroutine of the atomic operation splitters. Emit a load-locked
21850 instruction in MODE.  For QI/HImode, possibly use a pattern that includes
21851 the zero_extend operation. */
21852
21853 static void
21854 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
21855 {
21856 rtx (*fn) (rtx, rtx) = NULL;
21857
21858 switch (mode)
21859 {
21860 case QImode:
21861 fn = gen_load_lockedqi;
21862 break;
21863 case HImode:
21864 fn = gen_load_lockedhi;
21865 break;
21866 case SImode:
21867 if (GET_MODE (mem) == QImode)
21868 fn = gen_load_lockedqi_si;
21869 else if (GET_MODE (mem) == HImode)
21870 fn = gen_load_lockedhi_si;
21871 else
21872 fn = gen_load_lockedsi;
21873 break;
21874 case DImode:
21875 fn = gen_load_lockeddi;
21876 break;
21877 case TImode:
21878 fn = gen_load_lockedti;
21879 break;
21880 default:
21881 gcc_unreachable ();
21882 }
21883 emit_insn (fn (reg, mem));
21884 }
21885
21886 /* A subroutine of the atomic operation splitters. Emit a store-conditional
21887 instruction in MODE. */
21888
21889 static void
21890 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
21891 {
21892 rtx (*fn) (rtx, rtx, rtx) = NULL;
21893
21894 switch (mode)
21895 {
21896 case QImode:
21897 fn = gen_store_conditionalqi;
21898 break;
21899 case HImode:
21900 fn = gen_store_conditionalhi;
21901 break;
21902 case SImode:
21903 fn = gen_store_conditionalsi;
21904 break;
21905 case DImode:
21906 fn = gen_store_conditionaldi;
21907 break;
21908 case TImode:
21909 fn = gen_store_conditionalti;
21910 break;
21911 default:
21912 gcc_unreachable ();
21913 }
21914
21915 /* Emit sync before stwcx. to address PPC405 Erratum. */
21916 if (PPC405_ERRATUM77)
21917 emit_insn (gen_hwsync ());
21918
21919 emit_insn (fn (res, mem, val));
21920 }
21921
21922 /* Expand barriers before and after a load_locked/store_cond sequence. */
21923
21924 static rtx
21925 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
21926 {
21927 rtx addr = XEXP (mem, 0);
21928 int strict_p = (reload_in_progress || reload_completed);
21929
21930 if (!legitimate_indirect_address_p (addr, strict_p)
21931 && !legitimate_indexed_address_p (addr, strict_p))
21932 {
21933 addr = force_reg (Pmode, addr);
21934 mem = replace_equiv_address_nv (mem, addr);
21935 }
21936
21937 switch (model)
21938 {
21939 case MEMMODEL_RELAXED:
21940 case MEMMODEL_CONSUME:
21941 case MEMMODEL_ACQUIRE:
21942 break;
21943 case MEMMODEL_RELEASE:
21944 case MEMMODEL_ACQ_REL:
21945 emit_insn (gen_lwsync ());
21946 break;
21947 case MEMMODEL_SEQ_CST:
21948 emit_insn (gen_hwsync ());
21949 break;
21950 default:
21951 gcc_unreachable ();
21952 }
21953 return mem;
21954 }
21955
21956 static void
21957 rs6000_post_atomic_barrier (enum memmodel model)
21958 {
21959 switch (model)
21960 {
21961 case MEMMODEL_RELAXED:
21962 case MEMMODEL_CONSUME:
21963 case MEMMODEL_RELEASE:
21964 break;
21965 case MEMMODEL_ACQUIRE:
21966 case MEMMODEL_ACQ_REL:
21967 case MEMMODEL_SEQ_CST:
21968 emit_insn (gen_isync ());
21969 break;
21970 default:
21971 gcc_unreachable ();
21972 }
21973 }
21974
21975 /* A subroutine of the various atomic expanders. For sub-word operations,
21976 we must adjust things to operate on SImode. Given the original MEM,
21977 return a new aligned memory. Also build and return the quantities by
21978 which to shift and mask. */
21979
21980 static rtx
21981 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
21982 {
21983 rtx addr, align, shift, mask, mem;
21984 HOST_WIDE_INT shift_mask;
21985 machine_mode mode = GET_MODE (orig_mem);
21986
21987 /* For smaller modes, we have to implement this via SImode. */
21988 shift_mask = (mode == QImode ? 0x18 : 0x10);
21989
21990 addr = XEXP (orig_mem, 0);
21991 addr = force_reg (GET_MODE (addr), addr);
21992
21993 /* Aligned memory containing subword. Generate a new memory. We
21994 do not want any of the existing MEM_ATTR data, as we're now
21995 accessing memory outside the original object. */
21996 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
21997 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21998 mem = gen_rtx_MEM (SImode, align);
21999 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
22000 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
22001 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
22002
22003 /* Shift amount for subword relative to aligned word. */
22004 shift = gen_reg_rtx (SImode);
22005 addr = gen_lowpart (SImode, addr);
22006 rtx tmp = gen_reg_rtx (SImode);
22007 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
22008 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
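  /* The AND above yields the little-endian bit offset of the subword; on
     big-endian targets the subword lives at the opposite end of the
     aligned word, so flip the offset by XORing with SHIFT_MASK.  */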
22009 if (BYTES_BIG_ENDIAN)
22010 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
22011 shift, 1, OPTAB_LIB_WIDEN);
22012 *pshift = shift;
22013
22014 /* Mask for insertion. */
22015 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
22016 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
22017 *pmask = mask;
22018
22019 return mem;
22020 }
22021
22022 /* A subroutine of the various atomic expanders. For sub-word operands,
22023    combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */
22024
22025 static rtx
22026 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
22027 {
22028 rtx x;
22029
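  /* Compute (OLDVAL & ~MASK) | NEWVAL; the callers have already shifted
     NEWVAL into position with all bits outside the field clear.  */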
22030 x = gen_reg_rtx (SImode);
22031 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
22032 gen_rtx_NOT (SImode, mask),
22033 oldval)));
22034
22035 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
22036
22037 return x;
22038 }
22039
22040 /* A subroutine of the various atomic expanders. For sub-word operands,
22041 extract WIDE to NARROW via SHIFT. */
22042
22043 static void
22044 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
22045 {
22046 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
22047 wide, 1, OPTAB_LIB_WIDEN);
22048 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
22049 }
22050
22051 /* Expand an atomic compare and swap operation. */
22052
22053 void
22054 rs6000_expand_atomic_compare_and_swap (rtx operands[])
22055 {
22056 rtx boolval, retval, mem, oldval, newval, cond;
22057 rtx label1, label2, x, mask, shift;
22058 machine_mode mode, orig_mode;
22059 enum memmodel mod_s, mod_f;
22060 bool is_weak;
22061
22062 boolval = operands[0];
22063 retval = operands[1];
22064 mem = operands[2];
22065 oldval = operands[3];
22066 newval = operands[4];
22067 is_weak = (INTVAL (operands[5]) != 0);
22068 mod_s = memmodel_base (INTVAL (operands[6]));
22069 mod_f = memmodel_base (INTVAL (operands[7]));
22070 orig_mode = mode = GET_MODE (mem);
22071
22072 mask = shift = NULL_RTX;
22073 if (mode == QImode || mode == HImode)
22074 {
22075       /* Before power8, we didn't have access to lbarx/lharx, so generate
22076 	 lwarx plus shift/mask operations.  With power8, we need to do the
22077 comparison in SImode, but the store is still done in QI/HImode. */
22078 oldval = convert_modes (SImode, mode, oldval, 1);
22079
22080 if (!TARGET_SYNC_HI_QI)
22081 {
22082 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22083
22084 /* Shift and mask OLDVAL into position within the word.  */
22085 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
22086 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22087
22088 /* Shift and mask NEWVAL into position within the word. */
22089 newval = convert_modes (SImode, mode, newval, 1);
22090 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
22091 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22092 }
22093
22094 /* Prepare to adjust the return value. */
22095 retval = gen_reg_rtx (SImode);
22096 mode = SImode;
22097 }
22098 else if (reg_overlap_mentioned_p (retval, oldval))
22099 oldval = copy_to_reg (oldval);
22100
22101 mem = rs6000_pre_atomic_barrier (mem, mod_s);
22102
22103 label1 = NULL_RTX;
22104 if (!is_weak)
22105 {
22106 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22107 emit_label (XEXP (label1, 0));
22108 }
22109 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22110
22111 emit_load_locked (mode, retval, mem);
22112
22113 x = retval;
22114 if (mask)
22115 {
22116 x = expand_simple_binop (SImode, AND, retval, mask,
22117 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22118 }
22119
22120 cond = gen_reg_rtx (CCmode);
22121 /* If we have TImode, synthesize a comparison. */
22122 if (mode != TImode)
22123 x = gen_rtx_COMPARE (CCmode, x, oldval);
22124 else
22125 {
22126 rtx xor1_result = gen_reg_rtx (DImode);
22127 rtx xor2_result = gen_reg_rtx (DImode);
22128 rtx or_result = gen_reg_rtx (DImode);
22129 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
22130 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
22131 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
22132 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
22133
22134 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
22135 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
22136 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
22137 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
22138 }
22139
22140 emit_insn (gen_rtx_SET (cond, x));
22141
22142 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22143 emit_unlikely_jump (x, label2);
22144
22145 x = newval;
22146 if (mask)
22147 x = rs6000_mask_atomic_subword (retval, newval, mask);
22148
22149 emit_store_conditional (orig_mode, cond, mem, x);
22150
22151 if (!is_weak)
22152 {
22153 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22154 emit_unlikely_jump (x, label1);
22155 }
22156
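  /* Unless the failure memory model is relaxed, the failure path must
     still execute the final barrier, so place LABEL2 before it; with a
     relaxed failure model the label goes after it and the barrier is
     skipped on failure.  */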
22157 if (!is_mm_relaxed (mod_f))
22158 emit_label (XEXP (label2, 0));
22159
22160 rs6000_post_atomic_barrier (mod_s);
22161
22162 if (is_mm_relaxed (mod_f))
22163 emit_label (XEXP (label2, 0));
22164
22165 if (shift)
22166 rs6000_finish_atomic_subword (operands[1], retval, shift);
22167 else if (mode != GET_MODE (operands[1]))
22168 convert_move (operands[1], retval, 1);
22169
22170 /* In all cases, CR0 contains EQ on success, and NE on failure. */
22171 x = gen_rtx_EQ (SImode, cond, const0_rtx);
22172 emit_insn (gen_rtx_SET (boolval, x));
22173 }
22174
22175 /* Expand an atomic exchange operation. */
22176
22177 void
22178 rs6000_expand_atomic_exchange (rtx operands[])
22179 {
22180 rtx retval, mem, val, cond;
22181 machine_mode mode;
22182 enum memmodel model;
22183 rtx label, x, mask, shift;
22184
22185 retval = operands[0];
22186 mem = operands[1];
22187 val = operands[2];
22188 model = memmodel_base (INTVAL (operands[3]));
22189 mode = GET_MODE (mem);
22190
22191 mask = shift = NULL_RTX;
22192 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
22193 {
22194 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22195
22196 /* Shift and mask VAL into position within the word.  */
22197 val = convert_modes (SImode, mode, val, 1);
22198 val = expand_simple_binop (SImode, ASHIFT, val, shift,
22199 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22200
22201 /* Prepare to adjust the return value. */
22202 retval = gen_reg_rtx (SImode);
22203 mode = SImode;
22204 }
22205
22206 mem = rs6000_pre_atomic_barrier (mem, model);
22207
22208 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22209 emit_label (XEXP (label, 0));
22210
22211 emit_load_locked (mode, retval, mem);
22212
22213 x = val;
22214 if (mask)
22215 x = rs6000_mask_atomic_subword (retval, val, mask);
22216
22217 cond = gen_reg_rtx (CCmode);
22218 emit_store_conditional (mode, cond, mem, x);
22219
22220 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22221 emit_unlikely_jump (x, label);
22222
22223 rs6000_post_atomic_barrier (model);
22224
22225 if (shift)
22226 rs6000_finish_atomic_subword (operands[0], retval, shift);
22227 }
22228
22229 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
22230 to perform. MEM is the memory on which to operate. VAL is the second
22231 operand of the binary operator. BEFORE and AFTER are optional locations to
22232    return the value of MEM either before or after the operation.  MODEL_RTX
22233 is a CONST_INT containing the memory model to use. */
22234
22235 void
22236 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
22237 rtx orig_before, rtx orig_after, rtx model_rtx)
22238 {
22239 enum memmodel model = memmodel_base (INTVAL (model_rtx));
22240 machine_mode mode = GET_MODE (mem);
22241 machine_mode store_mode = mode;
22242 rtx label, x, cond, mask, shift;
22243 rtx before = orig_before, after = orig_after;
22244
22245 mask = shift = NULL_RTX;
22246 /* On power8, we want to use SImode for the operation. On previous systems,
22247    do the operation on the containing aligned word and use shift/mask to
22248    extract the proper byte or halfword.  */
22249 if (mode == QImode || mode == HImode)
22250 {
22251 if (TARGET_SYNC_HI_QI)
22252 {
22253 val = convert_modes (SImode, mode, val, 1);
22254
22255 /* Prepare to adjust the return value. */
22256 before = gen_reg_rtx (SImode);
22257 if (after)
22258 after = gen_reg_rtx (SImode);
22259 mode = SImode;
22260 }
22261 else
22262 {
22263 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22264
22265 /* Shift and mask VAL into position within the word.  */
22266 val = convert_modes (SImode, mode, val, 1);
22267 val = expand_simple_binop (SImode, ASHIFT, val, shift,
22268 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22269
22270 switch (code)
22271 {
22272 case IOR:
22273 case XOR:
22274 /* We've already zero-extended VAL. That is sufficient to
22275 make certain that it does not affect other bits. */
22276 mask = NULL;
22277 break;
22278
22279 case AND:
22280 /* If we make certain that all of the other bits in VAL are
22281 set, that will be sufficient to not affect other bits. */
22282 x = gen_rtx_NOT (SImode, mask);
22283 x = gen_rtx_IOR (SImode, x, val);
22284 emit_insn (gen_rtx_SET (val, x));
22285 mask = NULL;
22286 break;
22287
22288 case NOT:
22289 case PLUS:
22290 case MINUS:
22291 /* These will all affect bits outside the field and need
22292 adjustment via MASK within the loop. */
22293 break;
22294
22295 default:
22296 gcc_unreachable ();
22297 }
22298
22299 /* Prepare to adjust the return value. */
22300 before = gen_reg_rtx (SImode);
22301 if (after)
22302 after = gen_reg_rtx (SImode);
22303 store_mode = mode = SImode;
22304 }
22305 }
22306
22307 mem = rs6000_pre_atomic_barrier (mem, model);
22308
22309 label = gen_label_rtx ();
22310 emit_label (label);
22311 label = gen_rtx_LABEL_REF (VOIDmode, label);
22312
22313 if (before == NULL_RTX)
22314 before = gen_reg_rtx (mode);
22315
22316 emit_load_locked (mode, before, mem);
22317
22318 if (code == NOT)
22319 {
22320 x = expand_simple_binop (mode, AND, before, val,
22321 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22322 after = expand_simple_unop (mode, NOT, x, after, 1);
22323 }
22324 else
22325 {
22326 after = expand_simple_binop (mode, code, before, val,
22327 after, 1, OPTAB_LIB_WIDEN);
22328 }
22329
22330 x = after;
22331 if (mask)
22332 {
22333 x = expand_simple_binop (SImode, AND, after, mask,
22334 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22335 x = rs6000_mask_atomic_subword (before, x, mask);
22336 }
22337 else if (store_mode != mode)
22338 x = convert_modes (store_mode, mode, x, 1);
22339
22340 cond = gen_reg_rtx (CCmode);
22341 emit_store_conditional (store_mode, cond, mem, x);
22342
22343 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22344 emit_unlikely_jump (x, label);
22345
22346 rs6000_post_atomic_barrier (model);
22347
22348 if (shift)
22349 {
22350 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
22351    then do the calculations in a SImode register.  */
22352 if (orig_before)
22353 rs6000_finish_atomic_subword (orig_before, before, shift);
22354 if (orig_after)
22355 rs6000_finish_atomic_subword (orig_after, after, shift);
22356 }
22357 else if (store_mode != mode)
22358 {
22359 /* QImode/HImode on machines with lbarx/lharx where we do the native
22360    operation and then do the calculations in a SImode register.  */
22361 if (orig_before)
22362 convert_move (orig_before, before, 1);
22363 if (orig_after)
22364 convert_move (orig_after, after, 1);
22365 }
22366 else if (orig_after && after != orig_after)
22367 emit_move_insn (orig_after, after);
22368 }
22369
22370 /* Emit instructions to move SRC to DST. Called by splitters for
22371 multi-register moves. It will emit at most one instruction for
22372 each register that is accessed; that is, it won't emit li/lis pairs
22373 (or equivalent for 64-bit code). One of SRC or DST must be a hard
22374 register. */
22375
22376 void
22377 rs6000_split_multireg_move (rtx dst, rtx src)
22378 {
22379 /* The register number of the first register being moved. */
22380 int reg;
22381 /* The mode that is to be moved. */
22382 machine_mode mode;
22383 /* The mode that the move is being done in, and its size. */
22384 machine_mode reg_mode;
22385 int reg_mode_size;
22386 /* The number of registers that will be moved. */
22387 int nregs;
22388
22389 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
22390 mode = GET_MODE (dst);
22391 nregs = hard_regno_nregs[reg][mode];
22392 if (FP_REGNO_P (reg))
22393 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
22394 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
22395 else if (ALTIVEC_REGNO_P (reg))
22396 reg_mode = V16QImode;
22397 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
22398 reg_mode = DFmode;
22399 else
22400 reg_mode = word_mode;
22401 reg_mode_size = GET_MODE_SIZE (reg_mode);
22402
22403 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
22404
22405 /* TDmode residing in FP registers is special, since the ISA requires that
22406 the lower-numbered word of a register pair is always the most significant
22407 word, even in little-endian mode. This does not match the usual subreg
22408    semantics, so we cannot use simplify_gen_subreg in those cases.  Access
22409 the appropriate constituent registers "by hand" in little-endian mode.
22410
22411 Note we do not need to check for destructive overlap here since TDmode
22412 can only reside in even/odd register pairs. */
22413 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
22414 {
22415 rtx p_src, p_dst;
22416 int i;
22417
22418 for (i = 0; i < nregs; i++)
22419 {
22420 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
22421 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
22422 else
22423 p_src = simplify_gen_subreg (reg_mode, src, mode,
22424 i * reg_mode_size);
22425
22426 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
22427 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
22428 else
22429 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
22430 i * reg_mode_size);
22431
22432 emit_insn (gen_rtx_SET (p_dst, p_src));
22433 }
22434
22435 return;
22436 }
22437
22438 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
22439 {
22440 /* Move register range backwards, if we might have destructive
22441 overlap. */
22442 int i;
22443 for (i = nregs - 1; i >= 0; i--)
22444 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
22445 i * reg_mode_size),
22446 simplify_gen_subreg (reg_mode, src, mode,
22447 i * reg_mode_size)));
22448 }
22449 else
22450 {
22451 int i;
22452 int j = -1;
22453 bool used_update = false;
22454 rtx restore_basereg = NULL_RTX;
22455
22456 if (MEM_P (src) && INT_REGNO_P (reg))
22457 {
22458 rtx breg;
22459
22460 if (GET_CODE (XEXP (src, 0)) == PRE_INC
22461 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
22462 {
22463 rtx delta_rtx;
22464 breg = XEXP (XEXP (src, 0), 0);
22465 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
22466 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
22467 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
22468 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
22469 src = replace_equiv_address (src, breg);
22470 }
22471 else if (! rs6000_offsettable_memref_p (src, reg_mode))
22472 {
22473 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
22474 {
22475 rtx basereg = XEXP (XEXP (src, 0), 0);
22476 if (TARGET_UPDATE)
22477 {
22478 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
22479 emit_insn (gen_rtx_SET (ndst,
22480 gen_rtx_MEM (reg_mode,
22481 XEXP (src, 0))));
22482 used_update = true;
22483 }
22484 else
22485 emit_insn (gen_rtx_SET (basereg,
22486 XEXP (XEXP (src, 0), 1)));
22487 src = replace_equiv_address (src, basereg);
22488 }
22489 else
22490 {
22491 rtx basereg = gen_rtx_REG (Pmode, reg);
22492 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
22493 src = replace_equiv_address (src, basereg);
22494 }
22495 }
22496
22497 breg = XEXP (src, 0);
22498 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
22499 breg = XEXP (breg, 0);
22500
22501 /* If the base register we are using to address memory is
22502 also a destination reg, then change that register last. */
22503 if (REG_P (breg)
22504 && REGNO (breg) >= REGNO (dst)
22505 && REGNO (breg) < REGNO (dst) + nregs)
22506 j = REGNO (breg) - REGNO (dst);
22507 }
22508 else if (MEM_P (dst) && INT_REGNO_P (reg))
22509 {
22510 rtx breg;
22511
22512 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
22513 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
22514 {
22515 rtx delta_rtx;
22516 breg = XEXP (XEXP (dst, 0), 0);
22517 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
22518 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
22519 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
22520
22521 /* We have to update the breg before doing the store.
22522 Use store with update, if available. */
22523
22524 if (TARGET_UPDATE)
22525 {
22526 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
22527 emit_insn (TARGET_32BIT
22528 ? (TARGET_POWERPC64
22529 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
22530 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
22531 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
22532 used_update = true;
22533 }
22534 else
22535 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
22536 dst = replace_equiv_address (dst, breg);
22537 }
22538 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
22539 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
22540 {
22541 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
22542 {
22543 rtx basereg = XEXP (XEXP (dst, 0), 0);
22544 if (TARGET_UPDATE)
22545 {
22546 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
22547 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
22548 XEXP (dst, 0)),
22549 nsrc));
22550 used_update = true;
22551 }
22552 else
22553 emit_insn (gen_rtx_SET (basereg,
22554 XEXP (XEXP (dst, 0), 1)));
22555 dst = replace_equiv_address (dst, basereg);
22556 }
22557 else
22558 {
22559 rtx basereg = XEXP (XEXP (dst, 0), 0);
22560 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
22561 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
22562 && REG_P (basereg)
22563 && REG_P (offsetreg)
22564 && REGNO (basereg) != REGNO (offsetreg));
22565 if (REGNO (basereg) == 0)
22566 {
22567 rtx tmp = offsetreg;
22568 offsetreg = basereg;
22569 basereg = tmp;
22570 }
22571 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
22572 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
22573 dst = replace_equiv_address (dst, basereg);
22574 }
22575 }
22576 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
22577 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
22578 }
22579
22580 for (i = 0; i < nregs; i++)
22581 {
22582 /* Calculate index to next subword. */
22583 ++j;
22584 if (j == nregs)
22585 j = 0;
22586
22587 /* If the compiler already emitted the move of the first word by
22588 store with update, there is no need to do anything. */
22589 if (j == 0 && used_update)
22590 continue;
22591
22592 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
22593 j * reg_mode_size),
22594 simplify_gen_subreg (reg_mode, src, mode,
22595 j * reg_mode_size)));
22596 }
22597 if (restore_basereg != NULL_RTX)
22598 emit_insn (restore_basereg);
22599 }
22600 }
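/* An illustrative example of what the splitter above produces
   (register choices are hypothetical): on a 32-bit target a DImode
   copy r5:r6 -> r3:r4 becomes two SImode sets,

     (set (reg:SI 3) (reg:SI 5))
     (set (reg:SI 4) (reg:SI 6))

   while an overlapping copy r3:r4 -> r4:r5, where REGNO (src) is less
   than REGNO (dst), is emitted backwards,

     (set (reg:SI 5) (reg:SI 4))
     (set (reg:SI 4) (reg:SI 3))

   so the second source word is read before it is overwritten.  */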
22601
22602 \f
22603 /* This page contains routines that are used to determine what the
22604 function prologue and epilogue code will do and write them out. */
22605
22606 static inline bool
22607 save_reg_p (int r)
22608 {
22609 return !call_used_regs[r] && df_regs_ever_live_p (r);
22610 }
22611
22612 /* Determine whether GP register REG is really used. */
22613
22614 static bool
22615 rs6000_reg_live_or_pic_offset_p (int reg)
22616 {
22617 /* We need to mark the PIC offset register live under the same conditions
22618 as it is set up; otherwise it won't be saved before we clobber it. */
22619
22620 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
22621 {
22622 if (TARGET_TOC && TARGET_MINIMAL_TOC
22623 && (crtl->calls_eh_return
22624 || df_regs_ever_live_p (reg)
22625 || get_pool_size ()))
22626 return true;
22627
22628 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
22629 && flag_pic)
22630 return true;
22631 }
22632
22633 /* If the function calls eh_return, claim that all the registers which
22634 would otherwise be checked for liveness are used. */
22635
22636 return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
22637 && !call_used_regs[reg]);
22638 }
22639
22640 /* Return the first fixed-point register that is required to be
22641 saved. 32 if none. */
22642
22643 int
22644 first_reg_to_save (void)
22645 {
22646 int first_reg;
22647
22648 /* Find lowest numbered live register. */
22649 for (first_reg = 13; first_reg <= 31; first_reg++)
22650 if (save_reg_p (first_reg))
22651 break;
22652
22653 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
22654 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
22655 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
22656 || (TARGET_TOC && TARGET_MINIMAL_TOC))
22657 && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
22658 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
22659
22660 #if TARGET_MACHO
22661 if (flag_pic
22662 && crtl->uses_pic_offset_table
22663 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
22664 return RS6000_PIC_OFFSET_TABLE_REGNUM;
22665 #endif
22666
22667 return first_reg;
22668 }
22669
22670 /* Similar, for FP regs. */
22671
22672 int
22673 first_fp_reg_to_save (void)
22674 {
22675 int first_reg;
22676
22677 /* Find lowest numbered live register. */
22678 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
22679 if (save_reg_p (first_reg))
22680 break;
22681
22682 return first_reg;
22683 }
22684
22685 /* Similar, for AltiVec regs. */
22686
22687 static int
22688 first_altivec_reg_to_save (void)
22689 {
22690 int i;
22691
22692 /* Stack frame remains as is unless we are in AltiVec ABI. */
22693 if (! TARGET_ALTIVEC_ABI)
22694 return LAST_ALTIVEC_REGNO + 1;
22695
22696 /* On Darwin, the unwind routines are compiled without
22697 TARGET_ALTIVEC, and use save_world to save/restore the
22698 altivec registers when necessary. */
22699 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
22700 && ! TARGET_ALTIVEC)
22701 return FIRST_ALTIVEC_REGNO + 20;
22702
22703 /* Find lowest numbered live register. */
22704 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
22705 if (save_reg_p (i))
22706 break;
22707
22708 return i;
22709 }
22710
22711 /* Return a 32-bit mask of the AltiVec registers we need to set in
22712 VRSAVE. Bit n of the return value is 1 if Vn is live, following the
22713 PowerPC convention that bit 0 is the MSB of the 32-bit word. */
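/* For example, assuming the rs6000.h definition of ALTIVEC_REG_BIT,
   0x80000000 >> (REGNO - FIRST_ALTIVEC_REGNO), a function in which
   only v20 and v31 are live yields the mask 0x00000801, and the
   twelve low-order bits 0xFFF cover v20..v31, which is why the
   Darwin save_world case below ORs exactly that constant in.  */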
22714
22715 static unsigned int
22716 compute_vrsave_mask (void)
22717 {
22718 unsigned int i, mask = 0;
22719
22720 /* On Darwin, the unwind routines are compiled without
22721 TARGET_ALTIVEC, and use save_world to save/restore the
22722 call-saved altivec registers when necessary. */
22723 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
22724 && ! TARGET_ALTIVEC)
22725 mask |= 0xFFF;
22726
22727 /* First, find out if we use _any_ altivec registers. */
22728 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
22729 if (df_regs_ever_live_p (i))
22730 mask |= ALTIVEC_REG_BIT (i);
22731
22732 if (mask == 0)
22733 return mask;
22734
22735 /* Next, remove the argument registers from the set. These must
22736 be in the VRSAVE mask set by the caller, so we don't need to add
22737 them in again. More importantly, the mask we compute here is
22738 used to generate CLOBBERs in the set_vrsave insn, and we do not
22739 wish the argument registers to die. */
22740 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
22741 mask &= ~ALTIVEC_REG_BIT (i);
22742
22743 /* Similarly, remove the return value from the set. */
22744 {
22745 bool yes = false;
22746 diddle_return_value (is_altivec_return_reg, &yes);
22747 if (yes)
22748 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
22749 }
22750
22751 return mask;
22752 }
22753
22754 /* For a very restricted set of circumstances, we can cut down the
22755 size of prologues/epilogues by calling our own save/restore-the-world
22756 routines. */
22757
22758 static void
22759 compute_save_world_info (rs6000_stack_t *info_ptr)
22760 {
22761 info_ptr->world_save_p = 1;
22762 info_ptr->world_save_p
22763 = (WORLD_SAVE_P (info_ptr)
22764 && DEFAULT_ABI == ABI_DARWIN
22765 && !cfun->has_nonlocal_label
22766 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
22767 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
22768 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
22769 && info_ptr->cr_save_p);
22770
22771 /* This will not work in conjunction with sibcalls. Make sure there
22772 are none. (This check is expensive, but seldom executed.) */
22773 if (WORLD_SAVE_P (info_ptr))
22774 {
22775 rtx_insn *insn;
22776 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
22777 if (CALL_P (insn) && SIBLING_CALL_P (insn))
22778 {
22779 info_ptr->world_save_p = 0;
22780 break;
22781 }
22782 }
22783
22784 if (WORLD_SAVE_P (info_ptr))
22785 {
22786 /* Even if we're not touching VRsave, make sure there's room on the
22787 stack for it, if it looks like we're calling SAVE_WORLD, which
22788 will attempt to save it. */
22789 info_ptr->vrsave_size = 4;
22790
22791 /* If we are going to save the world, we need to save the link register too. */
22792 info_ptr->lr_save_p = 1;
22793
22794 /* "Save" the VRsave register too if we're saving the world. */
22795 if (info_ptr->vrsave_mask == 0)
22796 info_ptr->vrsave_mask = compute_vrsave_mask ();
22797
22798 /* Because the Darwin register save/restore routines only handle
22799 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
22800 check. */
22801 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
22802 && (info_ptr->first_altivec_reg_save
22803 >= FIRST_SAVED_ALTIVEC_REGNO));
22804 }
22805 return;
22806 }
22807
22808
22809 static void
22810 is_altivec_return_reg (rtx reg, void *xyes)
22811 {
22812 bool *yes = (bool *) xyes;
22813 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
22814 *yes = true;
22815 }
22816
22817 \f
22818 /* Look for user-defined global regs in the range FIRST to LAST-1.
22819 We should not restore these, and so cannot use lmw or out-of-line
22820 restore functions if there are any. We also can't save them
22821 (well, emit frame notes for them), because frame unwinding during
22822 exception handling will restore saved registers. */
22823
22824 static bool
22825 global_regs_p (unsigned first, unsigned last)
22826 {
22827 while (first < last)
22828 if (global_regs[first++])
22829 return true;
22830 return false;
22831 }
22832
22833 /* Determine the strategy for saving/restoring registers. */
22834
22835 enum {
22836 SAVRES_MULTIPLE = 0x1,
22837 SAVE_INLINE_FPRS = 0x2,
22838 SAVE_INLINE_GPRS = 0x4,
22839 REST_INLINE_FPRS = 0x8,
22840 REST_INLINE_GPRS = 0x10,
22841 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
22842 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
22843 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
22844 SAVE_INLINE_VRS = 0x100,
22845 REST_INLINE_VRS = 0x200
22846 };
22847
22848 static int
22849 rs6000_savres_strategy (rs6000_stack_t *info,
22850 bool using_static_chain_p)
22851 {
22852 int strategy = 0;
22853 bool lr_save_p;
22854
22855 if (TARGET_MULTIPLE
22856 && !TARGET_POWERPC64
22857 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
22858 && info->first_gp_reg_save < 31
22859 && !global_regs_p (info->first_gp_reg_save, 32))
22860 strategy |= SAVRES_MULTIPLE;
22861
22862 if (crtl->calls_eh_return
22863 || cfun->machine->ra_need_lr)
22864 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
22865 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
22866 | SAVE_INLINE_VRS | REST_INLINE_VRS);
22867
22868 if (info->first_fp_reg_save == 64
22869 /* The out-of-line FP routines use double-precision stores;
22870 we can't use those routines if we don't have such stores. */
22871 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
22872 || global_regs_p (info->first_fp_reg_save, 64))
22873 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
22874
22875 if (info->first_gp_reg_save == 32
22876 || (!(strategy & SAVRES_MULTIPLE)
22877 && global_regs_p (info->first_gp_reg_save, 32)))
22878 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22879
22880 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
22881 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
22882 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22883
22884 /* Define cutoff for using out-of-line functions to save registers. */
22885 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
22886 {
22887 if (!optimize_size)
22888 {
22889 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
22890 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22891 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22892 }
22893 else
22894 {
22895 /* Prefer out-of-line restore if it will exit. */
22896 if (info->first_fp_reg_save > 61)
22897 strategy |= SAVE_INLINE_FPRS;
22898 if (info->first_gp_reg_save > 29)
22899 {
22900 if (info->first_fp_reg_save == 64)
22901 strategy |= SAVE_INLINE_GPRS;
22902 else
22903 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22904 }
22905 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
22906 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22907 }
22908 }
22909 else if (DEFAULT_ABI == ABI_DARWIN)
22910 {
22911 if (info->first_fp_reg_save > 60)
22912 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
22913 if (info->first_gp_reg_save > 29)
22914 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22915 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22916 }
22917 else
22918 {
22919 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22920 if (info->first_fp_reg_save > 61)
22921 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
22922 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22923 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22924 }
22925
22926 /* Don't bother to try to save things out-of-line if r11 is occupied
22927 by the static chain. It would require too much fiddling and the
22928 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
22929 pointer on Darwin, and AIX uses r1 or r12. */
22930 if (using_static_chain_p
22931 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
22932 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
22933 | SAVE_INLINE_GPRS
22934 | SAVE_INLINE_VRS | REST_INLINE_VRS);
22935
22936 /* We can only use the out-of-line routines to restore if we've
22937 saved all the registers from first_fp_reg_save in the prologue.
22938 Otherwise, we risk loading garbage. */
22939 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
22940 {
22941 int i;
22942
22943 for (i = info->first_fp_reg_save; i < 64; i++)
22944 if (!save_reg_p (i))
22945 {
22946 strategy |= REST_INLINE_FPRS;
22947 break;
22948 }
22949 }
22950
22951 /* If we are going to use store multiple, then don't even bother
22952 with the out-of-line routines, since the store-multiple
22953 instruction will always be smaller. */
22954 if ((strategy & SAVRES_MULTIPLE))
22955 strategy |= SAVE_INLINE_GPRS;
22956
22957 /* info->lr_save_p isn't yet set if the only reason lr needs to be
22958 saved is an out-of-line save or restore. Set up the value for
22959 the next test (excluding out-of-line gpr restore). */
22960 lr_save_p = (info->lr_save_p
22961 || !(strategy & SAVE_INLINE_GPRS)
22962 || !(strategy & SAVE_INLINE_FPRS)
22963 || !(strategy & SAVE_INLINE_VRS)
22964 || !(strategy & REST_INLINE_FPRS)
22965 || !(strategy & REST_INLINE_VRS));
22966
22967 /* The situation is more complicated with load multiple. We'd
22968 prefer to use the out-of-line routines for restores, since the
22969 "exit" out-of-line routines can handle the restore of LR and the
22970 frame teardown. However, it doesn't make sense to use the
22971 out-of-line routine if that is the only reason we'd need to save
22972 LR, and we can't use the "exit" out-of-line gpr restore if we
22973 have saved some fprs; in those cases it is advantageous to use
22974 load multiple when available. */
22975 if ((strategy & SAVRES_MULTIPLE)
22976 && (!lr_save_p
22977 || info->first_fp_reg_save != 64))
22978 strategy |= REST_INLINE_GPRS;
22979
22980 /* Saving CR interferes with the exit routines used on the SPE, so
22981 just punt here. */
22982 if (TARGET_SPE_ABI
22983 && info->spe_64bit_regs_used
22984 && info->cr_save_p)
22985 strategy |= REST_INLINE_GPRS;
22986
22987 /* We can only use load multiple or the out-of-line routines to
22988 restore if we've used store multiple or out-of-line routines
22989 in the prologue, i.e. if we've saved all the registers from
22990 first_gp_reg_save. Otherwise, we risk loading garbage. */
22991 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
22992 == SAVE_INLINE_GPRS)
22993 {
22994 int i;
22995
22996 for (i = info->first_gp_reg_save; i < 32; i++)
22997 if (!save_reg_p (i))
22998 {
22999 strategy |= REST_INLINE_GPRS;
23000 break;
23001 }
23002 }
23003
23004 if (TARGET_ELF && TARGET_64BIT)
23005 {
23006 if (!(strategy & SAVE_INLINE_FPRS))
23007 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
23008 else if (!(strategy & SAVE_INLINE_GPRS)
23009 && info->first_fp_reg_save == 64)
23010 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
23011 }
23012 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
23013 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
23014
23015 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
23016 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
23017
23018 return strategy;
23019 }
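/* A minimal sketch of how the flag bits returned above are consumed
   (hypothetical caller; the real consumers are the prologue/epilogue
   emitters later in this file): */
#if 0
  int strategy = rs6000_savres_strategy (info, using_static_chain_p);
  if (!(strategy & SAVE_INLINE_GPRS))
    /* Emit a call to an out-of-line GPR save routine.  */;
  else if (strategy & SAVRES_MULTIPLE)
    /* Emit a single store-multiple instruction instead.  */;
#endif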
23020
23021 /* Calculate the stack information for the current function. This is
23022 complicated by having two separate calling sequences, the AIX calling
23023 sequence and the V.4 calling sequence.
23024
23025 AIX (and Darwin/Mac OS X) stack frames look like:
23026 32-bit 64-bit
23027 SP----> +---------------------------------------+
23028 | back chain to caller | 0 0
23029 +---------------------------------------+
23030 | saved CR | 4 8 (8-11)
23031 +---------------------------------------+
23032 | saved LR | 8 16
23033 +---------------------------------------+
23034 | reserved for compilers | 12 24
23035 +---------------------------------------+
23036 | reserved for binders | 16 32
23037 +---------------------------------------+
23038 | saved TOC pointer | 20 40
23039 +---------------------------------------+
23040 | Parameter save area (P) | 24 48
23041 +---------------------------------------+
23042 | Alloca space (A) | 24+P etc.
23043 +---------------------------------------+
23044 | Local variable space (L) | 24+P+A
23045 +---------------------------------------+
23046 | Float/int conversion temporary (X) | 24+P+A+L
23047 +---------------------------------------+
23048 | Save area for AltiVec registers (W) | 24+P+A+L+X
23049 +---------------------------------------+
23050 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
23051 +---------------------------------------+
23052 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
23053 +---------------------------------------+
23054 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
23055 +---------------------------------------+
23056 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
23057 +---------------------------------------+
23058 old SP->| back chain to caller's caller |
23059 +---------------------------------------+
23060
23061 The required alignment for AIX configurations is two words (i.e., 8
23062 or 16 bytes).
23063
23064 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
23065
23066 SP----> +---------------------------------------+
23067 | Back chain to caller | 0
23068 +---------------------------------------+
23069 | Save area for CR | 8
23070 +---------------------------------------+
23071 | Saved LR | 16
23072 +---------------------------------------+
23073 | Saved TOC pointer | 24
23074 +---------------------------------------+
23075 | Parameter save area (P) | 32
23076 +---------------------------------------+
23077 | Alloca space (A) | 32+P
23078 +---------------------------------------+
23079 | Local variable space (L) | 32+P+A
23080 +---------------------------------------+
23081 | Save area for AltiVec registers (W) | 32+P+A+L
23082 +---------------------------------------+
23083 | AltiVec alignment padding (Y) | 32+P+A+L+W
23084 +---------------------------------------+
23085 | Save area for GP registers (G) | 32+P+A+L+W+Y
23086 +---------------------------------------+
23087 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
23088 +---------------------------------------+
23089 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
23090 +---------------------------------------+
23091
23092
23093 V.4 stack frames look like:
23094
23095 SP----> +---------------------------------------+
23096 | back chain to caller | 0
23097 +---------------------------------------+
23098 | caller's saved LR | 4
23099 +---------------------------------------+
23100 | Parameter save area (P) | 8
23101 +---------------------------------------+
23102 | Alloca space (A) | 8+P
23103 +---------------------------------------+
23104 | Varargs save area (V) | 8+P+A
23105 +---------------------------------------+
23106 | Local variable space (L) | 8+P+A+V
23107 +---------------------------------------+
23108 | Float/int conversion temporary (X) | 8+P+A+V+L
23109 +---------------------------------------+
23110 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
23111 +---------------------------------------+
23112 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
23113 +---------------------------------------+
23114 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
23115 +---------------------------------------+
23116 | SPE: area for 64-bit GP registers |
23117 +---------------------------------------+
23118 | SPE alignment padding |
23119 +---------------------------------------+
23120 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
23121 +---------------------------------------+
23122 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
23123 +---------------------------------------+
23124 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
23125 +---------------------------------------+
23126 old SP->| back chain to caller's caller |
23127 +---------------------------------------+
23128
23129 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
23130 given. (But note below and in sysv4.h that we require only 8 and
23131 may round up the size of our stack frame anyway. The historical
23132 reason is early versions of powerpc-linux which didn't properly
23133 align the stack at program startup. A happy side-effect is that
23134 -mno-eabi libraries can be used with -meabi programs.)
23135
23136 The EABI configuration defaults to the V.4 layout. However,
23137 the stack alignment requirements may differ. If -mno-eabi is not
23138 given, the required stack alignment is 8 bytes; if -mno-eabi is
23139 given, the required alignment is 16 bytes. (But see V.4 comment
23140 above.) */
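/* A worked instance of the ELFv2 diagram above (sizes are
   illustrative): with P = 64 bytes of parameter save area, no alloca
   (A = 0), L = 32 bytes of locals, W = 32 bytes for two saved vector
   registers and no padding (Y = 0), the GP save area starts at offset
   32+P+A+L+W+Y = 32 + 64 + 0 + 32 + 32 + 0 = 160 from the incoming
   SP, and the FP save area follows it at 160+G.  */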
23141
23142 #ifndef ABI_STACK_BOUNDARY
23143 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
23144 #endif
23145
23146 static rs6000_stack_t *
23147 rs6000_stack_info (void)
23148 {
23149 /* We should never be called for thunks; we are not set up for that. */
23150 gcc_assert (!cfun->is_thunk);
23151
23152 rs6000_stack_t *info_ptr = &stack_info;
23153 int reg_size = TARGET_32BIT ? 4 : 8;
23154 int ehrd_size;
23155 int ehcr_size;
23156 int save_align;
23157 int first_gp;
23158 HOST_WIDE_INT non_fixed_size;
23159 bool using_static_chain_p;
23160
23161 if (reload_completed && info_ptr->reload_completed)
23162 return info_ptr;
23163
23164 memset (info_ptr, 0, sizeof (*info_ptr));
23165 info_ptr->reload_completed = reload_completed;
23166
23167 if (TARGET_SPE)
23168 {
23169 /* Cache value so we don't rescan instruction chain over and over. */
23170 if (cfun->machine->insn_chain_scanned_p == 0)
23171 cfun->machine->insn_chain_scanned_p
23172 = spe_func_has_64bit_regs_p () + 1;
23173 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
23174 }
23175
23176 /* Select which calling sequence. */
23177 info_ptr->abi = DEFAULT_ABI;
23178
23179 /* Calculate which registers need to be saved & save area size. */
23180 info_ptr->first_gp_reg_save = first_reg_to_save ();
23181 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
23182 even if it currently looks like we won't. Reload may need it to
23183 get at a constant; if so, it will have already created a constant
23184 pool entry for it. */
23185 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
23186 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
23187 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
23188 && crtl->uses_const_pool
23189 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
23190 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
23191 else
23192 first_gp = info_ptr->first_gp_reg_save;
23193
23194 info_ptr->gp_size = reg_size * (32 - first_gp);
23195
23196 /* For the SPE, we have an additional upper 32-bits on each GPR.
23197 Ideally we should save the entire 64-bits only when the upper
23198 half is used in SIMD instructions. Since we only record
23199 registers live (not the size they are used in), this proves
23200 difficult because we'd have to traverse the instruction chain at
23201 the right time, taking reload into account. This is a real pain,
23202 so we opt to always save the GPRs in 64-bits if even one register
23203 gets used in 64-bits. Otherwise, all the registers in the frame
23204 get saved in 32-bits.
23205
23206 So, when we save all GPRs (except the SP) in 64-bits, the
23207 traditional GP save area will be empty. */
23208 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
23209 info_ptr->gp_size = 0;
23210
23211 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
23212 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
23213
23214 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
23215 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
23216 - info_ptr->first_altivec_reg_save);
23217
23218 /* Does this function call anything? */
23219 info_ptr->calls_p = (! crtl->is_leaf
23220 || cfun->machine->ra_needs_full_frame);
23221
23222 /* Determine if we need to save the condition code registers. */
23223 if (df_regs_ever_live_p (CR2_REGNO)
23224 || df_regs_ever_live_p (CR3_REGNO)
23225 || df_regs_ever_live_p (CR4_REGNO))
23226 {
23227 info_ptr->cr_save_p = 1;
23228 if (DEFAULT_ABI == ABI_V4)
23229 info_ptr->cr_size = reg_size;
23230 }
23231
23232 /* If the current function calls __builtin_eh_return, then we need
23233 to allocate stack space for registers that will hold data for
23234 the exception handler. */
23235 if (crtl->calls_eh_return)
23236 {
23237 unsigned int i;
23238 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
23239 continue;
23240
23241 /* SPE saves EH registers in 64-bits. */
23242 ehrd_size = i * (TARGET_SPE_ABI
23243 && info_ptr->spe_64bit_regs_used != 0
23244 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
23245 }
23246 else
23247 ehrd_size = 0;
23248
23249 /* In the ELFv2 ABI, we also need to allocate space for separate
23250 CR field save areas if the function calls __builtin_eh_return. */
23251 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
23252 {
23253 /* This hard-codes that we have three call-saved CR fields. */
23254 ehcr_size = 3 * reg_size;
23255 /* We do *not* use the regular CR save mechanism. */
23256 info_ptr->cr_save_p = 0;
23257 }
23258 else
23259 ehcr_size = 0;
23260
23261 /* Determine various sizes. */
23262 info_ptr->reg_size = reg_size;
23263 info_ptr->fixed_size = RS6000_SAVE_AREA;
23264 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
23265 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
23266 TARGET_ALTIVEC ? 16 : 8);
23267 if (FRAME_GROWS_DOWNWARD)
23268 info_ptr->vars_size
23269 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
23270 + info_ptr->parm_size,
23271 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
23272 - (info_ptr->fixed_size + info_ptr->vars_size
23273 + info_ptr->parm_size);
23274
23275 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
23276 info_ptr->spe_gp_size = 8 * (32 - first_gp);
23277 else
23278 info_ptr->spe_gp_size = 0;
23279
23280 if (TARGET_ALTIVEC_ABI)
23281 info_ptr->vrsave_mask = compute_vrsave_mask ();
23282 else
23283 info_ptr->vrsave_mask = 0;
23284
23285 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
23286 info_ptr->vrsave_size = 4;
23287 else
23288 info_ptr->vrsave_size = 0;
23289
23290 compute_save_world_info (info_ptr);
23291
23292 /* Calculate the offsets. */
23293 switch (DEFAULT_ABI)
23294 {
23295 case ABI_NONE:
23296 default:
23297 gcc_unreachable ();
23298
23299 case ABI_AIX:
23300 case ABI_ELFv2:
23301 case ABI_DARWIN:
23302 info_ptr->fp_save_offset = - info_ptr->fp_size;
23303 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
23304
23305 if (TARGET_ALTIVEC_ABI)
23306 {
23307 info_ptr->vrsave_save_offset
23308 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
23309
23310 /* Align stack so vector save area is on a quadword boundary.
23311 The padding goes above the vectors. */
23312 if (info_ptr->altivec_size != 0)
23313 info_ptr->altivec_padding_size
23314 = info_ptr->vrsave_save_offset & 0xF;
23315 else
23316 info_ptr->altivec_padding_size = 0;
23317
23318 info_ptr->altivec_save_offset
23319 = info_ptr->vrsave_save_offset
23320 - info_ptr->altivec_padding_size
23321 - info_ptr->altivec_size;
23322 gcc_assert (info_ptr->altivec_size == 0
23323 || info_ptr->altivec_save_offset % 16 == 0);
23324
23325 /* Adjust for AltiVec case. */
23326 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
23327 }
23328 else
23329 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
23330
23331 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
23332 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
23333 info_ptr->lr_save_offset = 2*reg_size;
23334 break;
23335
23336 case ABI_V4:
23337 info_ptr->fp_save_offset = - info_ptr->fp_size;
23338 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
23339 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
23340
23341 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
23342 {
23343 /* Align stack so SPE GPR save area is aligned on a
23344 double-word boundary. */
23345 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
23346 info_ptr->spe_padding_size
23347 = 8 - (-info_ptr->cr_save_offset % 8);
23348 else
23349 info_ptr->spe_padding_size = 0;
23350
23351 info_ptr->spe_gp_save_offset
23352 = info_ptr->cr_save_offset
23353 - info_ptr->spe_padding_size
23354 - info_ptr->spe_gp_size;
23355
23356 /* Adjust for SPE case. */
23357 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
23358 }
23359 else if (TARGET_ALTIVEC_ABI)
23360 {
23361 info_ptr->vrsave_save_offset
23362 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
23363
23364 /* Align stack so vector save area is on a quadword boundary. */
23365 if (info_ptr->altivec_size != 0)
23366 info_ptr->altivec_padding_size
23367 = 16 - (-info_ptr->vrsave_save_offset % 16);
23368 else
23369 info_ptr->altivec_padding_size = 0;
23370
23371 info_ptr->altivec_save_offset
23372 = info_ptr->vrsave_save_offset
23373 - info_ptr->altivec_padding_size
23374 - info_ptr->altivec_size;
23375
23376 /* Adjust for AltiVec case. */
23377 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
23378 }
23379 else
23380 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
23381 info_ptr->ehrd_offset -= ehrd_size;
23382 info_ptr->lr_save_offset = reg_size;
23383 break;
23384 }
23385
23386 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
23387 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
23388 + info_ptr->gp_size
23389 + info_ptr->altivec_size
23390 + info_ptr->altivec_padding_size
23391 + info_ptr->spe_gp_size
23392 + info_ptr->spe_padding_size
23393 + ehrd_size
23394 + ehcr_size
23395 + info_ptr->cr_size
23396 + info_ptr->vrsave_size,
23397 save_align);
23398
23399 non_fixed_size = (info_ptr->vars_size
23400 + info_ptr->parm_size
23401 + info_ptr->save_size);
23402
23403 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
23404 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
23405
23406 /* Determine if we need to save the link register. */
23407 if (info_ptr->calls_p
23408 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23409 && crtl->profile
23410 && !TARGET_PROFILE_KERNEL)
23411 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
23412 #ifdef TARGET_RELOCATABLE
23413 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
23414 #endif
23415 || rs6000_ra_ever_killed ())
23416 info_ptr->lr_save_p = 1;
23417
23418 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23419 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23420 && call_used_regs[STATIC_CHAIN_REGNUM]);
23421 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
23422 using_static_chain_p);
23423
23424 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
23425 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
23426 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
23427 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
23428 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
23429 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
23430 info_ptr->lr_save_p = 1;
23431
23432 if (info_ptr->lr_save_p)
23433 df_set_regs_ever_live (LR_REGNO, true);
23434
23435 /* Determine if we need to allocate any stack frame:
23436
23437 For AIX we need to push the stack if a frame pointer is needed
23438 (because the stack might be dynamically adjusted), if we are
23439 debugging, if we make calls, or if the sum of fp_save, gp_save,
23440 and local variables is more than the space needed to save all
23441 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
23442 + 18*8 = 288 (GPR13 reserved).
23443
23444 For V.4 we don't have the stack cushion that AIX uses, but assume
23445 that the debugger can handle stackless frames. */
23446
23447 if (info_ptr->calls_p)
23448 info_ptr->push_p = 1;
23449
23450 else if (DEFAULT_ABI == ABI_V4)
23451 info_ptr->push_p = non_fixed_size != 0;
23452
23453 else if (frame_pointer_needed)
23454 info_ptr->push_p = 1;
23455
23456 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
23457 info_ptr->push_p = 1;
23458
23459 else
23460 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
23461
23462 return info_ptr;
23463 }
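/* A worked instance of the computation above (illustrative, 32-bit
   V.4 with no FP, AltiVec or SPE saves): a function saving only
   r30-r31 and LR gets gp_size = 4 * (32 - 30) = 8, hence
   fp_save_offset = 0, gp_save_offset = -8, lr_save_offset = 4, and
   save_size rounds to 8 under the 8-byte save alignment; total_size
   is then vars_size + parm_size + save_size plus the fixed area,
   rounded up to the ABI stack boundary.  */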
23464
23465 /* Return true if the current function uses any GPRs in 64-bit SIMD
23466 mode. */
23467
23468 static bool
23469 spe_func_has_64bit_regs_p (void)
23470 {
23471 rtx_insn *insns, *insn;
23472
23473 /* Functions that save and restore all the call-saved registers will
23474 need to save/restore the registers in 64-bits. */
23475 if (crtl->calls_eh_return
23476 || cfun->calls_setjmp
23477 || crtl->has_nonlocal_goto)
23478 return true;
23479
23480 insns = get_insns ();
23481
23482 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
23483 {
23484 if (INSN_P (insn))
23485 {
23486 rtx i;
23487
23488 /* FIXME: This should be implemented with attributes...
23489
23490 (set_attr "spe64" "true")....then,
23491 if (get_spe64(insn)) return true;
23492
23493 It's the only reliable way to do the stuff below. */
23494
23495 i = PATTERN (insn);
23496 if (GET_CODE (i) == SET)
23497 {
23498 machine_mode mode = GET_MODE (SET_SRC (i));
23499
23500 if (SPE_VECTOR_MODE (mode))
23501 return true;
23502 if (TARGET_E500_DOUBLE
23503 && (mode == DFmode || FLOAT128_2REG_P (mode)))
23504 return true;
23505 }
23506 }
23507 }
23508
23509 return false;
23510 }
23511
23512 static void
23513 debug_stack_info (rs6000_stack_t *info)
23514 {
23515 const char *abi_string;
23516
23517 if (! info)
23518 info = rs6000_stack_info ();
23519
23520 fprintf (stderr, "\nStack information for function %s:\n",
23521 ((current_function_decl && DECL_NAME (current_function_decl))
23522 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
23523 : "<unknown>"));
23524
23525 switch (info->abi)
23526 {
23527 default: abi_string = "Unknown"; break;
23528 case ABI_NONE: abi_string = "NONE"; break;
23529 case ABI_AIX: abi_string = "AIX"; break;
23530 case ABI_ELFv2: abi_string = "ELFv2"; break;
23531 case ABI_DARWIN: abi_string = "Darwin"; break;
23532 case ABI_V4: abi_string = "V.4"; break;
23533 }
23534
23535 fprintf (stderr, "\tABI = %5s\n", abi_string);
23536
23537 if (TARGET_ALTIVEC_ABI)
23538 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
23539
23540 if (TARGET_SPE_ABI)
23541 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
23542
23543 if (info->first_gp_reg_save != 32)
23544 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
23545
23546 if (info->first_fp_reg_save != 64)
23547 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
23548
23549 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
23550 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
23551 info->first_altivec_reg_save);
23552
23553 if (info->lr_save_p)
23554 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
23555
23556 if (info->cr_save_p)
23557 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
23558
23559 if (info->vrsave_mask)
23560 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
23561
23562 if (info->push_p)
23563 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
23564
23565 if (info->calls_p)
23566 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
23567
23568 if (info->gp_size)
23569 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
23570
23571 if (info->fp_size)
23572 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
23573
23574 if (info->altivec_size)
23575 fprintf (stderr, "\taltivec_save_offset = %5d\n",
23576 info->altivec_save_offset);
23577
23578 if (info->spe_gp_size)
23579 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
23580 info->spe_gp_save_offset);
23581
23582 if (info->vrsave_size)
23583 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
23584 info->vrsave_save_offset);
23585
23586 if (info->lr_save_p)
23587 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
23588
23589 if (info->cr_save_p)
23590 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
23591
23592 if (info->varargs_save_offset)
23593 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
23594
23595 if (info->total_size)
23596 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
23597 info->total_size);
23598
23599 if (info->vars_size)
23600 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
23601 info->vars_size);
23602
23603 if (info->parm_size)
23604 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
23605
23606 if (info->fixed_size)
23607 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
23608
23609 if (info->gp_size)
23610 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
23611
23612 if (info->spe_gp_size)
23613 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
23614
23615 if (info->fp_size)
23616 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
23617
23618 if (info->altivec_size)
23619 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
23620
23621 if (info->vrsave_size)
23622 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
23623
23624 if (info->altivec_padding_size)
23625 fprintf (stderr, "\taltivec_padding_size= %5d\n",
23626 info->altivec_padding_size);
23627
23628 if (info->spe_padding_size)
23629 fprintf (stderr, "\tspe_padding_size = %5d\n",
23630 info->spe_padding_size);
23631
23632 if (info->cr_size)
23633 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
23634
23635 if (info->save_size)
23636 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
23637
23638 if (info->reg_size != 4)
23639 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
23640
23641 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
23642
23643 fprintf (stderr, "\n");
23644 }
23645
23646 rtx
23647 rs6000_return_addr (int count, rtx frame)
23648 {
23649 /* Currently we don't optimize very well between prologue and body
23650 code, and for PIC the generated code can actually be quite bad, so
23651 don't try to be too clever here. */
23652 if (count != 0
23653 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
23654 {
23655 cfun->machine->ra_needs_full_frame = 1;
23656
23657 return
23658 gen_rtx_MEM
23659 (Pmode,
23660 memory_address
23661 (Pmode,
23662 plus_constant (Pmode,
23663 copy_to_reg
23664 (gen_rtx_MEM (Pmode,
23665 memory_address (Pmode, frame))),
23666 RETURN_ADDRESS_OFFSET)));
23667 }
23668
23669 cfun->machine->ra_need_lr = 1;
23670 return get_hard_reg_initial_val (Pmode, LR_REGNO);
23671 }
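/* For instance, __builtin_return_address (0) in non-PIC code reaches
   this with COUNT == 0 and is satisfied from the LR pseudo, while
   __builtin_return_address (1) takes the COUNT != 0 path above and
   forces a full frame so the saved LR can be loaded from the caller's
   frame at RETURN_ADDRESS_OFFSET.  */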
23672
23673 /* Say whether a function is a candidate for sibcall handling or not. */
23674
23675 static bool
23676 rs6000_function_ok_for_sibcall (tree decl, tree exp)
23677 {
23678 tree fntype;
23679
23680 if (decl)
23681 fntype = TREE_TYPE (decl);
23682 else
23683 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
23684
23685 /* We can't do it if the called function has more vector parameters
23686 than the current function; there's nowhere to put the VRsave code. */
23687 if (TARGET_ALTIVEC_ABI
23688 && TARGET_ALTIVEC_VRSAVE
23689 && !(decl && decl == current_function_decl))
23690 {
23691 function_args_iterator args_iter;
23692 tree type;
23693 int nvreg = 0;
23694
23695 /* Functions with vector parameters are required to have a
23696 prototype, so the argument type info must be available
23697 here. */
23698 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
23699 if (TREE_CODE (type) == VECTOR_TYPE
23700 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
23701 nvreg++;
23702
23703 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
23704 if (TREE_CODE (type) == VECTOR_TYPE
23705 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
23706 nvreg--;
23707
23708 if (nvreg > 0)
23709 return false;
23710 }
23711
23712 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
23713 functions, because the callee may have a different TOC pointer from
23714 the caller and there's no way to ensure we restore the TOC when
23715 we return. With the secure-plt SYSV ABI we can't make non-local
23716 calls when -fpic/PIC because the plt call stubs use r30. */
23717 if (DEFAULT_ABI == ABI_DARWIN
23718 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23719 && decl
23720 && !DECL_EXTERNAL (decl)
23721 && !DECL_WEAK (decl)
23722 && (*targetm.binds_local_p) (decl))
23723 || (DEFAULT_ABI == ABI_V4
23724 && (!TARGET_SECURE_PLT
23725 || !flag_pic
23726 || (decl
23727 && (*targetm.binds_local_p) (decl)))))
23728 {
23729 tree attr_list = TYPE_ATTRIBUTES (fntype);
23730
23731 if (!lookup_attribute ("longcall", attr_list)
23732 || lookup_attribute ("shortcall", attr_list))
23733 return true;
23734 }
23735
23736 return false;
23737 }
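/* An example of the longcall restriction above (hypothetical user
   code):

     void far_away (void) __attribute__ ((longcall));
     void f (void) { far_away (); }

   f cannot sibcall far_away even when the ABI test passes, because
   the longcall attribute skips the "return true" path; adding
   shortcall as well would re-enable the sibcall.  */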
23738
23739 static int
23740 rs6000_ra_ever_killed (void)
23741 {
23742 rtx_insn *top;
23743 rtx reg;
23744 rtx_insn *insn;
23745
23746 if (cfun->is_thunk)
23747 return 0;
23748
23749 if (cfun->machine->lr_save_state)
23750 return cfun->machine->lr_save_state - 1;
23751
23752 /* regs_ever_live has LR marked as used if any sibcalls are present,
23753 but this should not force saving and restoring in the
23754 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
23755 clobbers LR, so that is inappropriate. */
23756
23757 /* Also, the prologue can generate a store into LR that
23758 doesn't really count, like this:
23759
23760 move LR->R0
23761 bcl to set PIC register
23762 move LR->R31
23763 move R0->LR
23764
23765 When we're called from the epilogue, we need to avoid counting
23766 this as a store. */
23767
23768 push_topmost_sequence ();
23769 top = get_insns ();
23770 pop_topmost_sequence ();
23771 reg = gen_rtx_REG (Pmode, LR_REGNO);
23772
23773 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
23774 {
23775 if (INSN_P (insn))
23776 {
23777 if (CALL_P (insn))
23778 {
23779 if (!SIBLING_CALL_P (insn))
23780 return 1;
23781 }
23782 else if (find_regno_note (insn, REG_INC, LR_REGNO))
23783 return 1;
23784 else if (set_of (reg, insn) != NULL_RTX
23785 && !prologue_epilogue_contains (insn))
23786 return 1;
23787 }
23788 }
23789 return 0;
23790 }
23791 \f
23792 /* Emit instructions needed to load the TOC register.
23793 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set and
23794 there is a constant pool, or for SVR4 -fpic. */
23795
23796 void
23797 rs6000_emit_load_toc_table (int fromprolog)
23798 {
23799 rtx dest;
23800 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
23801
23802 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
23803 {
23804 char buf[30];
23805 rtx lab, tmp1, tmp2, got;
23806
23807 lab = gen_label_rtx ();
23808 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
23809 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
23810 if (flag_pic == 2)
23811 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
23812 else
23813 got = rs6000_got_sym ();
23814 tmp1 = tmp2 = dest;
23815 if (!fromprolog)
23816 {
23817 tmp1 = gen_reg_rtx (Pmode);
23818 tmp2 = gen_reg_rtx (Pmode);
23819 }
23820 emit_insn (gen_load_toc_v4_PIC_1 (lab));
23821 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
23822 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
23823 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
23824 }
23825 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
23826 {
23827 emit_insn (gen_load_toc_v4_pic_si ());
23828 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
23829 }
23830 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
23831 {
23832 char buf[30];
23833 rtx temp0 = (fromprolog
23834 ? gen_rtx_REG (Pmode, 0)
23835 : gen_reg_rtx (Pmode));
23836
23837 if (fromprolog)
23838 {
23839 rtx symF, symL;
23840
23841 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
23842 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
23843
23844 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
23845 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
23846
23847 emit_insn (gen_load_toc_v4_PIC_1 (symF));
23848 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
23849 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
23850 }
23851 else
23852 {
23853 rtx tocsym, lab;
23854
23855 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
23856 lab = gen_label_rtx ();
23857 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
23858 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
23859 if (TARGET_LINK_STACK)
23860 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
23861 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
23862 }
23863 emit_insn (gen_addsi3 (dest, temp0, dest));
23864 }
23865 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
23866 {
23867 /* This is for AIX code running in non-PIC ELF32. */
23868 char buf[30];
23869 rtx realsym;
23870 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
23871 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
23872
23873 emit_insn (gen_elf_high (dest, realsym));
23874 emit_insn (gen_elf_low (dest, dest, realsym));
23875 }
23876 else
23877 {
23878 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
23879
23880 if (TARGET_32BIT)
23881 emit_insn (gen_load_toc_aix_si (dest));
23882 else
23883 emit_insn (gen_load_toc_aix_di (dest));
23884 }
23885 }
23886
23887 /* Emit instructions to restore the link register after determining where
23888 its value has been stored. */
23889
23890 void
23891 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
23892 {
23893 rs6000_stack_t *info = rs6000_stack_info ();
23894 rtx operands[2];
23895
23896 operands[0] = source;
23897 operands[1] = scratch;
23898
23899 if (info->lr_save_p)
23900 {
23901 rtx frame_rtx = stack_pointer_rtx;
23902 HOST_WIDE_INT sp_offset = 0;
23903 rtx tmp;
23904
23905 if (frame_pointer_needed
23906 || cfun->calls_alloca
23907 || info->total_size > 32767)
23908 {
23909 tmp = gen_frame_mem (Pmode, frame_rtx);
23910 emit_move_insn (operands[1], tmp);
23911 frame_rtx = operands[1];
23912 }
23913 else if (info->push_p)
23914 sp_offset = info->total_size;
23915
23916 tmp = plus_constant (Pmode, frame_rtx,
23917 info->lr_save_offset + sp_offset);
23918 tmp = gen_frame_mem (Pmode, tmp);
23919 emit_move_insn (tmp, operands[0]);
23920 }
23921 else
23922 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
23923
23924 /* Freeze lr_save_p. We've just emitted rtl that depends on the
23925 state of lr_save_p so any change from here on would be a bug. In
23926 particular, stop rs6000_ra_ever_killed from considering the SET
23927 of lr we may have added just above. */
23928 cfun->machine->lr_save_state = info->lr_save_p + 1;
23929 }
23930
23931 static GTY(()) alias_set_type set = -1;
23932
23933 alias_set_type
23934 get_TOC_alias_set (void)
23935 {
23936 if (set == -1)
23937 set = new_alias_set ();
23938 return set;
23939 }
23940
23941 /* This returns nonzero if the current function uses the TOC. This is
23942 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
23943 is generated by the ABI_V4 load_toc_* patterns. */
23944 #if TARGET_ELF
23945 static int
23946 uses_TOC (void)
23947 {
23948 rtx_insn *insn;
23949
23950 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23951 if (INSN_P (insn))
23952 {
23953 rtx pat = PATTERN (insn);
23954 int i;
23955
23956 if (GET_CODE (pat) == PARALLEL)
23957 for (i = 0; i < XVECLEN (pat, 0); i++)
23958 {
23959 rtx sub = XVECEXP (pat, 0, i);
23960 if (GET_CODE (sub) == USE)
23961 {
23962 sub = XEXP (sub, 0);
23963 if (GET_CODE (sub) == UNSPEC
23964 && XINT (sub, 1) == UNSPEC_TOC)
23965 return 1;
23966 }
23967 }
23968 }
23969 return 0;
23970 }
23971 #endif
23972
23973 rtx
23974 create_TOC_reference (rtx symbol, rtx largetoc_reg)
23975 {
23976 rtx tocrel, tocreg, hi;
23977
23978 if (TARGET_DEBUG_ADDR)
23979 {
23980 if (GET_CODE (symbol) == SYMBOL_REF)
23981 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
23982 XSTR (symbol, 0));
23983 else
23984 {
23985 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
23986 GET_RTX_NAME (GET_CODE (symbol)));
23987 debug_rtx (symbol);
23988 }
23989 }
23990
23991 if (!can_create_pseudo_p ())
23992 df_set_regs_ever_live (TOC_REGISTER, true);
23993
23994 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
23995 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
23996 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
23997 return tocrel;
23998
23999 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
24000 if (largetoc_reg != NULL)
24001 {
24002 emit_move_insn (largetoc_reg, hi);
24003 hi = largetoc_reg;
24004 }
24005 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
24006 }
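/* A sketch of the resulting addressing (the assembly is
   illustrative): with the small code model the bare UNSPEC_TOCREL
   returned above ends up as a single TOC-relative access such as

     ld 9,sym@toc(2)

   while for the medium/large models the HIGH/LO_SUM pair expands to
   an addis/ld (or addi) sequence such as

     addis 9,2,sym@toc@ha
     ld    9,sym@toc@l(9)

   with LARGETOC_REG, when supplied, holding the addis result.  */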
24007
24008 /* Issue assembly directives that create a reference to the given DWARF
24009 FRAME_TABLE_LABEL from the current function section. */
24010 void
24011 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
24012 {
24013 fprintf (asm_out_file, "\t.ref %s\n",
24014 (* targetm.strip_name_encoding) (frame_table_label));
24015 }
24016 \f
24017 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
24018 and the change to the stack pointer. */
24019
24020 static void
24021 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
24022 {
24023 rtvec p;
24024 int i;
24025 rtx regs[3];
24026
24027 i = 0;
24028 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24029 if (hard_frame_needed)
24030 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
24031 if (!(REGNO (fp) == STACK_POINTER_REGNUM
24032 || (hard_frame_needed
24033 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
24034 regs[i++] = fp;
24035
24036 p = rtvec_alloc (i);
24037 while (--i >= 0)
24038 {
24039 rtx mem = gen_frame_mem (BLKmode, regs[i]);
24040 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
24041 }
24042
24043 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
24044 }
24045
24046 /* Emit the correct code for allocating stack space, as insns.
24047 If COPY_REG, make sure a copy of the old frame is left there.
24048 The generated code may use hard register 0 as a temporary. */
24049
24050 static rtx_insn *
24051 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
24052 {
24053 rtx_insn *insn;
24054 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24055 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
24056 rtx todec = gen_int_mode (-size, Pmode);
24057 rtx par, set, mem;
24058
24059 if (INTVAL (todec) != -size)
24060 {
24061 warning (0, "stack frame too large");
24062 emit_insn (gen_trap ());
24063 return 0;
24064 }
24065
24066 if (crtl->limit_stack)
24067 {
24068 if (REG_P (stack_limit_rtx)
24069 && REGNO (stack_limit_rtx) > 1
24070 && REGNO (stack_limit_rtx) <= 31)
24071 {
24072 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
24073 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
24074 const0_rtx));
24075 }
24076 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
24077 && TARGET_32BIT
24078 && DEFAULT_ABI == ABI_V4)
24079 {
24080 rtx toload = gen_rtx_CONST (VOIDmode,
24081 gen_rtx_PLUS (Pmode,
24082 stack_limit_rtx,
24083 GEN_INT (size)));
24084
24085 emit_insn (gen_elf_high (tmp_reg, toload));
24086 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
24087 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
24088 const0_rtx));
24089 }
24090 else
24091 warning (0, "stack limit expression is not supported");
24092 }
24093
24094 if (copy_reg)
24095 {
24096 if (copy_off != 0)
24097 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
24098 else
24099 emit_move_insn (copy_reg, stack_reg);
24100 }
24101
24102 if (size > 32767)
24103 {
24104 /* Need a note here so that try_split doesn't get confused. */
24105 if (get_last_insn () == NULL_RTX)
24106 emit_note (NOTE_INSN_DELETED);
24107 insn = emit_move_insn (tmp_reg, todec);
24108 try_split (PATTERN (insn), insn, 0);
24109 todec = tmp_reg;
24110 }
24111
24112 insn = emit_insn (TARGET_32BIT
24113 ? gen_movsi_update_stack (stack_reg, stack_reg,
24114 todec, stack_reg)
24115 : gen_movdi_di_update_stack (stack_reg, stack_reg,
24116 todec, stack_reg));
24117 /* Since we didn't use gen_frame_mem to generate the MEM, grab
24118 it now and set the alias set/attributes. The above gen_*_update
24119 calls will generate a PARALLEL with the MEM set being the first
24120 operation. */
24121 par = PATTERN (insn);
24122 gcc_assert (GET_CODE (par) == PARALLEL);
24123 set = XVECEXP (par, 0, 0);
24124 gcc_assert (GET_CODE (set) == SET);
24125 mem = SET_DEST (set);
24126 gcc_assert (MEM_P (mem));
24127 MEM_NOTRAP_P (mem) = 1;
24128 set_mem_alias_set (mem, get_frame_alias_set ());
24129
24130 RTX_FRAME_RELATED_P (insn) = 1;
24131 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24132 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
24133 GEN_INT (-size))));
24134 return insn;
24135 }
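/* For example (sizes are illustrative), a 64-bit frame of 128 bytes
   simply emits "stdu 1,-128(1)", while a 40000-byte frame exceeds the
   16-bit displacement, so the split move above first builds -40000 in
   r0, roughly

     lis   0,-1
     ori   0,0,25536
     stdux 1,1,0

   either way storing the back chain and updating the stack pointer in
   a single instruction.  */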
24136
24137 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
24138
24139 #if PROBE_INTERVAL > 32768
24140 #error Cannot use indexed addressing mode for stack probing
24141 #endif
24142
24143 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
24144 inclusive. These are offsets from the current stack pointer. */
24145
24146 static void
24147 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
24148 {
24149 /* See if we have a constant small number of probes to generate. If so,
24150 that's the easy case. */
24151 if (first + size <= 32768)
24152 {
24153 HOST_WIDE_INT i;
24154
24155 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
24156 it exceeds SIZE. If only one probe is needed, this will not
24157 generate any code. Then probe at FIRST + SIZE. */
24158 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
24159 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
24160 -(first + i)));
24161
24162 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
24163 -(first + size)));
24164 }
24165
24166 /* Otherwise, do the same as above, but in a loop. Note that we must be
24167 extra careful with variables wrapping around because we might be at
24168 the very top (or the very bottom) of the address space and we have
24169 to be able to handle this case properly; in particular, we use an
24170 equality test for the loop condition. */
24171 else
24172 {
24173 HOST_WIDE_INT rounded_size;
24174 rtx r12 = gen_rtx_REG (Pmode, 12);
24175 rtx r0 = gen_rtx_REG (Pmode, 0);
24176
24177 /* Sanity check for the addressing mode we're going to use. */
24178 gcc_assert (first <= 32768);
24179
24180 /* Step 1: round SIZE to the previous multiple of the interval. */
24181
24182 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
24183
24184
24185 /* Step 2: compute initial and final value of the loop counter. */
24186
24187 /* TEST_ADDR = SP + FIRST. */
24188 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
24189 -first)));
24190
24191 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
24192 if (rounded_size > 32768)
24193 {
24194 emit_move_insn (r0, GEN_INT (-rounded_size));
24195 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
24196 }
24197 else
24198 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
24199 -rounded_size)));
24200
24201
24202 /* Step 3: the loop
24203
24204 do
24205 {
24206 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
24207 probe at TEST_ADDR
24208 }
24209 while (TEST_ADDR != LAST_ADDR)
24210
24211 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
24212 until it is equal to ROUNDED_SIZE. */
24213
24214 if (TARGET_64BIT)
24215 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
24216 else
24217 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
24218
24219
24220 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
24221 that SIZE is equal to ROUNDED_SIZE. */
24222
24223 if (size != rounded_size)
24224 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
24225 }
24226 }
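/* Worked example added as a reading aid (all numbers hypothetical):
   with the default STACK_CHECK_PROBE_INTERVAL_EXP of 12, PROBE_INTERVAL
   is 4096.  For FIRST = 16384 and SIZE = 12000, FIRST + SIZE = 28384
   fits in 32768, so the unrolled path above probes at

	sp - 20480, sp - 24576, sp - 28384

   A larger SIZE instead takes the r12/r0 loop, which probes every
   PROBE_INTERVAL bytes and, when SIZE is not a multiple of the
   interval, finishes with one extra probe at FIRST + SIZE.  */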
24227
24228 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
24229 absolute addresses. */
24230
24231 const char *
24232 output_probe_stack_range (rtx reg1, rtx reg2)
24233 {
24234 static int labelno = 0;
24235 char loop_lab[32];
24236 rtx xops[2];
24237
24238 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
24239
24240 /* Loop. */
24241 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
24242
24243 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
24244 xops[0] = reg1;
24245 xops[1] = GEN_INT (-PROBE_INTERVAL);
24246 output_asm_insn ("addi %0,%0,%1", xops);
24247
24248 /* Probe at TEST_ADDR. */
24249 xops[1] = gen_rtx_REG (Pmode, 0);
24250 output_asm_insn ("stw %1,0(%0)", xops);
24251
24252 /* Test if TEST_ADDR == LAST_ADDR. */
24253 xops[1] = reg2;
24254 if (TARGET_64BIT)
24255 output_asm_insn ("cmpd 0,%0,%1", xops);
24256 else
24257 output_asm_insn ("cmpw 0,%0,%1", xops);
24258
24259 /* Branch. */
24260 fputs ("\tbne 0,", asm_out_file);
24261 assemble_name_raw (asm_out_file, loop_lab);
24262 fputc ('\n', asm_out_file);
24263
24264 return "";
24265 }
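/* Illustrative output, not from the source: assuming reg1 is r12,
   reg2 is r0 and PROBE_INTERVAL is 4096, the loop printed above is
   roughly

	.LPSRL0:
		addi 12,12,-4096	# TEST_ADDR -= PROBE_INTERVAL
		stw  0,0(12)		# probe the word at TEST_ADDR
		cmpw 0,12,0		# TEST_ADDR == LAST_ADDR ?
		bne  0,.LPSRL0

   (cmpd instead of cmpw on 64-bit targets).  */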
24266
24267 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
24268 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
24269 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
24270 deduce these equivalences by itself so it wasn't necessary to hold
24271 its hand so much. Don't be tempted to always supply d2_f_d_e with
24272 the actual CFA register, i.e. r31 when we are using a hard frame
24273 pointer. That fails when saving regs off r1, and sched moves the
24274 r31 setup past the reg saves. */
24275
24276 static rtx
24277 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
24278 rtx reg2, rtx rreg)
24279 {
24280 rtx real, temp;
24281
24282 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
24283 {
24284 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
24285 int i;
24286
24287 gcc_checking_assert (val == 0);
24288 real = PATTERN (insn);
24289 if (GET_CODE (real) == PARALLEL)
24290 for (i = 0; i < XVECLEN (real, 0); i++)
24291 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
24292 {
24293 rtx set = XVECEXP (real, 0, i);
24294
24295 RTX_FRAME_RELATED_P (set) = 1;
24296 }
24297 RTX_FRAME_RELATED_P (insn) = 1;
24298 return insn;
24299 }
24300
24301 /* copy_rtx will not make unique copies of registers, so we need to
24302 ensure we don't have unwanted sharing here. */
24303 if (reg == reg2)
24304 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
24305
24306 if (reg == rreg)
24307 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
24308
24309 real = copy_rtx (PATTERN (insn));
24310
24311 if (reg2 != NULL_RTX)
24312 real = replace_rtx (real, reg2, rreg);
24313
24314 if (REGNO (reg) == STACK_POINTER_REGNUM)
24315 gcc_checking_assert (val == 0);
24316 else
24317 real = replace_rtx (real, reg,
24318 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
24319 STACK_POINTER_REGNUM),
24320 GEN_INT (val)));
24321
24322 /* We expect that 'real' is either a SET or a PARALLEL containing
24323 SETs (and possibly other stuff). In a PARALLEL, all the SETs
24324 are important so they all have to be marked RTX_FRAME_RELATED_P. */
24325
24326 if (GET_CODE (real) == SET)
24327 {
24328 rtx set = real;
24329
24330 temp = simplify_rtx (SET_SRC (set));
24331 if (temp)
24332 SET_SRC (set) = temp;
24333 temp = simplify_rtx (SET_DEST (set));
24334 if (temp)
24335 SET_DEST (set) = temp;
24336 if (MEM_P (SET_DEST (set)))
24337 {
24338 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
24339 if (temp)
24340 XEXP (SET_DEST (set), 0) = temp;
24341 }
24342 }
24343 else
24344 {
24345 int i;
24346
24347 gcc_assert (GET_CODE (real) == PARALLEL);
24348 for (i = 0; i < XVECLEN (real, 0); i++)
24349 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
24350 {
24351 rtx set = XVECEXP (real, 0, i);
24352
24353 temp = simplify_rtx (SET_SRC (set));
24354 if (temp)
24355 SET_SRC (set) = temp;
24356 temp = simplify_rtx (SET_DEST (set));
24357 if (temp)
24358 SET_DEST (set) = temp;
24359 if (MEM_P (SET_DEST (set)))
24360 {
24361 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
24362 if (temp)
24363 XEXP (SET_DEST (set), 0) = temp;
24364 }
24365 RTX_FRAME_RELATED_P (set) = 1;
24366 }
24367 }
24368
24369 RTX_FRAME_RELATED_P (insn) = 1;
24370 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
24371
24372 return insn;
24373 }
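/* Added sketch of the effect: if INSN is a store of r31 through a
   frame register r11 that sits at sp + 16, then
   rs6000_frame_related (insn, r11_rtx, 16, NULL_RTX, NULL_RTX)
   attaches a REG_FRAME_RELATED_EXPR note of roughly the shape

	(set (mem (plus (reg 1) (const_int 16 + OFFSET))) (reg 31))

   i.e. the same store rewritten relative to the stack pointer, which
   is the form dwarf2out can turn into correct CFI.  */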
24374
24375 /* Return an insn that sets VRSAVE from REG, with the
24376 appropriate CLOBBERs. */
24377
24378 static rtx
24379 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
24380 {
24381 int nclobs, i;
24382 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
24383 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24384
24385 clobs[0]
24386 = gen_rtx_SET (vrsave,
24387 gen_rtx_UNSPEC_VOLATILE (SImode,
24388 gen_rtvec (2, reg, vrsave),
24389 UNSPECV_SET_VRSAVE));
24390
24391 nclobs = 1;
24392
24393 /* We need to clobber the registers in the mask so the scheduler
24394 does not move sets to VRSAVE before sets of AltiVec registers.
24395
24396 However, if the function receives nonlocal gotos, reload will set
24397 all call saved registers live. We will end up with:
24398
24399 (set (reg 999) (mem))
24400 (parallel [ (set (reg vrsave) (unspec blah))
24401 (clobber (reg 999))])
24402
24403 The clobber will cause the store into reg 999 to be dead, and
24404 flow will attempt to delete an epilogue insn. In this case, we
24405 need an unspec use/set of the register. */
24406
24407 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
24408 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24409 {
24410 if (!epiloguep || call_used_regs[i])
24411 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
24412 gen_rtx_REG (V4SImode, i));
24413 else
24414 {
24415 rtx reg = gen_rtx_REG (V4SImode, i);
24416
24417 clobs[nclobs++]
24418 = gen_rtx_SET (reg,
24419 gen_rtx_UNSPEC (V4SImode,
24420 gen_rtvec (1, reg), 27)); /* ??? Magic unspec number. */
24421 }
24422 }
24423
24424 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
24425
24426 for (i = 0; i < nclobs; ++i)
24427 XVECEXP (insn, 0, i) = clobs[i];
24428
24429 return insn;
24430 }
24431
24432 static rtx
24433 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
24434 {
24435 rtx addr, mem;
24436
24437 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
24438 mem = gen_frame_mem (GET_MODE (reg), addr);
24439 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
24440 }
24441
24442 static rtx
24443 gen_frame_load (rtx reg, rtx frame_reg, int offset)
24444 {
24445 return gen_frame_set (reg, frame_reg, offset, false);
24446 }
24447
24448 static rtx
24449 gen_frame_store (rtx reg, rtx frame_reg, int offset)
24450 {
24451 return gen_frame_set (reg, frame_reg, offset, true);
24452 }
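/* Added usage sketch: these helpers only build the SET; the caller
   emits it.  A hypothetical save of r30 at sp + 8 would be

	insn = emit_insn (gen_frame_store (gen_rtx_REG (Pmode, 30),
					   sp_reg_rtx, 8));

   yielding (set (mem/c (plus (reg 1) (const_int 8))) (reg 30)), with
   gen_frame_mem supplying the frame alias set.  */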
24453
24454 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
24455 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
24456
24457 static rtx
24458 emit_frame_save (rtx frame_reg, machine_mode mode,
24459 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
24460 {
24461 rtx reg, insn;
24462
24463 /* Reject cases that would need register indexed addressing. */
24464 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
24465 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
24466 || (TARGET_E500_DOUBLE && mode == DFmode)
24467 || (TARGET_SPE_ABI
24468 && SPE_VECTOR_MODE (mode)
24469 && !SPE_CONST_OFFSET_OK (offset))));
24470
24471 reg = gen_rtx_REG (mode, regno);
24472 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
24473 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
24474 NULL_RTX, NULL_RTX);
24475 }
24476
24477 /* Emit an offset memory reference suitable for a frame store, while
24478 converting to a valid addressing mode. */
24479
24480 static rtx
24481 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
24482 {
24483 rtx int_rtx, offset_rtx;
24484
24485 int_rtx = GEN_INT (offset);
24486
24487 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
24488 || (TARGET_E500_DOUBLE && mode == DFmode))
24489 {
24490 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
24491 emit_move_insn (offset_rtx, int_rtx);
24492 }
24493 else
24494 offset_rtx = int_rtx;
24495
24496 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
24497 }
24498
24499 #ifndef TARGET_FIX_AND_CONTINUE
24500 #define TARGET_FIX_AND_CONTINUE 0
24501 #endif
24502
24503 /* The routines start at GPR 13 or 14, FPR 14 and VR 20; we need the smallest. */
24504 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
24505 #define LAST_SAVRES_REGISTER 31
24506 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
24507
24508 enum {
24509 SAVRES_LR = 0x1,
24510 SAVRES_SAVE = 0x2,
24511 SAVRES_REG = 0x0c,
24512 SAVRES_GPR = 0,
24513 SAVRES_FPR = 4,
24514 SAVRES_VR = 8
24515 };
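/* Added note: a SEL value is built by OR-ing one choice from each
   group, e.g. SAVRES_SAVE | SAVRES_FPR | SAVRES_LR (== 0x7) selects
   the out-of-line FPR save routine that also saves the link register;
   (sel & SAVRES_REG) extracts the register class.  */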
24516
24517 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
24518
24519 /* Temporary holding space for an out-of-line register save/restore
24520 routine name. */
24521 static char savres_routine_name[30];
24522
24523 /* Return the name for an out-of-line register save/restore routine.
24524 SEL encodes save vs. restore, the register class, and LR handling. */
24525
24526 static char *
24527 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
24528 {
24529 const char *prefix = "";
24530 const char *suffix = "";
24531
24532 /* Different targets are supposed to define
24533 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
24534 routine name could be generated with:
24535
24536 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
24537
24538 This is a nice idea in theory, but in reality, things are
24539 complicated in several ways:
24540
24541 - ELF targets have save/restore routines for GPRs.
24542
24543 - SPE targets use different prefixes for 32/64-bit registers, and
24544 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
24545
24546 - PPC64 ELF targets have routines for save/restore of GPRs that
24547 differ in what they do with the link register, so having a set
24548 prefix doesn't work. (We only use one of the save routines at
24549 the moment, though.)
24550
24551 - PPC32 ELF targets have "exit" versions of the restore routines
24552 that restore the link register and can save some extra space.
24553 These require an extra suffix. (There are also "tail" versions
24554 of the restore routines and "GOT" versions of the save routines,
24555 but we don't generate those at present. Same problems apply,
24556 though.)
24557
24558 We deal with all this by synthesizing our own prefix/suffix and
24559 using that for the simple sprintf call shown above. */
24560 if (TARGET_SPE)
24561 {
24562 /* No floating point saves on the SPE. */
24563 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
24564
24565 if ((sel & SAVRES_SAVE))
24566 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
24567 else
24568 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
24569
24570 if ((sel & SAVRES_LR))
24571 suffix = "_x";
24572 }
24573 else if (DEFAULT_ABI == ABI_V4)
24574 {
24575 if (TARGET_64BIT)
24576 goto aix_names;
24577
24578 if ((sel & SAVRES_REG) == SAVRES_GPR)
24579 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
24580 else if ((sel & SAVRES_REG) == SAVRES_FPR)
24581 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
24582 else if ((sel & SAVRES_REG) == SAVRES_VR)
24583 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
24584 else
24585 abort ();
24586
24587 if ((sel & SAVRES_LR))
24588 suffix = "_x";
24589 }
24590 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24591 {
24592 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
24593 /* No out-of-line save/restore routines for GPRs on AIX. */
24594 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
24595 #endif
24596
24597 aix_names:
24598 if ((sel & SAVRES_REG) == SAVRES_GPR)
24599 prefix = ((sel & SAVRES_SAVE)
24600 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
24601 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
24602 else if ((sel & SAVRES_REG) == SAVRES_FPR)
24603 {
24604 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
24605 if ((sel & SAVRES_LR))
24606 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
24607 else
24608 #endif
24609 {
24610 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
24611 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
24612 }
24613 }
24614 else if ((sel & SAVRES_REG) == SAVRES_VR)
24615 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
24616 else
24617 abort ();
24618 }
24619
24620 if (DEFAULT_ABI == ABI_DARWIN)
24621 {
24622 /* The Darwin approach is (slightly) different, in order to be
24623 compatible with code generated by the system toolchain. There is a
24624 single symbol for the start of save sequence, and the code here
24625 embeds an offset into that code on the basis of the first register
24626 to be saved. */
24627 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
24628 if ((sel & SAVRES_REG) == SAVRES_GPR)
24629 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
24630 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
24631 (regno - 13) * 4, prefix, regno);
24632 else if ((sel & SAVRES_REG) == SAVRES_FPR)
24633 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
24634 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
24635 else if ((sel & SAVRES_REG) == SAVRES_VR)
24636 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
24637 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
24638 else
24639 abort ();
24640 }
24641 else
24642 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
24643
24644 return savres_routine_name;
24645 }
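/* Added examples of the names produced, derived from the logic above
   (verify against your target): on 32-bit ELF, regno 29 yields
   "_savegpr_29" and, for a restore that also reloads LR, "_restgpr_29_x";
   on AIX/ELFv2 the same register yields "_savegpr0_29" when LR is
   saved and "_savegpr1_29" when it is not.  */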
24646
24647 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
24648 SEL selects the register class and save vs. restore, as above. */
24649
24650 static rtx
24651 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
24652 {
24653 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
24654 ? info->first_gp_reg_save
24655 : (sel & SAVRES_REG) == SAVRES_FPR
24656 ? info->first_fp_reg_save - 32
24657 : (sel & SAVRES_REG) == SAVRES_VR
24658 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
24659 : -1);
24660 rtx sym;
24661 int select = sel;
24662
24663 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
24664 versions of the gpr routines. */
24665 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
24666 && info->spe_64bit_regs_used)
24667 select ^= SAVRES_FPR ^ SAVRES_GPR;
24668
24669 /* Don't generate bogus routine names. */
24670 gcc_assert (FIRST_SAVRES_REGISTER <= regno
24671 && regno <= LAST_SAVRES_REGISTER
24672 && select >= 0 && select <= 12);
24673
24674 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
24675
24676 if (sym == NULL)
24677 {
24678 char *name;
24679
24680 name = rs6000_savres_routine_name (info, regno, sel);
24681
24682 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
24683 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
24684 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
24685 }
24686
24687 return sym;
24688 }
24689
24690 /* Emit a sequence of insns, including a stack tie if needed, for
24691 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
24692 reset the stack pointer, but move the base of the frame into
24693 reg UPDT_REGNO for use by out-of-line register restore routines. */
24694
24695 static rtx
24696 rs6000_emit_stack_reset (rs6000_stack_t *info,
24697 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
24698 unsigned updt_regno)
24699 {
24700 rtx updt_reg_rtx;
24701
24702 /* This blockage is needed so that sched doesn't decide to move
24703 the sp change before the register restores. */
24704 if (DEFAULT_ABI == ABI_V4
24705 || (TARGET_SPE_ABI
24706 && info->spe_64bit_regs_used != 0
24707 && info->first_gp_reg_save != 32))
24708 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
24709
24710 /* If we are restoring registers out-of-line, we will be using the
24711 "exit" variants of the restore routines, which will reset the
24712 stack for us. But we do need to point updt_reg into the
24713 right place for those routines. */
24714 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
24715
24716 if (frame_off != 0)
24717 return emit_insn (gen_add3_insn (updt_reg_rtx,
24718 frame_reg_rtx, GEN_INT (frame_off)));
24719 else if (REGNO (frame_reg_rtx) != updt_regno)
24720 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
24721
24722 return NULL_RTX;
24723 }
24724
24725 /* Return the register number used as a pointer by out-of-line
24726 save/restore functions. */
24727
24728 static inline unsigned
24729 ptr_regno_for_savres (int sel)
24730 {
24731 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24732 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
24733 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
24734 }
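/* Added summary of the conventions encoded above: AIX and ELFv2 use
   r1 for the FPR routines and for any routine that touches LR, r12
   otherwise; Darwin uses r1 for FPR routines and r11 for the rest;
   V.4 always uses r11.  These match the pointer registers the
   out-of-line routines themselves expect.  */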
24735
24736 /* Construct a parallel rtx describing the effect of a call to an
24737 out-of-line register save/restore routine, and emit the insn
24738 or jump_insn as appropriate. */
24739
24740 static rtx
24741 rs6000_emit_savres_rtx (rs6000_stack_t *info,
24742 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
24743 machine_mode reg_mode, int sel)
24744 {
24745 int i;
24746 int offset, start_reg, end_reg, n_regs, use_reg;
24747 int reg_size = GET_MODE_SIZE (reg_mode);
24748 rtx sym;
24749 rtvec p;
24750 rtx par, insn;
24751
24752 offset = 0;
24753 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
24754 ? info->first_gp_reg_save
24755 : (sel & SAVRES_REG) == SAVRES_FPR
24756 ? info->first_fp_reg_save
24757 : (sel & SAVRES_REG) == SAVRES_VR
24758 ? info->first_altivec_reg_save
24759 : -1);
24760 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
24761 ? 32
24762 : (sel & SAVRES_REG) == SAVRES_FPR
24763 ? 64
24764 : (sel & SAVRES_REG) == SAVRES_VR
24765 ? LAST_ALTIVEC_REGNO + 1
24766 : -1);
24767 n_regs = end_reg - start_reg;
24768 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
24769 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
24770 + n_regs);
24771
24772 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
24773 RTVEC_ELT (p, offset++) = ret_rtx;
24774
24775 RTVEC_ELT (p, offset++)
24776 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
24777
24778 sym = rs6000_savres_routine_sym (info, sel);
24779 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
24780
24781 use_reg = ptr_regno_for_savres (sel);
24782 if ((sel & SAVRES_REG) == SAVRES_VR)
24783 {
24784 /* Vector regs are saved/restored using [reg+reg] addressing. */
24785 RTVEC_ELT (p, offset++)
24786 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
24787 RTVEC_ELT (p, offset++)
24788 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
24789 }
24790 else
24791 RTVEC_ELT (p, offset++)
24792 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
24793
24794 for (i = 0; i < end_reg - start_reg; i++)
24795 RTVEC_ELT (p, i + offset)
24796 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
24797 frame_reg_rtx, save_area_offset + reg_size * i,
24798 (sel & SAVRES_SAVE) != 0);
24799
24800 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
24801 RTVEC_ELT (p, i + offset)
24802 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
24803
24804 par = gen_rtx_PARALLEL (VOIDmode, p);
24805
24806 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
24807 {
24808 insn = emit_jump_insn (par);
24809 JUMP_LABEL (insn) = ret_rtx;
24810 }
24811 else
24812 insn = emit_insn (par);
24813 return insn;
24814 }
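/* Added sketch of the PARALLEL built above for a GPR restore that
   returns through an "exit" routine (modes elided):

	(parallel [(return)
		   (clobber (reg LR))
		   (use (symbol_ref "_restgpr_29_x"))
		   (use (reg 11))
		   (set (reg 29) (mem (plus (reg 11) (const_int OFF))))
		   ...])

   The vector variant instead clobbers the pointer register and uses
   r0, matching its [reg+reg] addressing.  */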
24815
24816 /* Emit code to store, into REG, the CR fields that need saving. */
24817
24818 static void
24819 rs6000_emit_move_from_cr (rtx reg)
24820 {
24821 /* Only the ELFv2 ABI allows storing only selected fields. */
24822 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
24823 {
24824 int i, cr_reg[8], count = 0;
24825
24826 /* Collect CR fields that must be saved. */
24827 for (i = 0; i < 8; i++)
24828 if (save_reg_p (CR0_REGNO + i))
24829 cr_reg[count++] = i;
24830
24831 /* If it's just a single one, use mfcrf. */
24832 if (count == 1)
24833 {
24834 rtvec p = rtvec_alloc (1);
24835 rtvec r = rtvec_alloc (2);
24836 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
24837 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
24838 RTVEC_ELT (p, 0)
24839 = gen_rtx_SET (reg,
24840 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
24841
24842 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24843 return;
24844 }
24845
24846 /* ??? It might be better to handle the count == 2 and count == 3
24847 cases here as well, using logical operations to combine the values. */
24848 }
24849
24850 emit_insn (gen_movesi_from_cr (reg));
24851 }
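/* Added example: on ELFv2 with only CR2 to save, the single-field
   path above lets the machine description emit one field move, in
   modern mnemonics roughly

	mfocrf r12,0x20		# mask is 1 << (7 - 2)

   instead of the full mfcr produced by the gen_movesi_from_cr
   fallback.  */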
24852
24853 /* Return whether the split-stack arg pointer (r12) is used. */
24854
24855 static bool
24856 split_stack_arg_pointer_used_p (void)
24857 {
24858 /* If the pseudo holding the arg pointer is no longer a pseudo,
24859 then the arg pointer is used. */
24860 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
24861 && (!REG_P (cfun->machine->split_stack_arg_pointer)
24862 || (REGNO (cfun->machine->split_stack_arg_pointer)
24863 < FIRST_PSEUDO_REGISTER)))
24864 return true;
24865
24866 /* Unfortunately we also need to do some code scanning, since
24867 r12 may have been substituted for the pseudo. */
24868 rtx_insn *insn;
24869 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
24870 FOR_BB_INSNS (bb, insn)
24871 if (NONDEBUG_INSN_P (insn))
24872 {
24873 /* A call destroys r12. */
24874 if (CALL_P (insn))
24875 return false;
24876
24877 df_ref use;
24878 FOR_EACH_INSN_USE (use, insn)
24879 {
24880 rtx x = DF_REF_REG (use);
24881 if (REG_P (x) && REGNO (x) == 12)
24882 return true;
24883 }
24884 df_ref def;
24885 FOR_EACH_INSN_DEF (def, insn)
24886 {
24887 rtx x = DF_REF_REG (def);
24888 if (REG_P (x) && REGNO (x) == 12)
24889 return false;
24890 }
24891 }
24892 return bitmap_bit_p (DF_LR_OUT (bb), 12);
24893 }
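/* Added note on the scan above: within the first real block, a call
   or an explicit def of r12 seen before any use proves the arg
   pointer dead, an earlier use proves it live, and if the block ends
   undecided we fall back to the DF live-out bit for r12.  */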
24894
24895 /* Return whether we need to emit an ELFv2 global entry point prologue. */
24896
24897 static bool
24898 rs6000_global_entry_point_needed_p (void)
24899 {
24900 /* Only needed for the ELFv2 ABI. */
24901 if (DEFAULT_ABI != ABI_ELFv2)
24902 return false;
24903
24904 /* With -msingle-pic-base, we assume the whole program shares the same
24905 TOC, so no global entry point prologues are needed anywhere. */
24906 if (TARGET_SINGLE_PIC_BASE)
24907 return false;
24908
24909 /* Ensure we have a global entry point for thunks. ??? We could
24910 avoid that if the target routine doesn't need a global entry point,
24911 but we do not know whether this is the case at this point. */
24912 if (cfun->is_thunk)
24913 return true;
24914
24915 /* For regular functions, rs6000_emit_prologue sets this flag if the
24916 routine ever uses the TOC pointer. */
24917 return cfun->machine->r2_setup_needed;
24918 }
24919
24920 /* Emit function prologue as insns. */
24921
24922 void
24923 rs6000_emit_prologue (void)
24924 {
24925 rs6000_stack_t *info = rs6000_stack_info ();
24926 machine_mode reg_mode = Pmode;
24927 int reg_size = TARGET_32BIT ? 4 : 8;
24928 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24929 rtx frame_reg_rtx = sp_reg_rtx;
24930 unsigned int cr_save_regno;
24931 rtx cr_save_rtx = NULL_RTX;
24932 rtx insn;
24933 int strategy;
24934 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
24935 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
24936 && call_used_regs[STATIC_CHAIN_REGNUM]);
24937 int using_split_stack = (flag_split_stack
24938 && (lookup_attribute ("no_split_stack",
24939 DECL_ATTRIBUTES (cfun->decl))
24940 == NULL));
24941
24942 /* Offset to top of frame for frame_reg and sp respectively. */
24943 HOST_WIDE_INT frame_off = 0;
24944 HOST_WIDE_INT sp_off = 0;
24945 /* sp_adjust is the stack adjusting instruction, tracked so that the
24946 insn setting up the split-stack arg pointer can be emitted just
24947 prior to it, when r12 is not used here for other purposes. */
24948 rtx_insn *sp_adjust = 0;
24949
24950 #if CHECKING_P
24951 /* Track and check usage of r0, r11, r12. */
24952 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
24953 #define START_USE(R) do \
24954 { \
24955 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
24956 reg_inuse |= 1 << (R); \
24957 } while (0)
24958 #define END_USE(R) do \
24959 { \
24960 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
24961 reg_inuse &= ~(1 << (R)); \
24962 } while (0)
24963 #define NOT_INUSE(R) do \
24964 { \
24965 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
24966 } while (0)
24967 #else
24968 #define START_USE(R) do {} while (0)
24969 #define END_USE(R) do {} while (0)
24970 #define NOT_INUSE(R) do {} while (0)
24971 #endif
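/* Added usage note: the checking macros above track reg_inuse as a
   bitmask over the prologue scratch registers r0, r11 and r12.  A
   typical pairing later in this function is

	START_USE (0);		(claim r0 as a scratch)
	... emit insns that use r0 ...
	END_USE (0);		(release it)

   so overlapping claims trip gcc_assert in checking builds.  */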
24972
24973 if (DEFAULT_ABI == ABI_ELFv2
24974 && !TARGET_SINGLE_PIC_BASE)
24975 {
24976 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
24977
24978 /* With -mminimal-toc we may generate an extra use of r2 below. */
24979 if (TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
24980 cfun->machine->r2_setup_needed = true;
24981 }
24982
24983
24984 if (flag_stack_usage_info)
24985 current_function_static_stack_size = info->total_size;
24986
24987 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
24988 {
24989 HOST_WIDE_INT size = info->total_size;
24990
24991 if (crtl->is_leaf && !cfun->calls_alloca)
24992 {
24993 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
24994 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
24995 size - STACK_CHECK_PROTECT);
24996 }
24997 else if (size > 0)
24998 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
24999 }
25000
25001 if (TARGET_FIX_AND_CONTINUE)
25002 {
25003 /* gdb on Darwin arranges to forward a function from the old
25004 address by modifying the first 5 instructions of the function
25005 to branch to the overriding function. This is necessary to
25006 permit function pointers that point to the old function to
25007 actually forward to the new function. */
25008 emit_insn (gen_nop ());
25009 emit_insn (gen_nop ());
25010 emit_insn (gen_nop ());
25011 emit_insn (gen_nop ());
25012 emit_insn (gen_nop ());
25013 }
25014
25015 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25016 {
25017 reg_mode = V2SImode;
25018 reg_size = 8;
25019 }
25020
25021 /* Handle world saves specially here. */
25022 if (WORLD_SAVE_P (info))
25023 {
25024 int i, j, sz;
25025 rtx treg;
25026 rtvec p;
25027 rtx reg0;
25028
25029 /* save_world expects lr in r0. */
25030 reg0 = gen_rtx_REG (Pmode, 0);
25031 if (info->lr_save_p)
25032 {
25033 insn = emit_move_insn (reg0,
25034 gen_rtx_REG (Pmode, LR_REGNO));
25035 RTX_FRAME_RELATED_P (insn) = 1;
25036 }
25037
25038 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
25039 assumptions about the offsets of various bits of the stack
25040 frame. */
25041 gcc_assert (info->gp_save_offset == -220
25042 && info->fp_save_offset == -144
25043 && info->lr_save_offset == 8
25044 && info->cr_save_offset == 4
25045 && info->push_p
25046 && info->lr_save_p
25047 && (!crtl->calls_eh_return
25048 || info->ehrd_offset == -432)
25049 && info->vrsave_save_offset == -224
25050 && info->altivec_save_offset == -416);
25051
25052 treg = gen_rtx_REG (SImode, 11);
25053 emit_move_insn (treg, GEN_INT (-info->total_size));
25054
25055 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
25056 in R11. It also clobbers R12, so beware! */
25057
25058 /* Preserve CR2 for save_world prologues. */
25059 sz = 5;
25060 sz += 32 - info->first_gp_reg_save;
25061 sz += 64 - info->first_fp_reg_save;
25062 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
25063 p = rtvec_alloc (sz);
25064 j = 0;
25065 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
25066 gen_rtx_REG (SImode,
25067 LR_REGNO));
25068 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
25069 gen_rtx_SYMBOL_REF (Pmode,
25070 "*save_world"));
25071 /* We do floats first so that the instruction pattern matches
25072 properly. */
25073 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25074 RTVEC_ELT (p, j++)
25075 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25076 ? DFmode : SFmode,
25077 info->first_fp_reg_save + i),
25078 frame_reg_rtx,
25079 info->fp_save_offset + frame_off + 8 * i);
25080 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
25081 RTVEC_ELT (p, j++)
25082 = gen_frame_store (gen_rtx_REG (V4SImode,
25083 info->first_altivec_reg_save + i),
25084 frame_reg_rtx,
25085 info->altivec_save_offset + frame_off + 16 * i);
25086 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25087 RTVEC_ELT (p, j++)
25088 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25089 frame_reg_rtx,
25090 info->gp_save_offset + frame_off + reg_size * i);
25091
25092 /* The CR register is traditionally saved as CR2. */
25093 RTVEC_ELT (p, j++)
25094 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
25095 frame_reg_rtx, info->cr_save_offset + frame_off);
25096 /* Describe the use of R0, which holds the saved LR. */
25097 if (info->lr_save_p)
25098 RTVEC_ELT (p, j++)
25099 = gen_frame_store (reg0,
25100 frame_reg_rtx, info->lr_save_offset + frame_off);
25101 /* Explain what happens to the stack pointer. */
25102 {
25103 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
25104 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
25105 }
25106
25107 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25108 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
25109 treg, GEN_INT (-info->total_size));
25110 sp_off = frame_off = info->total_size;
25111 }
25112
25113 strategy = info->savres_strategy;
25114
25115 /* For V.4, update stack before we do any saving and set back pointer. */
25116 if (! WORLD_SAVE_P (info)
25117 && info->push_p
25118 && (DEFAULT_ABI == ABI_V4
25119 || crtl->calls_eh_return))
25120 {
25121 bool need_r11 = (TARGET_SPE
25122 ? (!(strategy & SAVE_INLINE_GPRS)
25123 && info->spe_64bit_regs_used == 0)
25124 : (!(strategy & SAVE_INLINE_FPRS)
25125 || !(strategy & SAVE_INLINE_GPRS)
25126 || !(strategy & SAVE_INLINE_VRS)));
25127 int ptr_regno = -1;
25128 rtx ptr_reg = NULL_RTX;
25129 int ptr_off = 0;
25130
25131 if (info->total_size < 32767)
25132 frame_off = info->total_size;
25133 else if (need_r11)
25134 ptr_regno = 11;
25135 else if (info->cr_save_p
25136 || info->lr_save_p
25137 || info->first_fp_reg_save < 64
25138 || info->first_gp_reg_save < 32
25139 || info->altivec_size != 0
25140 || info->vrsave_size != 0
25141 || crtl->calls_eh_return)
25142 ptr_regno = 12;
25143 else
25144 {
25145 /* The prologue won't be saving any regs so there is no need
25146 to set up a frame register to access any frame save area.
25147 We also won't be using frame_off anywhere below, but set
25148 the correct value anyway to protect against future
25149 changes to this function. */
25150 frame_off = info->total_size;
25151 }
25152 if (ptr_regno != -1)
25153 {
25154 /* Set up the frame offset to that needed by the first
25155 out-of-line save function. */
25156 START_USE (ptr_regno);
25157 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25158 frame_reg_rtx = ptr_reg;
25159 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
25160 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
25161 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
25162 ptr_off = info->gp_save_offset + info->gp_size;
25163 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
25164 ptr_off = info->altivec_save_offset + info->altivec_size;
25165 frame_off = -ptr_off;
25166 }
25167 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
25168 ptr_reg, ptr_off);
25169 if (REGNO (frame_reg_rtx) == 12)
25170 sp_adjust = 0;
25171 sp_off = info->total_size;
25172 if (frame_reg_rtx != sp_reg_rtx)
25173 rs6000_emit_stack_tie (frame_reg_rtx, false);
25174 }
25175
25176 /* If we use the link register, get it into r0. */
25177 if (!WORLD_SAVE_P (info) && info->lr_save_p)
25178 {
25179 rtx addr, reg, mem;
25180
25181 reg = gen_rtx_REG (Pmode, 0);
25182 START_USE (0);
25183 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
25184 RTX_FRAME_RELATED_P (insn) = 1;
25185
25186 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
25187 | SAVE_NOINLINE_FPRS_SAVES_LR)))
25188 {
25189 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
25190 GEN_INT (info->lr_save_offset + frame_off));
25191 mem = gen_rtx_MEM (Pmode, addr);
25192 /* This MEM must not use rs6000_sr_alias_set, because
25193 __builtin_return_address can also read this slot. */
25194
25195 insn = emit_move_insn (mem, reg);
25196 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
25197 NULL_RTX, NULL_RTX);
25198 END_USE (0);
25199 }
25200 }
25201
25202 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
25203 r12 will be needed by the out-of-line GPR save. */
25204 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25205 && !(strategy & (SAVE_INLINE_GPRS
25206 | SAVE_NOINLINE_GPRS_SAVES_LR))
25207 ? 11 : 12);
25208 if (!WORLD_SAVE_P (info)
25209 && info->cr_save_p
25210 && REGNO (frame_reg_rtx) != cr_save_regno
25211 && !(using_static_chain_p && cr_save_regno == 11)
25212 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
25213 {
25214 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
25215 START_USE (cr_save_regno);
25216 rs6000_emit_move_from_cr (cr_save_rtx);
25217 }
25218
25219 /* Do any required saving of fpr's. If only one or two to save, do
25220 it ourselves. Otherwise, call function. */
25221 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
25222 {
25223 int i;
25224 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25225 if (save_reg_p (info->first_fp_reg_save + i))
25226 emit_frame_save (frame_reg_rtx,
25227 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25228 ? DFmode : SFmode),
25229 info->first_fp_reg_save + i,
25230 info->fp_save_offset + frame_off + 8 * i,
25231 sp_off - frame_off);
25232 }
25233 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
25234 {
25235 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
25236 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
25237 unsigned ptr_regno = ptr_regno_for_savres (sel);
25238 rtx ptr_reg = frame_reg_rtx;
25239
25240 if (REGNO (frame_reg_rtx) == ptr_regno)
25241 gcc_checking_assert (frame_off == 0);
25242 else
25243 {
25244 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25245 NOT_INUSE (ptr_regno);
25246 emit_insn (gen_add3_insn (ptr_reg,
25247 frame_reg_rtx, GEN_INT (frame_off)));
25248 }
25249 insn = rs6000_emit_savres_rtx (info, ptr_reg,
25250 info->fp_save_offset,
25251 info->lr_save_offset,
25252 DFmode, sel);
25253 rs6000_frame_related (insn, ptr_reg, sp_off,
25254 NULL_RTX, NULL_RTX);
25255 if (lr)
25256 END_USE (0);
25257 }
25258
25259 /* Save GPRs. This is done as a PARALLEL if we are using
25260 the store-multiple instructions. */
25261 if (!WORLD_SAVE_P (info)
25262 && TARGET_SPE_ABI
25263 && info->spe_64bit_regs_used != 0
25264 && info->first_gp_reg_save != 32)
25265 {
25266 int i;
25267 rtx spe_save_area_ptr;
25268 HOST_WIDE_INT save_off;
25269 int ool_adjust = 0;
25270
25271 /* Determine whether we can address all of the registers that need
25272 to be saved with an offset from frame_reg_rtx that fits in
25273 the small const field for SPE memory instructions. */
25274 int spe_regs_addressable
25275 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25276 + reg_size * (32 - info->first_gp_reg_save - 1))
25277 && (strategy & SAVE_INLINE_GPRS));
25278
25279 if (spe_regs_addressable)
25280 {
25281 spe_save_area_ptr = frame_reg_rtx;
25282 save_off = frame_off;
25283 }
25284 else
25285 {
25286 /* Make r11 point to the start of the SPE save area. We need
25287 to be careful here if r11 is holding the static chain. If
25288 it is, then temporarily save it in r0. */
25289 HOST_WIDE_INT offset;
25290
25291 if (!(strategy & SAVE_INLINE_GPRS))
25292 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25293 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
25294 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
25295 save_off = frame_off - offset;
25296
25297 if (using_static_chain_p)
25298 {
25299 rtx r0 = gen_rtx_REG (Pmode, 0);
25300
25301 START_USE (0);
25302 gcc_assert (info->first_gp_reg_save > 11);
25303
25304 emit_move_insn (r0, spe_save_area_ptr);
25305 }
25306 else if (REGNO (frame_reg_rtx) != 11)
25307 START_USE (11);
25308
25309 emit_insn (gen_addsi3 (spe_save_area_ptr,
25310 frame_reg_rtx, GEN_INT (offset)));
25311 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
25312 frame_off = -info->spe_gp_save_offset + ool_adjust;
25313 }
25314
25315 if ((strategy & SAVE_INLINE_GPRS))
25316 {
25317 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25318 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25319 emit_frame_save (spe_save_area_ptr, reg_mode,
25320 info->first_gp_reg_save + i,
25321 (info->spe_gp_save_offset + save_off
25322 + reg_size * i),
25323 sp_off - save_off);
25324 }
25325 else
25326 {
25327 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
25328 info->spe_gp_save_offset + save_off,
25329 0, reg_mode,
25330 SAVRES_SAVE | SAVRES_GPR);
25331
25332 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
25333 NULL_RTX, NULL_RTX);
25334 }
25335
25336 /* Move the static chain pointer back. */
25337 if (!spe_regs_addressable)
25338 {
25339 if (using_static_chain_p)
25340 {
25341 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
25342 END_USE (0);
25343 }
25344 else if (REGNO (frame_reg_rtx) != 11)
25345 END_USE (11);
25346 }
25347 }
25348 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
25349 {
25350 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
25351 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
25352 unsigned ptr_regno = ptr_regno_for_savres (sel);
25353 rtx ptr_reg = frame_reg_rtx;
25354 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
25355 int end_save = info->gp_save_offset + info->gp_size;
25356 int ptr_off;
25357
25358 if (ptr_regno == 12)
25359 sp_adjust = 0;
25360 if (!ptr_set_up)
25361 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25362
25363 /* Need to adjust r11 (r12) if we saved any FPRs. */
25364 if (end_save + frame_off != 0)
25365 {
25366 rtx offset = GEN_INT (end_save + frame_off);
25367
25368 if (ptr_set_up)
25369 frame_off = -end_save;
25370 else
25371 NOT_INUSE (ptr_regno);
25372 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
25373 }
25374 else if (!ptr_set_up)
25375 {
25376 NOT_INUSE (ptr_regno);
25377 emit_move_insn (ptr_reg, frame_reg_rtx);
25378 }
25379 ptr_off = -end_save;
25380 insn = rs6000_emit_savres_rtx (info, ptr_reg,
25381 info->gp_save_offset + ptr_off,
25382 info->lr_save_offset + ptr_off,
25383 reg_mode, sel);
25384 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
25385 NULL_RTX, NULL_RTX);
25386 if (lr)
25387 END_USE (0);
25388 }
25389 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
25390 {
25391 rtvec p;
25392 int i;
25393 p = rtvec_alloc (32 - info->first_gp_reg_save);
25394 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25395 RTVEC_ELT (p, i)
25396 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25397 frame_reg_rtx,
25398 info->gp_save_offset + frame_off + reg_size * i);
25399 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25400 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
25401 NULL_RTX, NULL_RTX);
25402 }
25403 else if (!WORLD_SAVE_P (info))
25404 {
25405 int i;
25406 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25407 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25408 emit_frame_save (frame_reg_rtx, reg_mode,
25409 info->first_gp_reg_save + i,
25410 info->gp_save_offset + frame_off + reg_size * i,
25411 sp_off - frame_off);
25412 }
25413
25414 if (crtl->calls_eh_return)
25415 {
25416 unsigned int i;
25417 rtvec p;
25418
25419 for (i = 0; ; ++i)
25420 {
25421 unsigned int regno = EH_RETURN_DATA_REGNO (i);
25422 if (regno == INVALID_REGNUM)
25423 break;
25424 }
25425
25426 p = rtvec_alloc (i);
25427
25428 for (i = 0; ; ++i)
25429 {
25430 unsigned int regno = EH_RETURN_DATA_REGNO (i);
25431 if (regno == INVALID_REGNUM)
25432 break;
25433
25434 insn
25435 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
25436 sp_reg_rtx,
25437 info->ehrd_offset + sp_off + reg_size * (int) i);
25438 RTVEC_ELT (p, i) = insn;
25439 RTX_FRAME_RELATED_P (insn) = 1;
25440 }
25441
25442 insn = emit_insn (gen_blockage ());
25443 RTX_FRAME_RELATED_P (insn) = 1;
25444 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
25445 }
25446
25447 /* In the AIX ABI we need to make sure r2 is really saved. */
25448 if (TARGET_AIX && crtl->calls_eh_return)
25449 {
25450 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
25451 rtx save_insn, join_insn, note;
25452 long toc_restore_insn;
25453
25454 tmp_reg = gen_rtx_REG (Pmode, 11);
25455 tmp_reg_si = gen_rtx_REG (SImode, 11);
25456 if (using_static_chain_p)
25457 {
25458 START_USE (0);
25459 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
25460 }
25461 else
25462 START_USE (11);
25463 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
25464 /* Peek at instruction to which this function returns. If it's
25465 restoring r2, then we know we've already saved r2. We can't
25466 unconditionally save r2 because the value we have will already
25467 be updated if we arrived at this function via a plt call or
25468 toc adjusting stub. */
25469 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
25470 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
25471 + RS6000_TOC_SAVE_SLOT);
25472 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
25473 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
25474 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
25475 validate_condition_mode (EQ, CCUNSmode);
25476 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
25477 emit_insn (gen_rtx_SET (compare_result,
25478 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
25479 toc_save_done = gen_label_rtx ();
25480 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
25481 gen_rtx_EQ (VOIDmode, compare_result,
25482 const0_rtx),
25483 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
25484 pc_rtx);
25485 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
25486 JUMP_LABEL (jump) = toc_save_done;
25487 LABEL_NUSES (toc_save_done) += 1;
25488
25489 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
25490 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
25491 sp_off - frame_off);
25492
25493 emit_label (toc_save_done);
25494
25495 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
25496 have a CFG that has different saves along different paths.
25497 Move the note to a dummy blockage insn, which describes that
25498 R2 is unconditionally saved after the label. */
25499 /* ??? An alternate representation might be a special insn pattern
25500 containing both the branch and the store. That might let the
25501 code that minimizes the number of DW_CFA_advance opcodes better
25502 freedom in placing the annotations. */
25503 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
25504 if (note)
25505 remove_note (save_insn, note);
25506 else
25507 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
25508 copy_rtx (PATTERN (save_insn)), NULL_RTX);
25509 RTX_FRAME_RELATED_P (save_insn) = 0;
25510
25511 join_insn = emit_insn (gen_blockage ());
25512 REG_NOTES (join_insn) = note;
25513 RTX_FRAME_RELATED_P (join_insn) = 1;
25514
25515 if (using_static_chain_p)
25516 {
25517 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
25518 END_USE (0);
25519 }
25520 else
25521 END_USE (11);
25522 }
25523
25524 /* Save CR if we use any that must be preserved. */
25525 if (!WORLD_SAVE_P (info) && info->cr_save_p)
25526 {
25527 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
25528 GEN_INT (info->cr_save_offset + frame_off));
25529 rtx mem = gen_frame_mem (SImode, addr);
25530
25531 /* If we didn't copy cr before, do so now using r0. */
25532 if (cr_save_rtx == NULL_RTX)
25533 {
25534 START_USE (0);
25535 cr_save_rtx = gen_rtx_REG (SImode, 0);
25536 rs6000_emit_move_from_cr (cr_save_rtx);
25537 }
25538
25539 /* Saving CR requires a two-instruction sequence: one instruction
25540 to move the CR to a general-purpose register, and a second
25541 instruction that stores the GPR to memory.
25542
25543 We do not emit any DWARF CFI records for the first of these,
25544 because we cannot properly represent the fact that CR is saved in
25545 a register. One reason is that we cannot express that multiple
25546 CR fields are saved; another reason is that on 64-bit, the size
25547 of the CR register in DWARF (4 bytes) differs from the size of
25548 a general-purpose register.
25549
25550 This means if any intervening instruction were to clobber one of
25551 the call-saved CR fields, we'd have incorrect CFI. To prevent
25552 this from happening, we mark the store to memory as a use of
25553 those CR fields, which prevents any such instruction from being
25554 scheduled in between the two instructions. */
25555 rtx crsave_v[9];
25556 int n_crsave = 0;
25557 int i;
25558
25559 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
25560 for (i = 0; i < 8; i++)
25561 if (save_reg_p (CR0_REGNO + i))
25562 crsave_v[n_crsave++]
25563 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
25564
25565 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
25566 gen_rtvec_v (n_crsave, crsave_v)));
25567 END_USE (REGNO (cr_save_rtx));
25568
25569 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
25570 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
25571 so we need to construct a frame expression manually. */
25572 RTX_FRAME_RELATED_P (insn) = 1;
25573
25574 /* Update address to be stack-pointer relative, like
25575 rs6000_frame_related would do. */
25576 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
25577 GEN_INT (info->cr_save_offset + sp_off));
25578 mem = gen_frame_mem (SImode, addr);
25579
25580 if (DEFAULT_ABI == ABI_ELFv2)
25581 {
25582 /* In the ELFv2 ABI we generate separate CFI records for each
25583 CR field that was actually saved. They all point to the
25584 same 32-bit stack slot. */
25585 rtx crframe[8];
25586 int n_crframe = 0;
25587
25588 for (i = 0; i < 8; i++)
25589 if (save_reg_p (CR0_REGNO + i))
25590 {
25591 crframe[n_crframe]
25592 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
25593
25594 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
25595 n_crframe++;
25596 }
25597
25598 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25599 gen_rtx_PARALLEL (VOIDmode,
25600 gen_rtvec_v (n_crframe, crframe)));
25601 }
25602 else
25603 {
25604 /* In other ABIs, by convention, we use a single CR regnum to
25605 represent the fact that all call-saved CR fields are saved.
25606 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
25607 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
25608 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
25609 }
25610 }
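/* Added sketch of the CR save just emitted, for a 64-bit ELFv2 frame
   (offsets illustrative):

	mfcr r12		# or a single-field mfocrf
	stw  r12,8(r1)		# one 32-bit slot holds all fields

   with one CFI record per saved CR field on ELFv2, all pointing at
   that same slot.  */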
25611
25612 /* In the ELFv2 ABI we need to save all call-saved CR fields into
25613 *separate* slots if the routine calls __builtin_eh_return, so
25614 that they can be independently restored by the unwinder. */
25615 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
25616 {
25617 int i, cr_off = info->ehcr_offset;
25618 rtx crsave;
25619
25620 /* ??? We might get better performance by using multiple mfocrf
25621 instructions. */
25622 crsave = gen_rtx_REG (SImode, 0);
25623 emit_insn (gen_movesi_from_cr (crsave));
25624
25625 for (i = 0; i < 8; i++)
25626 if (!call_used_regs[CR0_REGNO + i])
25627 {
25628 rtvec p = rtvec_alloc (2);
25629 RTVEC_ELT (p, 0)
25630 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
25631 RTVEC_ELT (p, 1)
25632 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
25633
25634 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25635
25636 RTX_FRAME_RELATED_P (insn) = 1;
25637 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25638 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
25639 sp_reg_rtx, cr_off + sp_off));
25640
25641 cr_off += reg_size;
25642 }
25643 }
25644
25645 /* Update stack and set back pointer unless this is V.4,
25646 for which it was done previously. */
25647 if (!WORLD_SAVE_P (info) && info->push_p
25648 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
25649 {
25650 rtx ptr_reg = NULL;
25651 int ptr_off = 0;
25652
25653 /* If saving altivec regs we need to be able to address all save
25654 locations using a 16-bit offset. */
25655 if ((strategy & SAVE_INLINE_VRS) == 0
25656 || (info->altivec_size != 0
25657 && (info->altivec_save_offset + info->altivec_size - 16
25658 + info->total_size - frame_off) > 32767)
25659 || (info->vrsave_size != 0
25660 && (info->vrsave_save_offset
25661 + info->total_size - frame_off) > 32767))
25662 {
25663 int sel = SAVRES_SAVE | SAVRES_VR;
25664 unsigned ptr_regno = ptr_regno_for_savres (sel);
25665
25666 if (using_static_chain_p
25667 && ptr_regno == STATIC_CHAIN_REGNUM)
25668 ptr_regno = 12;
25669 if (REGNO (frame_reg_rtx) != ptr_regno)
25670 START_USE (ptr_regno);
25671 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25672 frame_reg_rtx = ptr_reg;
25673 ptr_off = info->altivec_save_offset + info->altivec_size;
25674 frame_off = -ptr_off;
25675 }
25676 else if (REGNO (frame_reg_rtx) == 1)
25677 frame_off = info->total_size;
25678 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
25679 ptr_reg, ptr_off);
25680 if (REGNO (frame_reg_rtx) == 12)
25681 sp_adjust = 0;
25682 sp_off = info->total_size;
25683 if (frame_reg_rtx != sp_reg_rtx)
25684 rs6000_emit_stack_tie (frame_reg_rtx, false);
25685 }
25686
25687 /* Set frame pointer, if needed. */
25688 if (frame_pointer_needed)
25689 {
25690 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
25691 sp_reg_rtx);
25692 RTX_FRAME_RELATED_P (insn) = 1;
25693 }
25694
25695 /* Save AltiVec registers if needed. Save here because the red zone does
25696 not always include AltiVec registers. */
25697 if (!WORLD_SAVE_P (info)
25698 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
25699 {
25700 int end_save = info->altivec_save_offset + info->altivec_size;
25701 int ptr_off;
25702 /* Oddly, the vector save/restore functions point r0 at the end
25703 of the save area, then use r11 or r12 to load offsets for
25704 [reg+reg] addressing. */
25705 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
25706 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
25707 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
25708
25709 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
25710 NOT_INUSE (0);
25711 if (scratch_regno == 12)
25712 sp_adjust = 0;
25713 if (end_save + frame_off != 0)
25714 {
25715 rtx offset = GEN_INT (end_save + frame_off);
25716
25717 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
25718 }
25719 else
25720 emit_move_insn (ptr_reg, frame_reg_rtx);
25721
25722 ptr_off = -end_save;
25723 insn = rs6000_emit_savres_rtx (info, scratch_reg,
25724 info->altivec_save_offset + ptr_off,
25725 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
25726 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
25727 NULL_RTX, NULL_RTX);
25728 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
25729 {
25730 /* The oddity mentioned above clobbered our frame reg. */
25731 emit_move_insn (frame_reg_rtx, ptr_reg);
25732 frame_off = ptr_off;
25733 }
25734 }
25735 else if (!WORLD_SAVE_P (info)
25736 && info->altivec_size != 0)
25737 {
25738 int i;
25739
25740 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25741 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25742 {
25743 rtx areg, savereg, mem;
25744 int offset;
25745
25746 offset = (info->altivec_save_offset + frame_off
25747 + 16 * (i - info->first_altivec_reg_save));
25748
25749 savereg = gen_rtx_REG (V4SImode, i);
25750
25751 NOT_INUSE (0);
25752 areg = gen_rtx_REG (Pmode, 0);
25753 emit_move_insn (areg, GEN_INT (offset));
25754
25755 /* AltiVec addressing mode is [reg+reg]. */
25756 mem = gen_frame_mem (V4SImode,
25757 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
25758
25759 /* Rather than emitting a generic move, force use of the stvx
25760 instruction, which we always want. In particular we don't
25761 want xxpermdi/stxvd2x for little endian. */
25762 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
25763
25764 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
25765 areg, GEN_INT (offset));
25766 }
25767 }
25768
25769 /* VRSAVE is a bit vector representing which AltiVec registers
25770 are used. The OS uses this to determine which vector
25771 registers to save on a context switch. We need to save
25772 VRSAVE on the stack frame, add whatever AltiVec registers we
25773 used in this function, and do the corresponding magic in the
25774 epilogue. */
25775
25776 if (!WORLD_SAVE_P (info)
25777 && info->vrsave_size != 0)
25778 {
25779 rtx reg, vrsave;
25780 int offset;
25781 int save_regno;
25782
25783 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
25784 be using r12 as frame_reg_rtx and r11 as the static chain
25785 pointer for nested functions. */
25786 save_regno = 12;
25787 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25788 && !using_static_chain_p)
25789 save_regno = 11;
25790 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
25791 {
25792 save_regno = 11;
25793 if (using_static_chain_p)
25794 save_regno = 0;
25795 }
25796
25797 NOT_INUSE (save_regno);
25798 reg = gen_rtx_REG (SImode, save_regno);
25799 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
25800 if (TARGET_MACHO)
25801 emit_insn (gen_get_vrsave_internal (reg));
25802 else
25803 emit_insn (gen_rtx_SET (reg, vrsave));
25804
25805 /* Save VRSAVE. */
25806 offset = info->vrsave_save_offset + frame_off;
25807 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
25808
25809 /* Include the registers in the mask. */
25810 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
25811
25812 insn = emit_insn (generate_set_vrsave (reg, info, 0));
25813 }
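/* Roughly, the non-Darwin path above expands to:
     mfvrsave rS            copy VRSAVE into a GPR
     stw rS,OFF(rFRAME)     save the caller's value
     <or-immediate insns>   or in info->vrsave_mask
     mtvrsave rS            tell the OS about our vector regs
   Illustrative only; the exact or sequence depends on the mask.  */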
25814
25815 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
25816 if (!TARGET_SINGLE_PIC_BASE
25817 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
25818 || (DEFAULT_ABI == ABI_V4
25819 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
25820 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
25821 {
25822 /* If emit_load_toc_table will use the link register, we need to save
25823 it. We use R12 for this purpose because emit_load_toc_table
25824 can use register 0. This allows us to use a plain 'blr' to return
25825 from the procedure more often. */
25826 int save_LR_around_toc_setup = (TARGET_ELF
25827 && DEFAULT_ABI == ABI_V4
25828 && flag_pic
25829 && ! info->lr_save_p
25830 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
25831 if (save_LR_around_toc_setup)
25832 {
25833 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
25834 rtx tmp = gen_rtx_REG (Pmode, 12);
25835
25836 sp_adjust = 0;
25837 insn = emit_move_insn (tmp, lr);
25838 RTX_FRAME_RELATED_P (insn) = 1;
25839
25840 rs6000_emit_load_toc_table (TRUE);
25841
25842 insn = emit_move_insn (lr, tmp);
25843 add_reg_note (insn, REG_CFA_RESTORE, lr);
25844 RTX_FRAME_RELATED_P (insn) = 1;
25845 }
25846 else
25847 rs6000_emit_load_toc_table (TRUE);
25848 }
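/* When save_LR_around_toc_setup fires, the net effect is:
     mflr 12
     ...load_toc sequence (may "bl" and clobber LR)...
     mtlr 12
   so the function body can still end in a plain blr.  */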
25849
25850 #if TARGET_MACHO
25851 if (!TARGET_SINGLE_PIC_BASE
25852 && DEFAULT_ABI == ABI_DARWIN
25853 && flag_pic && crtl->uses_pic_offset_table)
25854 {
25855 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
25856 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
25857
25858 /* Save and restore LR locally around this call (in R0). */
25859 if (!info->lr_save_p)
25860 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
25861
25862 emit_insn (gen_load_macho_picbase (src));
25863
25864 emit_move_insn (gen_rtx_REG (Pmode,
25865 RS6000_PIC_OFFSET_TABLE_REGNUM),
25866 lr);
25867
25868 if (!info->lr_save_p)
25869 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
25870 }
25871 #endif
25872
25873 /* If we need to, save the TOC register after doing the stack setup.
25874 Do not emit eh frame info for this save. The unwinder wants info,
25875 conceptually attached to instructions in this function, about
25876 register values in the caller of this function. This R2 may have
25877 already been changed from the value in the caller.
25878 We don't attempt to write accurate DWARF EH frame info for R2
25879 because code emitted by gcc for a (non-pointer) function call
25880 doesn't save and restore R2. Instead, R2 is managed out-of-line
25881 by a linker generated plt call stub when the function resides in
25882 a shared library. This behaviour is costly to describe in DWARF,
25883 both in terms of the size of DWARF info and the time taken in the
25884 unwinder to interpret it. R2 changes, apart from the
25885 calls_eh_return case earlier in this function, are handled by
25886 linux-unwind.h frob_update_context. */
25887 if (rs6000_save_toc_in_prologue_p ())
25888 {
25889 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
25890 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
25891 }
25892
25893 if (using_split_stack && split_stack_arg_pointer_used_p ())
25894 {
25895 /* Set up the arg pointer (r12) for -fsplit-stack code. If
25896 __morestack was called, it left the arg pointer to the old
25897 stack in r29. Otherwise, the arg pointer is the top of the
25898 current frame. */
25899 cfun->machine->split_stack_argp_used = true;
25900 if (sp_adjust)
25901 {
25902 rtx r12 = gen_rtx_REG (Pmode, 12);
25903 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
25904 emit_insn_before (set_r12, sp_adjust);
25905 }
25906 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
25907 {
25908 rtx r12 = gen_rtx_REG (Pmode, 12);
25909 if (frame_off == 0)
25910 emit_move_insn (r12, frame_reg_rtx);
25911 else
25912 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
25913 }
25914 if (info->push_p)
25915 {
25916 rtx r12 = gen_rtx_REG (Pmode, 12);
25917 rtx r29 = gen_rtx_REG (Pmode, 29);
25918 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
25919 rtx not_more = gen_label_rtx ();
25920 rtx jump;
25921
25922 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
25923 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
25924 gen_rtx_LABEL_REF (VOIDmode, not_more),
25925 pc_rtx);
25926 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
25927 JUMP_LABEL (jump) = not_more;
25928 LABEL_NUSES (not_more) += 1;
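/* Fall-through case: cr7, still set by the split-stack prologue
   compare, said "not enough stack", so __morestack was called and
   the old stack's argument area is addressed via r29.  */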
25929 emit_move_insn (r12, r29);
25930 emit_label (not_more);
25931 }
25932 }
25933 }
25934
25935 /* Output .extern statements for the save/restore routines we use. */
25936
25937 static void
25938 rs6000_output_savres_externs (FILE *file)
25939 {
25940 rs6000_stack_t *info = rs6000_stack_info ();
25941
25942 if (TARGET_DEBUG_STACK)
25943 debug_stack_info (info);
25944
25945 /* Write .extern for any function we will call to save and restore
25946 fp values. */
25947 if (info->first_fp_reg_save < 64
25948 && !TARGET_MACHO
25949 && !TARGET_ELF)
25950 {
25951 char *name;
25952 int regno = info->first_fp_reg_save - 32;
25953
25954 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
25955 {
25956 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
25957 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
25958 name = rs6000_savres_routine_name (info, regno, sel);
25959 fprintf (file, "\t.extern %s\n", name);
25960 }
25961 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
25962 {
25963 bool lr = (info->savres_strategy
25964 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25965 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25966 name = rs6000_savres_routine_name (info, regno, sel);
25967 fprintf (file, "\t.extern %s\n", name);
25968 }
25969 }
25970 }
25971
25972 /* Write function prologue. */
25973
25974 static void
25975 rs6000_output_function_prologue (FILE *file,
25976 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
25977 {
25978 if (!cfun->is_thunk)
25979 rs6000_output_savres_externs (file);
25980
25981 /* ELFv2 ABI r2 setup code and local entry point. This must follow
25982 immediately after the global entry point label. */
25983 if (rs6000_global_entry_point_needed_p ())
25984 {
25985 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25986
25987 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
25988
25989 if (TARGET_CMODEL != CMODEL_LARGE)
25990 {
25991 /* In the small and medium code models, we assume the TOC is less than
25992 2 GB away from the text section, so it can be computed via the
25993 following two-instruction sequence. */
25994 char buf[256];
25995
25996 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
25997 fprintf (file, "0:\taddis 2,12,.TOC.-");
25998 assemble_name (file, buf);
25999 fprintf (file, "@ha\n");
26000 fprintf (file, "\taddi 2,2,.TOC.-");
26001 assemble_name (file, buf);
26002 fprintf (file, "@l\n");
26003 }
26004 else
26005 {
26006 /* In the large code model, we allow arbitrary offsets between the
26007 TOC and the text section, so we have to load the offset from
26008 memory. The data field is emitted directly before the global
26009 entry point in rs6000_elf_declare_function_name. */
26010 char buf[256];
26011
26012 #ifdef HAVE_AS_ENTRY_MARKERS
26013 /* If supported by the linker, emit a marker relocation. If the
26014 total code size of the final executable or shared library
26015 happens to fit into 2 GB after all, the linker will replace
26016 this code sequence with the sequence for the small or medium
26017 code model. */
26018 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
26019 #endif
26020 fprintf (file, "\tld 2,");
26021 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
26022 assemble_name (file, buf);
26023 fprintf (file, "-");
26024 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
26025 assemble_name (file, buf);
26026 fprintf (file, "(12)\n");
26027 fprintf (file, "\tadd 2,2,12\n");
26028 }
26029
26030 fputs ("\t.localentry\t", file);
26031 assemble_name (file, name);
26032 fputs (",.-", file);
26033 assemble_name (file, name);
26034 fputs ("\n", file);
26035 }
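/* For the small/medium code models the assembly emitted above is,
   for a function FN:
   .LCF0:
   0:   addis 2,12,.TOC.-.LCF0@ha
        addi 2,2,.TOC.-.LCF0@l
        .localentry FN,.-FN
   i.e. r2 is derived from the global entry address in r12.  */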
26036
26037 /* Output -mprofile-kernel code. This needs to be done here instead of
26038 in output_function_profile since it must go after the ELFv2 ABI
26039 local entry point. */
26040 if (TARGET_PROFILE_KERNEL && crtl->profile)
26041 {
26042 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
26043 gcc_assert (!TARGET_32BIT);
26044
26045 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
26046
26047 /* In the ELFv2 ABI we have no compiler stack word. It must be
26048 the responsibility of _mcount to preserve the static chain
26049 register if required. */
26050 if (DEFAULT_ABI != ABI_ELFv2
26051 && cfun->static_chain_decl != NULL)
26052 {
26053 asm_fprintf (file, "\tstd %s,24(%s)\n",
26054 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
26055 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
26056 asm_fprintf (file, "\tld %s,24(%s)\n",
26057 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
26058 }
26059 else
26060 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
26061 }
26062
26063 rs6000_pic_labelno++;
26064 }
26065
26066 /* Non-zero if vmx regs are restored before the frame pop, zero if
26067 we restore after the pop when possible. */
26068 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
26069
26070 /* Restoring cr is a two-step process: loading a reg from the frame
26071 save slot, then moving the reg to cr. For ABI_V4 we must let the
26072 unwinder know that the stack location is no longer valid at or
26073 before the stack deallocation, but we can't emit a cfa_restore for
26074 cr at the stack deallocation like we do for other registers.
26075 The trouble is that it is possible for the move to cr to be
26076 scheduled after the stack deallocation. So say exactly where cr
26077 is located on each of the two insns. */
26078
26079 static rtx
26080 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
26081 {
26082 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
26083 rtx reg = gen_rtx_REG (SImode, regno);
26084 rtx_insn *insn = emit_move_insn (reg, mem);
26085
26086 if (!exit_func && DEFAULT_ABI == ABI_V4)
26087 {
26088 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
26089 rtx set = gen_rtx_SET (reg, cr);
26090
26091 add_reg_note (insn, REG_CFA_REGISTER, set);
26092 RTX_FRAME_RELATED_P (insn) = 1;
26093 }
26094 return reg;
26095 }
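/* Concretely: the load above may carry a REG_CFA_REGISTER note
   saying "cr2 now lives in REG", and the eventual move to cr (in
   restore_saved_cr below) carries the REG_CFA_RESTORE, so the
   unwinder tracks cr correctly whichever insn executes last.  */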
26096
26097 /* Reload CR from REG. */
26098
26099 static void
26100 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
26101 {
26102 int count = 0;
26103 int i;
26104
26105 if (using_mfcr_multiple)
26106 {
26107 for (i = 0; i < 8; i++)
26108 if (save_reg_p (CR0_REGNO + i))
26109 count++;
26110 gcc_assert (count);
26111 }
26112
26113 if (using_mfcr_multiple && count > 1)
26114 {
26115 rtx_insn *insn;
26116 rtvec p;
26117 int ndx;
26118
26119 p = rtvec_alloc (count);
26120
26121 ndx = 0;
26122 for (i = 0; i < 8; i++)
26123 if (save_reg_p (CR0_REGNO + i))
26124 {
26125 rtvec r = rtvec_alloc (2);
26126 RTVEC_ELT (r, 0) = reg;
26127 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
26128 RTVEC_ELT (p, ndx) =
26129 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
26130 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
26131 ndx++;
26132 }
26133 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26134 gcc_assert (ndx == count);
26135
26136 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
26137 CR field separately. */
26138 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
26139 {
26140 for (i = 0; i < 8; i++)
26141 if (save_reg_p (CR0_REGNO + i))
26142 add_reg_note (insn, REG_CFA_RESTORE,
26143 gen_rtx_REG (SImode, CR0_REGNO + i));
26144
26145 RTX_FRAME_RELATED_P (insn) = 1;
26146 }
26147 }
26148 else
26149 for (i = 0; i < 8; i++)
26150 if (save_reg_p (CR0_REGNO + i))
26151 {
26152 rtx insn = emit_insn (gen_movsi_to_cr_one
26153 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
26154
26155 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
26156 CR field separately, attached to the insn that in fact
26157 restores this particular CR field. */
26158 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
26159 {
26160 add_reg_note (insn, REG_CFA_RESTORE,
26161 gen_rtx_REG (SImode, CR0_REGNO + i));
26162
26163 RTX_FRAME_RELATED_P (insn) = 1;
26164 }
26165 }
26166
26167 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
26168 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
26169 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
26170 {
26171 rtx_insn *insn = get_last_insn ();
26172 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
26173
26174 add_reg_note (insn, REG_CFA_RESTORE, cr);
26175 RTX_FRAME_RELATED_P (insn) = 1;
26176 }
26177 }
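/* E.g. if CR2, CR3 and CR4 were saved, the PARALLEL above becomes
   a single "mtcrf 0x38,rREG" (the masks 1<<(7-i) or'd together),
   while the fallback loop emits one mtcrf per CR field.  */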
26178
26179 /* Like cr, the move to lr instruction can be scheduled after the
26180 stack deallocation, but unlike cr, its stack frame save is still
26181 valid. So we only need to emit the cfa_restore on the correct
26182 instruction. */
26183
26184 static void
26185 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
26186 {
26187 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
26188 rtx reg = gen_rtx_REG (Pmode, regno);
26189
26190 emit_move_insn (reg, mem);
26191 }
26192
26193 static void
26194 restore_saved_lr (int regno, bool exit_func)
26195 {
26196 rtx reg = gen_rtx_REG (Pmode, regno);
26197 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26198 rtx_insn *insn = emit_move_insn (lr, reg);
26199
26200 if (!exit_func && flag_shrink_wrap)
26201 {
26202 add_reg_note (insn, REG_CFA_RESTORE, lr);
26203 RTX_FRAME_RELATED_P (insn) = 1;
26204 }
26205 }
26206
26207 static rtx
26208 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
26209 {
26210 if (DEFAULT_ABI == ABI_ELFv2)
26211 {
26212 int i;
26213 for (i = 0; i < 8; i++)
26214 if (save_reg_p (CR0_REGNO + i))
26215 {
26216 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
26217 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
26218 cfa_restores);
26219 }
26220 }
26221 else if (info->cr_save_p)
26222 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
26223 gen_rtx_REG (SImode, CR2_REGNO),
26224 cfa_restores);
26225
26226 if (info->lr_save_p)
26227 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
26228 gen_rtx_REG (Pmode, LR_REGNO),
26229 cfa_restores);
26230 return cfa_restores;
26231 }
26232
26233 /* Return true if OFFSET from stack pointer can be clobbered by signals.
26234 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
26235 below the stack pointer that are not clobbered by signals. */
26236
26237 static inline bool
26238 offset_below_red_zone_p (HOST_WIDE_INT offset)
26239 {
26240 return offset < (DEFAULT_ABI == ABI_V4
26241 ? 0
26242 : TARGET_32BIT ? -220 : -288);
26243 }
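/* Example: for the 64-bit AIX/ELF ABIs, offset_below_red_zone_p (-300)
   is true (beyond the 288-byte red zone) while -288 itself is still
   protected; for ABI_V4 any negative offset is unprotected.  */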
26244
26245 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
26246
26247 static void
26248 emit_cfa_restores (rtx cfa_restores)
26249 {
26250 rtx_insn *insn = get_last_insn ();
26251 rtx *loc = &REG_NOTES (insn);
26252
26253 while (*loc)
26254 loc = &XEXP (*loc, 1);
26255 *loc = cfa_restores;
26256 RTX_FRAME_RELATED_P (insn) = 1;
26257 }
26258
26259 /* Emit function epilogue as insns. */
26260
26261 void
26262 rs6000_emit_epilogue (int sibcall)
26263 {
26264 rs6000_stack_t *info;
26265 int restoring_GPRs_inline;
26266 int restoring_FPRs_inline;
26267 int using_load_multiple;
26268 int using_mtcr_multiple;
26269 int use_backchain_to_restore_sp;
26270 int restore_lr;
26271 int strategy;
26272 HOST_WIDE_INT frame_off = 0;
26273 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
26274 rtx frame_reg_rtx = sp_reg_rtx;
26275 rtx cfa_restores = NULL_RTX;
26276 rtx insn;
26277 rtx cr_save_reg = NULL_RTX;
26278 machine_mode reg_mode = Pmode;
26279 int reg_size = TARGET_32BIT ? 4 : 8;
26280 int i;
26281 bool exit_func;
26282 unsigned ptr_regno;
26283
26284 info = rs6000_stack_info ();
26285
26286 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
26287 {
26288 reg_mode = V2SImode;
26289 reg_size = 8;
26290 }
26291
26292 strategy = info->savres_strategy;
26293 using_load_multiple = strategy & SAVRES_MULTIPLE;
26294 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
26295 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
26296 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
26297 || rs6000_cpu == PROCESSOR_PPC603
26298 || rs6000_cpu == PROCESSOR_PPC750
26299 || optimize_size);
26300 /* Restore via the backchain when we have a large frame, since this
26301 is more efficient than an addis, addi pair. The second condition
26302 here will not trigger at the moment; we don't actually need a
26303 frame pointer for alloca, but the generic parts of the compiler
26304 give us one anyway. */
26305 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
26306 ? info->lr_save_offset
26307 : 0) > 32767
26308 || (cfun->calls_alloca
26309 && !frame_pointer_needed));
26310 restore_lr = (info->lr_save_p
26311 && (restoring_FPRs_inline
26312 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
26313 && (restoring_GPRs_inline
26314 || info->first_fp_reg_save < 64));
26315
26316 if (WORLD_SAVE_P (info))
26317 {
26318 int i, j;
26319 char rname[30];
26320 const char *alloc_rname;
26321 rtvec p;
26322
26323 /* eh_rest_world_r10 will return to the location saved in the LR
26324 stack slot (which is not likely to be our caller).
26325 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
26326 rest_world is similar, except any R10 parameter is ignored.
26327 The exception-handling stuff that was here in 2.95 is no
26328 longer necessary. */
26329
26330 p = rtvec_alloc (9
26331 + 1
26332 + 32 - info->first_gp_reg_save
26333 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
26334 + 63 + 1 - info->first_fp_reg_save);
26335
26336 strcpy (rname, ((crtl->calls_eh_return) ?
26337 "*eh_rest_world_r10" : "*rest_world"));
26338 alloc_rname = ggc_strdup (rname);
26339
26340 j = 0;
26341 RTVEC_ELT (p, j++) = ret_rtx;
26342 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
26343 gen_rtx_REG (Pmode,
26344 LR_REGNO));
26345 RTVEC_ELT (p, j++)
26346 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
26347 /* The instruction pattern requires a clobber here;
26348 it is shared with the restVEC helper. */
26349 RTVEC_ELT (p, j++)
26350 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
26351
26352 {
26353 /* CR register traditionally saved as CR2. */
26354 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
26355 RTVEC_ELT (p, j++)
26356 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
26357 if (flag_shrink_wrap)
26358 {
26359 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
26360 gen_rtx_REG (Pmode, LR_REGNO),
26361 cfa_restores);
26362 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26363 }
26364 }
26365
26366 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26367 {
26368 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
26369 RTVEC_ELT (p, j++)
26370 = gen_frame_load (reg,
26371 frame_reg_rtx, info->gp_save_offset + reg_size * i);
26372 if (flag_shrink_wrap)
26373 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26374 }
26375 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
26376 {
26377 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
26378 RTVEC_ELT (p, j++)
26379 = gen_frame_load (reg,
26380 frame_reg_rtx, info->altivec_save_offset + 16 * i);
26381 if (flag_shrink_wrap)
26382 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26383 }
26384 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
26385 {
26386 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
26387 ? DFmode : SFmode),
26388 info->first_fp_reg_save + i);
26389 RTVEC_ELT (p, j++)
26390 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
26391 if (flag_shrink_wrap)
26392 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26393 }
26394 RTVEC_ELT (p, j++)
26395 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
26396 RTVEC_ELT (p, j++)
26397 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
26398 RTVEC_ELT (p, j++)
26399 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
26400 RTVEC_ELT (p, j++)
26401 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
26402 RTVEC_ELT (p, j++)
26403 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
26404 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
26405
26406 if (flag_shrink_wrap)
26407 {
26408 REG_NOTES (insn) = cfa_restores;
26409 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
26410 RTX_FRAME_RELATED_P (insn) = 1;
26411 }
26412 return;
26413 }
26414
26415 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
26416 if (info->push_p)
26417 frame_off = info->total_size;
26418
26419 /* Restore AltiVec registers if we must do so before adjusting the
26420 stack. */
26421 if (info->altivec_size != 0
26422 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26423 || (DEFAULT_ABI != ABI_V4
26424 && offset_below_red_zone_p (info->altivec_save_offset))))
26425 {
26426 int i;
26427 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
26428
26429 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
26430 if (use_backchain_to_restore_sp)
26431 {
26432 int frame_regno = 11;
26433
26434 if ((strategy & REST_INLINE_VRS) == 0)
26435 {
26436 /* Of r11 and r12, select the one not clobbered by an
26437 out-of-line restore function for the frame register. */
26438 frame_regno = 11 + 12 - scratch_regno;
26439 }
26440 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
26441 emit_move_insn (frame_reg_rtx,
26442 gen_rtx_MEM (Pmode, sp_reg_rtx));
26443 frame_off = 0;
26444 }
26445 else if (frame_pointer_needed)
26446 frame_reg_rtx = hard_frame_pointer_rtx;
26447
26448 if ((strategy & REST_INLINE_VRS) == 0)
26449 {
26450 int end_save = info->altivec_save_offset + info->altivec_size;
26451 int ptr_off;
26452 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
26453 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
26454
26455 if (end_save + frame_off != 0)
26456 {
26457 rtx offset = GEN_INT (end_save + frame_off);
26458
26459 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26460 }
26461 else
26462 emit_move_insn (ptr_reg, frame_reg_rtx);
26463
26464 ptr_off = -end_save;
26465 insn = rs6000_emit_savres_rtx (info, scratch_reg,
26466 info->altivec_save_offset + ptr_off,
26467 0, V4SImode, SAVRES_VR);
26468 }
26469 else
26470 {
26471 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26472 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26473 {
26474 rtx addr, areg, mem, reg;
26475
26476 areg = gen_rtx_REG (Pmode, 0);
26477 emit_move_insn
26478 (areg, GEN_INT (info->altivec_save_offset
26479 + frame_off
26480 + 16 * (i - info->first_altivec_reg_save)));
26481
26482 /* AltiVec addressing mode is [reg+reg]. */
26483 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
26484 mem = gen_frame_mem (V4SImode, addr);
26485
26486 reg = gen_rtx_REG (V4SImode, i);
26487 /* Rather than emitting a generic move, force use of the
26488 lvx instruction, which we always want. In particular
26489 we don't want lxvd2x/xxpermdi for little endian. */
26490 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
26491 }
26492 }
26493
26494 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26495 if (((strategy & REST_INLINE_VRS) == 0
26496 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
26497 && (flag_shrink_wrap
26498 || (offset_below_red_zone_p
26499 (info->altivec_save_offset
26500 + 16 * (i - info->first_altivec_reg_save)))))
26501 {
26502 rtx reg = gen_rtx_REG (V4SImode, i);
26503 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26504 }
26505 }
26506
26507 /* Restore VRSAVE if we must do so before adjusting the stack. */
26508 if (info->vrsave_size != 0
26509 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26510 || (DEFAULT_ABI != ABI_V4
26511 && offset_below_red_zone_p (info->vrsave_save_offset))))
26512 {
26513 rtx reg;
26514
26515 if (frame_reg_rtx == sp_reg_rtx)
26516 {
26517 if (use_backchain_to_restore_sp)
26518 {
26519 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
26520 emit_move_insn (frame_reg_rtx,
26521 gen_rtx_MEM (Pmode, sp_reg_rtx));
26522 frame_off = 0;
26523 }
26524 else if (frame_pointer_needed)
26525 frame_reg_rtx = hard_frame_pointer_rtx;
26526 }
26527
26528 reg = gen_rtx_REG (SImode, 12);
26529 emit_insn (gen_frame_load (reg, frame_reg_rtx,
26530 info->vrsave_save_offset + frame_off));
26531
26532 emit_insn (generate_set_vrsave (reg, info, 1));
26533 }
26534
26535 insn = NULL_RTX;
26536 /* If we have a large stack frame, restore the old stack pointer
26537 using the backchain. */
26538 if (use_backchain_to_restore_sp)
26539 {
26540 if (frame_reg_rtx == sp_reg_rtx)
26541 {
26542 /* Under V.4, don't reset the stack pointer until after we're done
26543 loading the saved registers. */
26544 if (DEFAULT_ABI == ABI_V4)
26545 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
26546
26547 insn = emit_move_insn (frame_reg_rtx,
26548 gen_rtx_MEM (Pmode, sp_reg_rtx));
26549 frame_off = 0;
26550 }
26551 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26552 && DEFAULT_ABI == ABI_V4)
26553 /* frame_reg_rtx has been set up by the altivec restore. */
26554 ;
26555 else
26556 {
26557 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
26558 frame_reg_rtx = sp_reg_rtx;
26559 }
26560 }
26561 /* If we have a frame pointer, we can restore the old stack pointer
26562 from it. */
26563 else if (frame_pointer_needed)
26564 {
26565 frame_reg_rtx = sp_reg_rtx;
26566 if (DEFAULT_ABI == ABI_V4)
26567 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
26568 /* Prevent reordering memory accesses against stack pointer restore. */
26569 else if (cfun->calls_alloca
26570 || offset_below_red_zone_p (-info->total_size))
26571 rs6000_emit_stack_tie (frame_reg_rtx, true);
26572
26573 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
26574 GEN_INT (info->total_size)));
26575 frame_off = 0;
26576 }
26577 else if (info->push_p
26578 && DEFAULT_ABI != ABI_V4
26579 && !crtl->calls_eh_return)
26580 {
26581 /* Prevent reordering memory accesses against stack pointer restore. */
26582 if (cfun->calls_alloca
26583 || offset_below_red_zone_p (-info->total_size))
26584 rs6000_emit_stack_tie (frame_reg_rtx, false);
26585 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
26586 GEN_INT (info->total_size)));
26587 frame_off = 0;
26588 }
26589 if (insn && frame_reg_rtx == sp_reg_rtx)
26590 {
26591 if (cfa_restores)
26592 {
26593 REG_NOTES (insn) = cfa_restores;
26594 cfa_restores = NULL_RTX;
26595 }
26596 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
26597 RTX_FRAME_RELATED_P (insn) = 1;
26598 }
26599
26600 /* Restore AltiVec registers if we have not done so already. */
26601 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26602 && info->altivec_size != 0
26603 && (DEFAULT_ABI == ABI_V4
26604 || !offset_below_red_zone_p (info->altivec_save_offset)))
26605 {
26606 int i;
26607
26608 if ((strategy & REST_INLINE_VRS) == 0)
26609 {
26610 int end_save = info->altivec_save_offset + info->altivec_size;
26611 int ptr_off;
26612 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
26613 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
26614 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
26615
26616 if (end_save + frame_off != 0)
26617 {
26618 rtx offset = GEN_INT (end_save + frame_off);
26619
26620 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26621 }
26622 else
26623 emit_move_insn (ptr_reg, frame_reg_rtx);
26624
26625 ptr_off = -end_save;
26626 insn = rs6000_emit_savres_rtx (info, scratch_reg,
26627 info->altivec_save_offset + ptr_off,
26628 0, V4SImode, SAVRES_VR);
26629 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
26630 {
26631 /* Frame reg was clobbered by out-of-line save. Restore it
26632 from ptr_reg, and if we are calling out-of-line gpr or
26633 fpr restore set up the correct pointer and offset. */
26634 unsigned newptr_regno = 1;
26635 if (!restoring_GPRs_inline)
26636 {
26637 bool lr = info->gp_save_offset + info->gp_size == 0;
26638 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
26639 newptr_regno = ptr_regno_for_savres (sel);
26640 end_save = info->gp_save_offset + info->gp_size;
26641 }
26642 else if (!restoring_FPRs_inline)
26643 {
26644 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
26645 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
26646 newptr_regno = ptr_regno_for_savres (sel);
26647 end_save = info->fp_save_offset + info->fp_size;
26648 }
26649
26650 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
26651 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
26652
26653 if (end_save + ptr_off != 0)
26654 {
26655 rtx offset = GEN_INT (end_save + ptr_off);
26656
26657 frame_off = -end_save;
26658 if (TARGET_32BIT)
26659 emit_insn (gen_addsi3_carry (frame_reg_rtx,
26660 ptr_reg, offset));
26661 else
26662 emit_insn (gen_adddi3_carry (frame_reg_rtx,
26663 ptr_reg, offset));
26664 }
26665 else
26666 {
26667 frame_off = ptr_off;
26668 emit_move_insn (frame_reg_rtx, ptr_reg);
26669 }
26670 }
26671 }
26672 else
26673 {
26674 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26675 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26676 {
26677 rtx addr, areg, mem, reg;
26678
26679 areg = gen_rtx_REG (Pmode, 0);
26680 emit_move_insn
26681 (areg, GEN_INT (info->altivec_save_offset
26682 + frame_off
26683 + 16 * (i - info->first_altivec_reg_save)));
26684
26685 /* AltiVec addressing mode is [reg+reg]. */
26686 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
26687 mem = gen_frame_mem (V4SImode, addr);
26688
26689 reg = gen_rtx_REG (V4SImode, i);
26690 /* Rather than emitting a generic move, force use of the
26691 lvx instruction, which we always want. In particular
26692 we don't want lxvd2x/xxpermdi for little endian. */
26693 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
26694 }
26695 }
26696
26697 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
26698 if (((strategy & REST_INLINE_VRS) == 0
26699 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
26700 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
26701 {
26702 rtx reg = gen_rtx_REG (V4SImode, i);
26703 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26704 }
26705 }
26706
26707 /* Restore VRSAVE if we have not done so already. */
26708 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
26709 && info->vrsave_size != 0
26710 && (DEFAULT_ABI == ABI_V4
26711 || !offset_below_red_zone_p (info->vrsave_save_offset)))
26712 {
26713 rtx reg;
26714
26715 reg = gen_rtx_REG (SImode, 12);
26716 emit_insn (gen_frame_load (reg, frame_reg_rtx,
26717 info->vrsave_save_offset + frame_off));
26718
26719 emit_insn (generate_set_vrsave (reg, info, 1));
26720 }
26721
26722 /* If we exit by an out-of-line restore function on ABI_V4 then that
26723 function will deallocate the stack, so we don't need to worry
26724 about the unwinder restoring cr from an invalid stack frame
26725 location. */
26726 exit_func = (!restoring_FPRs_inline
26727 || (!restoring_GPRs_inline
26728 && info->first_fp_reg_save == 64));
26729
26730 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
26731 *separate* slots if the routine calls __builtin_eh_return, so
26732 that they can be independently restored by the unwinder. */
26733 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
26734 {
26735 int i, cr_off = info->ehcr_offset;
26736
26737 for (i = 0; i < 8; i++)
26738 if (!call_used_regs[CR0_REGNO + i])
26739 {
26740 rtx reg = gen_rtx_REG (SImode, 0);
26741 emit_insn (gen_frame_load (reg, frame_reg_rtx,
26742 cr_off + frame_off));
26743
26744 insn = emit_insn (gen_movsi_to_cr_one
26745 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
26746
26747 if (!exit_func && flag_shrink_wrap)
26748 {
26749 add_reg_note (insn, REG_CFA_RESTORE,
26750 gen_rtx_REG (SImode, CR0_REGNO + i));
26751
26752 RTX_FRAME_RELATED_P (insn) = 1;
26753 }
26754
26755 cr_off += reg_size;
26756 }
26757 }
26758
26759 /* Get the old lr if we saved it. If we are restoring registers
26760 out-of-line, then the out-of-line routines can do this for us. */
26761 if (restore_lr && restoring_GPRs_inline)
26762 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
26763
26764 /* Get the old cr if we saved it. */
26765 if (info->cr_save_p)
26766 {
26767 unsigned cr_save_regno = 12;
26768
26769 if (!restoring_GPRs_inline)
26770 {
26771 /* Ensure we don't use the register used by the out-of-line
26772 gpr register restore below. */
26773 bool lr = info->gp_save_offset + info->gp_size == 0;
26774 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
26775 int gpr_ptr_regno = ptr_regno_for_savres (sel);
26776
26777 if (gpr_ptr_regno == 12)
26778 cr_save_regno = 11;
26779 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
26780 }
26781 else if (REGNO (frame_reg_rtx) == 12)
26782 cr_save_regno = 11;
26783
26784 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
26785 info->cr_save_offset + frame_off,
26786 exit_func);
26787 }
26788
26789 /* Set LR here to try to overlap restores below. */
26790 if (restore_lr && restoring_GPRs_inline)
26791 restore_saved_lr (0, exit_func);
26792
26793 /* Load exception handler data registers, if needed. */
26794 if (crtl->calls_eh_return)
26795 {
26796 unsigned int i, regno;
26797
26798 if (TARGET_AIX)
26799 {
26800 rtx reg = gen_rtx_REG (reg_mode, 2);
26801 emit_insn (gen_frame_load (reg, frame_reg_rtx,
26802 frame_off + RS6000_TOC_SAVE_SLOT));
26803 }
26804
26805 for (i = 0; ; ++i)
26806 {
26807 rtx mem;
26808
26809 regno = EH_RETURN_DATA_REGNO (i);
26810 if (regno == INVALID_REGNUM)
26811 break;
26812
26813 /* Note: possible use of r0 here to address SPE regs. */
26814 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
26815 info->ehrd_offset + frame_off
26816 + reg_size * (int) i);
26817
26818 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
26819 }
26820 }
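/* On rs6000, EH_RETURN_DATA_REGNO (i) yields r3..r6, so the loop
   above reloads the four exception-handler data registers from
   their frame slots.  */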
26821
26822 /* Restore GPRs. This is done as a PARALLEL if we are using
26823 the load-multiple instructions. */
26824 if (TARGET_SPE_ABI
26825 && info->spe_64bit_regs_used
26826 && info->first_gp_reg_save != 32)
26827 {
26828 /* Determine whether we can address all of the registers that need
26829 to be saved with an offset from frame_reg_rtx that fits in
26830 the small const field for SPE memory instructions. */
26831 int spe_regs_addressable
26832 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
26833 + reg_size * (32 - info->first_gp_reg_save - 1))
26834 && restoring_GPRs_inline);
26835
26836 if (!spe_regs_addressable)
26837 {
26838 int ool_adjust = 0;
26839 rtx old_frame_reg_rtx = frame_reg_rtx;
26840 /* Make r11 point to the start of the SPE save area. We worried about
26841 not clobbering it when we were saving registers in the prologue.
26842 There's no need to worry here because the static chain is passed
26843 anew to every function. */
26844
26845 if (!restoring_GPRs_inline)
26846 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
26847 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
26848 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
26849 GEN_INT (info->spe_gp_save_offset
26850 + frame_off
26851 - ool_adjust)));
26852 /* Keep the invariant that frame_reg_rtx + frame_off points
26853 at the top of the stack frame. */
26854 frame_off = -info->spe_gp_save_offset + ool_adjust;
26855 }
26856
26857 if (restoring_GPRs_inline)
26858 {
26859 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
26860
26861 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26862 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
26863 {
26864 rtx offset, addr, mem, reg;
26865
26866 /* We're doing all this to ensure that the immediate offset
26867 fits into the immediate field of 'evldd'. */
26868 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
26869
26870 offset = GEN_INT (spe_offset + reg_size * i);
26871 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
26872 mem = gen_rtx_MEM (V2SImode, addr);
26873 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
26874
26875 emit_move_insn (reg, mem);
26876 }
26877 }
26878 else
26879 rs6000_emit_savres_rtx (info, frame_reg_rtx,
26880 info->spe_gp_save_offset + frame_off,
26881 info->lr_save_offset + frame_off,
26882 reg_mode,
26883 SAVRES_GPR | SAVRES_LR);
26884 }
26885 else if (!restoring_GPRs_inline)
26886 {
26887 /* We are jumping to an out-of-line function. */
26888 rtx ptr_reg;
26889 int end_save = info->gp_save_offset + info->gp_size;
26890 bool can_use_exit = end_save == 0;
26891 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
26892 int ptr_off;
26893
26894 /* Emit stack reset code if we need it. */
26895 ptr_regno = ptr_regno_for_savres (sel);
26896 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26897 if (can_use_exit)
26898 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
26899 else if (end_save + frame_off != 0)
26900 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
26901 GEN_INT (end_save + frame_off)));
26902 else if (REGNO (frame_reg_rtx) != ptr_regno)
26903 emit_move_insn (ptr_reg, frame_reg_rtx);
26904 if (REGNO (frame_reg_rtx) == ptr_regno)
26905 frame_off = -end_save;
26906
26907 if (can_use_exit && info->cr_save_p)
26908 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
26909
26910 ptr_off = -end_save;
26911 rs6000_emit_savres_rtx (info, ptr_reg,
26912 info->gp_save_offset + ptr_off,
26913 info->lr_save_offset + ptr_off,
26914 reg_mode, sel);
26915 }
26916 else if (using_load_multiple)
26917 {
26918 rtvec p;
26919 p = rtvec_alloc (32 - info->first_gp_reg_save);
26920 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26921 RTVEC_ELT (p, i)
26922 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26923 frame_reg_rtx,
26924 info->gp_save_offset + frame_off + reg_size * i);
26925 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
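/* On 32-bit targets the PARALLEL just emitted matches the
   load-multiple pattern and becomes a single insn, e.g.
   "lmw 30,OFF(r1)" to reload r30 and r31 together.  */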
26926 }
26927 else
26928 {
26929 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26930 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
26931 emit_insn (gen_frame_load
26932 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26933 frame_reg_rtx,
26934 info->gp_save_offset + frame_off + reg_size * i));
26935 }
26936
26937 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
26938 {
26939 /* If the frame pointer was used then we can't delay emitting
26940 a REG_CFA_DEF_CFA note. This must happen on the insn that
26941 restores the frame pointer, r31. We may have already emitted
26942 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
26943 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
26944 be harmless if emitted. */
26945 if (frame_pointer_needed)
26946 {
26947 insn = get_last_insn ();
26948 add_reg_note (insn, REG_CFA_DEF_CFA,
26949 plus_constant (Pmode, frame_reg_rtx, frame_off));
26950 RTX_FRAME_RELATED_P (insn) = 1;
26951 }
26952
26953 /* Set up cfa_restores. We always need these when
26954 shrink-wrapping. If not shrink-wrapping then we only need
26955 the cfa_restore when the stack location is no longer valid.
26956 The cfa_restores must be emitted on or before the insn that
26957 invalidates the stack, and of course must not be emitted
26958 before the insn that actually does the restore. The latter
26959 is why it is a bad idea to emit the cfa_restores as a group
26960 on the last instruction here that actually does a restore:
26961 That insn may be reordered with respect to others doing
26962 restores. */
26963 if (flag_shrink_wrap
26964 && !restoring_GPRs_inline
26965 && info->first_fp_reg_save == 64)
26966 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
26967
26968 for (i = info->first_gp_reg_save; i < 32; i++)
26969 if (!restoring_GPRs_inline
26970 || using_load_multiple
26971 || rs6000_reg_live_or_pic_offset_p (i))
26972 {
26973 rtx reg = gen_rtx_REG (reg_mode, i);
26974
26975 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26976 }
26977 }
26978
26979 if (!restoring_GPRs_inline
26980 && info->first_fp_reg_save == 64)
26981 {
26982 /* We are jumping to an out-of-line function. */
26983 if (cfa_restores)
26984 emit_cfa_restores (cfa_restores);
26985 return;
26986 }
26987
26988 if (restore_lr && !restoring_GPRs_inline)
26989 {
26990 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
26991 restore_saved_lr (0, exit_func);
26992 }
26993
26994 /* Restore fpr's if we need to do it without calling a function. */
26995 if (restoring_FPRs_inline)
26996 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26997 if (save_reg_p (info->first_fp_reg_save + i))
26998 {
26999 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
27000 ? DFmode : SFmode),
27001 info->first_fp_reg_save + i);
27002 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27003 info->fp_save_offset + frame_off + 8 * i));
27004 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
27005 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27006 }
27007
27008 /* If we saved cr, restore it here. Just those that were used. */
27009 if (info->cr_save_p)
27010 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
27011
27012 /* If this is V.4, unwind the stack pointer after all of the loads
27013 have been done, or set up r11 if we are restoring fp out of line. */
27014 ptr_regno = 1;
27015 if (!restoring_FPRs_inline)
27016 {
27017 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27018 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27019 ptr_regno = ptr_regno_for_savres (sel);
27020 }
27021
27022 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
27023 if (REGNO (frame_reg_rtx) == ptr_regno)
27024 frame_off = 0;
27025
27026 if (insn && restoring_FPRs_inline)
27027 {
27028 if (cfa_restores)
27029 {
27030 REG_NOTES (insn) = cfa_restores;
27031 cfa_restores = NULL_RTX;
27032 }
27033 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27034 RTX_FRAME_RELATED_P (insn) = 1;
27035 }
27036
27037 if (crtl->calls_eh_return)
27038 {
27039 rtx sa = EH_RETURN_STACKADJ_RTX;
27040 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
27041 }
27042
27043 if (!sibcall)
27044 {
27045 rtvec p;
27046 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27047 if (! restoring_FPRs_inline)
27048 {
27049 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
27050 RTVEC_ELT (p, 0) = ret_rtx;
27051 }
27052 else
27053 {
27054 if (cfa_restores)
27055 {
27056 /* We can't hang the cfa_restores off a simple return,
27057 since the shrink-wrap code sometimes uses an existing
27058 return. This means there might be a path from
27059 pre-prologue code to this return, and dwarf2cfi code
27060 wants the eh_frame unwinder state to be the same on
27061 all paths to any point. So we need to emit the
27062 cfa_restores before the return. For -m64 we really
27063 don't need epilogue cfa_restores at all, except for
27064 this irritating dwarf2cfi with shrink-wrap
27065 requirement; the stack red-zone means eh_frame info
27066 from the prologue telling the unwinder to restore
27067 from the stack is perfectly good right to the end of
27068 the function. */
27069 emit_insn (gen_blockage ());
27070 emit_cfa_restores (cfa_restores);
27071 cfa_restores = NULL_RTX;
27072 }
27073 p = rtvec_alloc (2);
27074 RTVEC_ELT (p, 0) = simple_return_rtx;
27075 }
27076
27077 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
27078 ? gen_rtx_USE (VOIDmode,
27079 gen_rtx_REG (Pmode, LR_REGNO))
27080 : gen_rtx_CLOBBER (VOIDmode,
27081 gen_rtx_REG (Pmode, LR_REGNO)));
27082
27083 /* If we have to restore more than two FP registers, branch to the
27084 restore function. It will return to our caller. */
27085 if (! restoring_FPRs_inline)
27086 {
27087 int i;
27088 int reg;
27089 rtx sym;
27090
27091 if (flag_shrink_wrap)
27092 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
27093
27094 sym = rs6000_savres_routine_sym (info,
27095 SAVRES_FPR | (lr ? SAVRES_LR : 0));
27096 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
27097 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
27098 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
27099
27100 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27101 {
27102 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
27103
27104 RTVEC_ELT (p, i + 4)
27105 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
27106 if (flag_shrink_wrap)
27107 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
27108 cfa_restores);
27109 }
27110 }
27111
27112 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
27113 }
27114
27115 if (cfa_restores)
27116 {
27117 if (sibcall)
27118 /* Ensure the cfa_restores are hung off an insn that won't
27119 be reordered above other restores. */
27120 emit_insn (gen_blockage ());
27121
27122 emit_cfa_restores (cfa_restores);
27123 }
27124 }
27125
27126 /* Write function epilogue. */
27127
27128 static void
27129 rs6000_output_function_epilogue (FILE *file,
27130 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
27131 {
27132 #if TARGET_MACHO
27133 macho_branch_islands ();
27134 /* Mach-O doesn't support labels at the end of objects, so if
27135 it looks like we might want one, insert a NOP. */
27136 {
27137 rtx_insn *insn = get_last_insn ();
27138 rtx_insn *deleted_debug_label = NULL;
27139 while (insn
27140 && NOTE_P (insn)
27141 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
27142 {
27143 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
27144 notes only, instead set their CODE_LABEL_NUMBER to -1,
27145 otherwise there would be code generation differences
27146 in between -g and -g0. */
27147 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
27148 deleted_debug_label = insn;
27149 insn = PREV_INSN (insn);
27150 }
27151 if (insn
27152 && (LABEL_P (insn)
27153 || (NOTE_P (insn)
27154 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
27155 fputs ("\tnop\n", file);
27156 else if (deleted_debug_label)
27157 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
27158 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
27159 CODE_LABEL_NUMBER (insn) = -1;
27160 }
27161 #endif
27162
27163 /* Output a traceback table here. See /usr/include/sys/debug.h for info
27164 on its format.
27165
27166 We don't output a traceback table if -finhibit-size-directive was
27167 used. The documentation for -finhibit-size-directive reads
27168 ``don't output a @code{.size} assembler directive, or anything
27169 else that would cause trouble if the function is split in the
27170 middle, and the two halves are placed at locations far apart in
27171 memory.'' The traceback table has this property, since it
27172 includes the offset from the start of the function to the
27173 traceback table itself.
27174
27175 System V.4 PowerPC (and the embedded ABI derived from it) uses a
27176 different traceback table. */
27177 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27178 && ! flag_inhibit_size_directive
27179 && rs6000_traceback != traceback_none && !cfun->is_thunk)
27180 {
27181 const char *fname = NULL;
27182 const char *language_string = lang_hooks.name;
27183 int fixed_parms = 0, float_parms = 0, parm_info = 0;
27184 int i;
27185 int optional_tbtab;
27186 rs6000_stack_t *info = rs6000_stack_info ();
27187
27188 if (rs6000_traceback == traceback_full)
27189 optional_tbtab = 1;
27190 else if (rs6000_traceback == traceback_part)
27191 optional_tbtab = 0;
27192 else
27193 optional_tbtab = !optimize_size && !TARGET_ELF;
27194
27195 if (optional_tbtab)
27196 {
27197 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27198 while (*fname == '.') /* V.4 encodes . in the name */
27199 fname++;
27200
27201 /* Need label immediately before tbtab, so we can compute
27202 its offset from the function start. */
27203 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
27204 ASM_OUTPUT_LABEL (file, fname);
27205 }
27206
27207 /* The .tbtab pseudo-op can only be used for the first eight
27208 expressions, since it can't handle the possibly variable
27209 length fields that follow. However, if you omit the optional
27210 fields, the assembler outputs zeros for all optional fields
27211 anyway, giving each variable-length field its minimum length
27212 (as defined in sys/debug.h). Thus we cannot use the .tbtab
27213 pseudo-op at all. */
27214
27215 /* An all-zero word flags the start of the tbtab, for debuggers
27216 that have to find it by searching forward from the entry
27217 point or from the current pc. */
27218 fputs ("\t.long 0\n", file);
27219
27220 /* Tbtab format type. Use format type 0. */
27221 fputs ("\t.byte 0,", file);
27222
27223 /* Language type. Unfortunately, there does not seem to be any
27224 official way to discover the language being compiled, so we
27225 use language_string.
27226 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
27227 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
27228 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
27229 either, so for now use 0. */
27230 if (lang_GNU_C ()
27231 || ! strcmp (language_string, "GNU GIMPLE")
27232 || ! strcmp (language_string, "GNU Go")
27233 || ! strcmp (language_string, "libgccjit"))
27234 i = 0;
27235 else if (! strcmp (language_string, "GNU F77")
27236 || lang_GNU_Fortran ())
27237 i = 1;
27238 else if (! strcmp (language_string, "GNU Pascal"))
27239 i = 2;
27240 else if (! strcmp (language_string, "GNU Ada"))
27241 i = 3;
27242 else if (lang_GNU_CXX ()
27243 || ! strcmp (language_string, "GNU Objective-C++"))
27244 i = 9;
27245 else if (! strcmp (language_string, "GNU Java"))
27246 i = 13;
27247 else if (! strcmp (language_string, "GNU Objective-C"))
27248 i = 14;
27249 else
27250 gcc_unreachable ();
27251 fprintf (file, "%d,", i);
27252
27253 /* 8 single bit fields: global linkage (not set for C extern linkage,
27254 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
27255 from start of procedure stored in tbtab, internal function, function
27256 has controlled storage, function has no toc, function uses fp,
27257 function logs/aborts fp operations. */
27258 /* Assume that fp operations are used if any fp reg must be saved. */
27259 fprintf (file, "%d,",
27260 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
27261
27262 /* 6 bitfields: function is interrupt handler, name present in
27263 proc table, function calls alloca, on condition directives
27264 (controls stack walks, 3 bits), saves condition reg, saves
27265 link reg. */
27266 /* The `function calls alloca' bit seems to be set whenever reg 31 is
27267 set up as a frame pointer, even when there is no alloca call. */
27268 fprintf (file, "%d,",
27269 ((optional_tbtab << 6)
27270 | ((optional_tbtab & frame_pointer_needed) << 5)
27271 | (info->cr_save_p << 1)
27272 | (info->lr_save_p)));
27273
27274 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
27275 (6 bits). */
27276 fprintf (file, "%d,",
27277 (info->push_p << 7) | (64 - info->first_fp_reg_save));
27278
27279 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
27280 fprintf (file, "%d,", (32 - first_reg_to_save ()));
27281
27282 if (optional_tbtab)
27283 {
27284 /* Compute the parameter info from the function decl argument
27285 list. */
27286 tree decl;
27287 int next_parm_info_bit = 31;
27288
27289 for (decl = DECL_ARGUMENTS (current_function_decl);
27290 decl; decl = DECL_CHAIN (decl))
27291 {
27292 rtx parameter = DECL_INCOMING_RTL (decl);
27293 machine_mode mode = GET_MODE (parameter);
27294
27295 if (GET_CODE (parameter) == REG)
27296 {
27297 if (SCALAR_FLOAT_MODE_P (mode))
27298 {
27299 int bits;
27300
27301 float_parms++;
27302
27303 switch (mode)
27304 {
27305 case SFmode:
27306 case SDmode:
27307 bits = 0x2;
27308 break;
27309
27310 case DFmode:
27311 case DDmode:
27312 case TFmode:
27313 case TDmode:
27314 case IFmode:
27315 case KFmode:
27316 bits = 0x3;
27317 break;
27318
27319 default:
27320 gcc_unreachable ();
27321 }
27322
27323 /* If only one bit will fit, don't or in this entry. */
27324 if (next_parm_info_bit > 0)
27325 parm_info |= (bits << (next_parm_info_bit - 1));
27326 next_parm_info_bit -= 2;
27327 }
27328 else
27329 {
27330 fixed_parms += ((GET_MODE_SIZE (mode)
27331 + (UNITS_PER_WORD - 1))
27332 / UNITS_PER_WORD);
27333 next_parm_info_bit -= 1;
27334 }
27335 }
27336 }
27337 }
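/* Worked example: for f (int, double, int) the loop above leaves
   fixed_parms == 2, float_parms == 1, and parm_info reading from
   bit 31 down as 0,11,0 -- fixed, double float, fixed -- matching
   the encoding described below.  */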
27338
27339 /* Number of fixed point parameters. */
27340 /* This is actually the number of words of fixed point parameters; thus
27341 an 8-byte struct counts as 2, and thus the maximum value is 8. */
27342 fprintf (file, "%d,", fixed_parms);
27343
27344 /* 2 bitfields: number of floating point parameters (7 bits), parameters
27345 all on stack. */
27346 /* This is actually the number of fp registers that hold parameters;
27347 and thus the maximum value is 13. */
27348 /* Set parameters on stack bit if parameters are not in their original
27349 registers, regardless of whether they are on the stack? Xlc
27350 seems to set the bit when not optimizing. */
27351 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
27352
27353 if (! optional_tbtab)
27354 return;
27355
27356 /* Optional fields follow. Some are variable length. */
27357
27358 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
27359 11 double float. */
27360 /* There is an entry for each parameter in a register, in the order that
27361 they occur in the parameter list. Any intervening arguments on the
27362 stack are ignored. If the list overflows a long (max possible length
27363 34 bits) then completely leave off all elements that don't fit. */
27364 /* Only emit this long if there was at least one parameter. */
27365 if (fixed_parms || float_parms)
27366 fprintf (file, "\t.long %d\n", parm_info);
27367
27368 /* Offset from start of code to tb table. */
27369 fputs ("\t.long ", file);
27370 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
27371 RS6000_OUTPUT_BASENAME (file, fname);
27372 putc ('-', file);
27373 rs6000_output_function_entry (file, fname);
27374 putc ('\n', file);
27375
27376 /* Interrupt handler mask. */
27377 /* Omit this long, since we never set the interrupt handler bit
27378 above. */
27379
27380 /* Number of CTL (controlled storage) anchors. */
27381 /* Omit this long, since the has_ctl bit is never set above. */
27382
27383 /* Displacement into stack of each CTL anchor. */
27384 /* Omit this list of longs, because there are no CTL anchors. */
27385
27386 /* Length of function name. */
27387 if (*fname == '*')
27388 ++fname;
27389 fprintf (file, "\t.short %d\n", (int) strlen (fname));
27390
27391 /* Function name. */
27392 assemble_string (fname, strlen (fname));
27393
27394 /* Register for alloca automatic storage; this is always reg 31.
27395 Only emit this if the alloca bit was set above. */
27396 if (frame_pointer_needed)
27397 fputs ("\t.byte 31\n", file);
27398
27399 fputs ("\t.align 2\n", file);
27400 }
27401 }
27402
27403 /* -fsplit-stack support. */
27404
27405 /* A SYMBOL_REF for __morestack. */
27406 static GTY(()) rtx morestack_ref;
27407
27408 static rtx
27409 gen_add3_const (rtx rt, rtx ra, long c)
27410 {
27411 if (TARGET_64BIT)
27412 return gen_adddi3 (rt, ra, GEN_INT (c));
27413 else
27414 return gen_addsi3 (rt, ra, GEN_INT (c));
27415 }
27416
27417 /* Emit -fsplit-stack prologue, which goes before the regular function
27418 prologue (at local entry point in the case of ELFv2). */
27419
27420 void
27421 rs6000_expand_split_stack_prologue (void)
27422 {
27423 rs6000_stack_t *info = rs6000_stack_info ();
27424 unsigned HOST_WIDE_INT allocate;
27425 long alloc_hi, alloc_lo;
27426 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
27427 rtx_insn *insn;
27428
27429 gcc_assert (flag_split_stack && reload_completed);
27430
27431 if (!info->push_p)
27432 return;
27433
27434 if (global_regs[29])
27435 {
27436 error ("-fsplit-stack uses register r29");
27437 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
27438 "conflicts with %qD", global_regs_decl[29]);
27439 }
27440
27441 allocate = info->total_size;
27442 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
27443 {
27444 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
27445 return;
27446 }
27447 if (morestack_ref == NULL_RTX)
27448 {
27449 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
27450 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
27451 | SYMBOL_FLAG_FUNCTION);
27452 }
27453
27454 r0 = gen_rtx_REG (Pmode, 0);
27455 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27456 r12 = gen_rtx_REG (Pmode, 12);
27457 emit_insn (gen_load_split_stack_limit (r0));
27458 /* Always emit two insns here to calculate the requested stack,
27459 so that the linker can edit them when adjusting size for calling
27460 non-split-stack code. */
27461 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
27462 alloc_lo = -allocate - alloc_hi;
27463 if (alloc_hi != 0)
27464 {
27465 emit_insn (gen_add3_const (r12, r1, alloc_hi));
27466 if (alloc_lo != 0)
27467 emit_insn (gen_add3_const (r12, r12, alloc_lo));
27468 else
27469 emit_insn (gen_nop ());
27470 }
27471 else
27472 {
27473 emit_insn (gen_add3_const (r12, r1, alloc_lo));
27474 emit_insn (gen_nop ());
27475 }
27476
27477 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
27478 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
27479 ok_label = gen_label_rtx ();
27480 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27481 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
27482 gen_rtx_LABEL_REF (VOIDmode, ok_label),
27483 pc_rtx);
27484 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27485 JUMP_LABEL (jump) = ok_label;
27486 /* Mark the jump as very likely to be taken. */
27487 add_int_reg_note (jump, REG_BR_PROB,
27488 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
27489
27490 lr = gen_rtx_REG (Pmode, LR_REGNO);
27491 insn = emit_move_insn (r0, lr);
27492 RTX_FRAME_RELATED_P (insn) = 1;
27493 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
27494 RTX_FRAME_RELATED_P (insn) = 1;
27495
27496 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
27497 const0_rtx, const0_rtx));
27498 call_fusage = NULL_RTX;
27499 use_reg (&call_fusage, r12);
27500 add_function_usage_to (insn, call_fusage);
27501 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
27502 insn = emit_move_insn (lr, r0);
27503 add_reg_note (insn, REG_CFA_RESTORE, lr);
27504 RTX_FRAME_RELATED_P (insn) = 1;
27505 emit_insn (gen_split_stack_return ());
27506
27507 emit_label (ok_label);
27508 LABEL_NUSES (ok_label) = 1;
27509 }
27510
27511 /* Return the internal arg pointer used for function incoming
27512 arguments. When -fsplit-stack, the arg pointer is r12 so we need
27513 to copy it to a pseudo in order for it to be preserved over calls
27514 and suchlike. We'd really like to use a pseudo here for the
27515 internal arg pointer but data-flow analysis is not prepared to
27516 accept pseudos as live at the beginning of a function. */
27517
27518 static rtx
27519 rs6000_internal_arg_pointer (void)
27520 {
27521 if (flag_split_stack
27522 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
27523 == NULL))
27525 {
27526 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
27527 {
27528 rtx pat;
27529
27530 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
27531 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
27532
27533 /* Put the pseudo initialization right after the note at the
27534 beginning of the function. */
27535 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
27536 gen_rtx_REG (Pmode, 12));
27537 push_topmost_sequence ();
27538 emit_insn_after (pat, get_insns ());
27539 pop_topmost_sequence ();
27540 }
27541 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
27542 FIRST_PARM_OFFSET (current_function_decl));
27543 }
27544 return virtual_incoming_args_rtx;
27545 }
27546
27547 /* We may have to tell the dataflow pass that the split stack prologue
27548 is initializing a register. */
27549
27550 static void
27551 rs6000_live_on_entry (bitmap regs)
27552 {
27553 if (flag_split_stack)
27554 bitmap_set_bit (regs, 12);
27555 }
27556
27557 /* Emit -fsplit-stack dynamic stack allocation space check. */
27558
27559 void
27560 rs6000_split_stack_space_check (rtx size, rtx label)
27561 {
27562 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27563 rtx limit = gen_reg_rtx (Pmode);
27564 rtx requested = gen_reg_rtx (Pmode);
27565 rtx cmp = gen_reg_rtx (CCUNSmode);
27566 rtx jump;
27567
27568 emit_insn (gen_load_split_stack_limit (limit));
27569 if (CONST_INT_P (size))
27570 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
27571 else
27572 {
27573 size = force_reg (Pmode, size);
27574 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
27575 }
27576 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
27577 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27578 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
27579 gen_rtx_LABEL_REF (VOIDmode, label),
27580 pc_rtx);
27581 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27582 JUMP_LABEL (jump) = label;
27583 }
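/* Hedged C-level sketch of the check emitted above (names are descriptive,
   not taken from the sources):

     requested = sp - size;
     if ((unsigned) requested >= (unsigned) limit)  /* room on this segment */
       goto label;                                  /* fast path */
     /* otherwise fall through to whatever fallback the generic middle end
	emits for the allocation (typically a library call).  */

   That is, the branch to LABEL is the "enough stack" path.  */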
27584 \f
27585 /* A C compound statement that outputs the assembler code for a thunk
27586 function, used to implement C++ virtual function calls with
27587 multiple inheritance. The thunk acts as a wrapper around a virtual
27588 function, adjusting the implicit object parameter before handing
27589 control off to the real function.
27590
27591 First, emit code to add the integer DELTA to the location that
27592 contains the incoming first argument. Assume that this argument
27593 contains a pointer, and is the one used to pass the `this' pointer
27594 in C++. This is the incoming argument *before* the function
27595 prologue, e.g. `%o0' on a sparc. The addition must preserve the
27596 values of all other incoming arguments.
27597
27598 After the addition, emit code to jump to FUNCTION, which is a
27599 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
27600 not touch the return address. Hence returning from FUNCTION will
27601 return to whoever called the current `thunk'.
27602
27603 The effect must be as if FUNCTION had been called directly with the
27604 adjusted first argument. This macro is responsible for emitting
27605 all of the code for a thunk function; output_function_prologue()
27606 and output_function_epilogue() are not invoked.
27607
27608 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
27609 been extracted from it.) It might possibly be useful on some
27610 targets, but probably not.
27611
27612 If you do not define this macro, the target-independent code in the
27613 C++ frontend will generate a less efficient heavyweight thunk that
27614 calls FUNCTION instead of jumping to it. The generic approach does
27615 not support varargs. */
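/* A hedged C++ illustration (hypothetical types, not from this file):

     struct B1 { virtual void f (); };
     struct B2 { virtual void g (); };
     struct D : B1, B2 { void g (); };  // B2 subobject at a nonzero offset

   Calling g() through a B2* that points into a D lands on a thunk whose
   DELTA re-biases `this' from the B2 subobject back to the enclosing D
   before tail-jumping to D::g; VCALL_OFFSET covers the analogous
   adjustment fetched from the vtable for virtual bases.  */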
27616
27617 static void
27618 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
27619 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
27620 tree function)
27621 {
27622 rtx this_rtx, funexp;
27623 rtx_insn *insn;
27624
27625 reload_completed = 1;
27626 epilogue_completed = 1;
27627
27628 /* Mark the end of the (empty) prologue. */
27629 emit_note (NOTE_INSN_PROLOGUE_END);
27630
27631 /* Find the "this" pointer. If the function returns a structure,
27632 the structure return pointer is in r3. */
27633 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
27634 this_rtx = gen_rtx_REG (Pmode, 4);
27635 else
27636 this_rtx = gen_rtx_REG (Pmode, 3);
27637
27638 /* Apply the constant offset, if required. */
27639 if (delta)
27640 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
27641
27642 /* Apply the offset from the vtable, if required. */
27643 if (vcall_offset)
27644 {
27645 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
27646 rtx tmp = gen_rtx_REG (Pmode, 12);
27647
27648 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
27649 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
27650 {
27651 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
27652 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
27653 }
27654 else
27655 {
27656 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
27657
27658 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
27659 }
27660 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
27661 }
27662
27663 /* Generate a tail call to the target function. */
27664 if (!TREE_USED (function))
27665 {
27666 assemble_external (function);
27667 TREE_USED (function) = 1;
27668 }
27669 funexp = XEXP (DECL_RTL (function), 0);
27670 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
27671
27672 #if TARGET_MACHO
27673 if (MACHOPIC_INDIRECT)
27674 funexp = machopic_indirect_call_target (funexp);
27675 #endif
27676
27677 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
27678 generate sibcall RTL explicitly. */
27679 insn = emit_call_insn (
27680 gen_rtx_PARALLEL (VOIDmode,
27681 gen_rtvec (4,
27682 gen_rtx_CALL (VOIDmode,
27683 funexp, const0_rtx),
27684 gen_rtx_USE (VOIDmode, const0_rtx),
27685 gen_rtx_USE (VOIDmode,
27686 gen_rtx_REG (SImode,
27687 LR_REGNO)),
27688 simple_return_rtx)));
27689 SIBLING_CALL_P (insn) = 1;
27690 emit_barrier ();
27691
27692 /* Run just enough of rest_of_compilation to get the insns emitted.
27693 There's not really enough bulk here to make other passes such as
27694 instruction scheduling worthwhile. Note that use_thunk calls
27695 assemble_start_function and assemble_end_function. */
27696 insn = get_insns ();
27697 shorten_branches (insn);
27698 final_start_function (insn, file, 1);
27699 final (insn, file, 1);
27700 final_end_function ();
27701
27702 reload_completed = 0;
27703 epilogue_completed = 0;
27704 }
27705 \f
27706 /* A quick summary of the various types of 'constant-pool tables'
27707 under PowerPC:
27708
27709 Target Flags Name One table per
27710 AIX (none) AIX TOC object file
27711 AIX -mfull-toc AIX TOC object file
27712 AIX -mminimal-toc AIX minimal TOC translation unit
27713 SVR4/EABI (none) SVR4 SDATA object file
27714 SVR4/EABI -fpic SVR4 pic object file
27715 SVR4/EABI -fPIC SVR4 PIC translation unit
27716 SVR4/EABI -mrelocatable EABI TOC function
27717 SVR4/EABI -maix AIX TOC object file
27718 SVR4/EABI -maix -mminimal-toc
27719 AIX minimal TOC translation unit
27720
27721 Name Reg. Set by entries contains:
27722 made by addrs? fp? sum?
27723
27724 AIX TOC 2 crt0 as Y option option
27725 AIX minimal TOC 30 prolog gcc Y Y option
27726 SVR4 SDATA 13 crt0 gcc N Y N
27727 SVR4 pic 30 prolog ld Y not yet N
27728 SVR4 PIC 30 prolog gcc Y option option
27729 EABI TOC 30 prolog gcc Y option option
27730
27731 */
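/* As a concrete (illustrative) taste of what such a table holds, an
   AIX-style 64-bit TOC entry for a global `foo' looks roughly like

	LC..0:
		.tc foo[TC],foo

   and code then reaches foo indirectly through the TOC register r2,
   e.g. `ld 9,LC..0(2)'.  The label spelling and mnemonics here are
   illustrative rather than copied from this file's output.  */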
27732
27733 /* Hash functions for the hash table. */
27734
27735 static unsigned
27736 rs6000_hash_constant (rtx k)
27737 {
27738 enum rtx_code code = GET_CODE (k);
27739 machine_mode mode = GET_MODE (k);
27740 unsigned result = (code << 3) ^ mode;
27741 const char *format;
27742 int flen, fidx;
27743
27744 format = GET_RTX_FORMAT (code);
27745 flen = strlen (format);
27746 fidx = 0;
27747
27748 switch (code)
27749 {
27750 case LABEL_REF:
27751 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
27752
27753 case CONST_WIDE_INT:
27754 {
27755 int i;
27756 flen = CONST_WIDE_INT_NUNITS (k);
27757 for (i = 0; i < flen; i++)
27758 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
27759 return result;
27760 }
27761
27762 case CONST_DOUBLE:
27763 if (mode != VOIDmode)
27764 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
27765 flen = 2;
27766 break;
27767
27768 case CODE_LABEL:
27769 fidx = 3;
27770 break;
27771
27772 default:
27773 break;
27774 }
27775
27776 for (; fidx < flen; fidx++)
27777 switch (format[fidx])
27778 {
27779 case 's':
27780 {
27781 unsigned i, len;
27782 const char *str = XSTR (k, fidx);
27783 len = strlen (str);
27784 result = result * 613 + len;
27785 for (i = 0; i < len; i++)
27786 result = result * 613 + (unsigned) str[i];
27787 break;
27788 }
27789 case 'u':
27790 case 'e':
27791 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
27792 break;
27793 case 'i':
27794 case 'n':
27795 result = result * 613 + (unsigned) XINT (k, fidx);
27796 break;
27797 case 'w':
27798 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
27799 result = result * 613 + (unsigned) XWINT (k, fidx);
27800 else
27801 {
27802 size_t i;
27803 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
27804 result = result * 613 + (unsigned) (XWINT (k, fidx)
27805 >> CHAR_BIT * i);
27806 }
27807 break;
27808 case '0':
27809 break;
27810 default:
27811 gcc_unreachable ();
27812 }
27813
27814 return result;
27815 }
27816
27817 hashval_t
27818 toc_hasher::hash (toc_hash_struct *thc)
27819 {
27820 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
27821 }
27822
27823 /* Compare H1 and H2 for equivalence. */
27824
27825 bool
27826 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
27827 {
27828 rtx r1 = h1->key;
27829 rtx r2 = h2->key;
27830
27831 if (h1->key_mode != h2->key_mode)
27832 return 0;
27833
27834 return rtx_equal_p (r1, r2);
27835 }
27836
27837 /* These are the names given by the C++ front-end to vtables, and
27838 vtable-like objects. Ideally, this logic should not be here;
27839 instead, there should be some programmatic way of inquiring as
27840 to whether or not an object is a vtable. */
27841
27842 #define VTABLE_NAME_P(NAME) \
27843 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
27844 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
27845 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
27846 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
27847 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
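/* For instance, under the Itanium C++ ABI the vtable for class Foo is
   mangled "_ZTV3Foo", its typeinfo "_ZTI3Foo", and its VTT "_ZTT3Foo";
   all of these match the prefix tests above.  (Illustrative names.)  */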
27848
27849 #ifdef NO_DOLLAR_IN_LABEL
27850 /* Return a GGC-allocated character string translating dollar signs in
27851 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
27852
27853 const char *
27854 rs6000_xcoff_strip_dollar (const char *name)
27855 {
27856 char *strip, *p;
27857 const char *q;
27858 size_t len;
27859
27860 q = (const char *) strchr (name, '$');
27861
27862 if (q == 0 || q == name)
27863 return name;
27864
27865 len = strlen (name);
27866 strip = XALLOCAVEC (char, len + 1);
27867 strcpy (strip, name);
27868 p = strip + (q - name);
27869 while (p)
27870 {
27871 *p = '_';
27872 p = strchr (p + 1, '$');
27873 }
27874
27875 return ggc_alloc_string (strip, len);
27876 }
27877 #endif
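/* A minimal usage sketch (hypothetical input): given "pic$entry" this
   returns "pic_entry", while a name beginning with '$' takes the
   q == name early exit above and is returned unchanged.  */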
27878
27879 void
27880 rs6000_output_symbol_ref (FILE *file, rtx x)
27881 {
27882 /* Currently C++ toc references to vtables can be emitted before it
27883 is decided whether the vtable is public or private. If this is
27884 the case, then the linker will eventually complain that there is
27885 a reference to an unknown section. Thus, for vtables only,
27886 we emit the TOC reference to reference the symbol and not the
27887 section. */
27888 const char *name = XSTR (x, 0);
27889
27890 tree decl = SYMBOL_REF_DECL (x);
27891 if (decl /* sync condition with assemble_external () */
27892 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
27893 && (TREE_CODE (decl) == VAR_DECL
27894 || TREE_CODE (decl) == FUNCTION_DECL)
27895 && name[strlen (name) - 1] != ']')
27896 {
27897 name = concat (name,
27898 (TREE_CODE (decl) == FUNCTION_DECL
27899 ? "[DS]" : "[UA]"),
27900 NULL);
27901 XSTR (x, 0) = name;
27902 }
27903
27904 if (VTABLE_NAME_P (name))
27905 {
27906 RS6000_OUTPUT_BASENAME (file, name);
27907 }
27908 else
27909 assemble_name (file, name);
27910 }
27911
27912 /* Output a TOC entry. We derive the entry name from what is being
27913 written. */
27914
27915 void
27916 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
27917 {
27918 char buf[256];
27919 const char *name = buf;
27920 rtx base = x;
27921 HOST_WIDE_INT offset = 0;
27922
27923 gcc_assert (!TARGET_NO_TOC);
27924
27925 /* When the linker won't eliminate them, don't output duplicate
27926 TOC entries (this happens on AIX if there is any kind of TOC,
27927 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
27928 CODE_LABELs. */
27929 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
27930 {
27931 struct toc_hash_struct *h;
27932
27933 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
27934 time because GGC is not initialized at that point. */
27935 if (toc_hash_table == NULL)
27936 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
27937
27938 h = ggc_alloc<toc_hash_struct> ();
27939 h->key = x;
27940 h->key_mode = mode;
27941 h->labelno = labelno;
27942
27943 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
27944 if (*found == NULL)
27945 *found = h;
27946 else /* This is indeed a duplicate.
27947 Set this label equal to that label. */
27948 {
27949 fputs ("\t.set ", file);
27950 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
27951 fprintf (file, "%d,", labelno);
27952 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
27953 fprintf (file, "%d\n", ((*found)->labelno));
27954
27955 #ifdef HAVE_AS_TLS
27956 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
27957 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
27958 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
27959 {
27960 fputs ("\t.set ", file);
27961 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
27962 fprintf (file, "%d,", labelno);
27963 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
27964 fprintf (file, "%d\n", ((*found)->labelno));
27965 }
27966 #endif
27967 return;
27968 }
27969 }
27970
27971 /* If we're going to put a double constant in the TOC, make sure it's
27972 aligned properly when strict alignment is on. */
27973 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
27974 && STRICT_ALIGNMENT
27975 && GET_MODE_BITSIZE (mode) >= 64
27976 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
27977 ASM_OUTPUT_ALIGN (file, 3);
27979
27980 (*targetm.asm_out.internal_label) (file, "LC", labelno);
27981
27982 /* Handle FP constants specially. Note that if we have a minimal
27983 TOC, things we put here aren't actually in the TOC, so we can allow
27984 FP constants. */
27985 if (GET_CODE (x) == CONST_DOUBLE
27986 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
27987 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
27988 {
27989 long k[4];
27990
27991 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
27992 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
27993 else
27994 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
27995
27996 if (TARGET_64BIT)
27997 {
27998 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27999 fputs (DOUBLE_INT_ASM_OP, file);
28000 else
28001 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
28002 k[0] & 0xffffffff, k[1] & 0xffffffff,
28003 k[2] & 0xffffffff, k[3] & 0xffffffff);
28004 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
28005 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
28006 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
28007 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
28008 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
28009 return;
28010 }
28011 else
28012 {
28013 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28014 fputs ("\t.long ", file);
28015 else
28016 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
28017 k[0] & 0xffffffff, k[1] & 0xffffffff,
28018 k[2] & 0xffffffff, k[3] & 0xffffffff);
28019 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
28020 k[0] & 0xffffffff, k[1] & 0xffffffff,
28021 k[2] & 0xffffffff, k[3] & 0xffffffff);
28022 return;
28023 }
28024 }
28025 else if (GET_CODE (x) == CONST_DOUBLE
28026 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
28027 {
28028 long k[2];
28029
28030 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
28031 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
28032 else
28033 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
28034
28035 if (TARGET_64BIT)
28036 {
28037 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28038 fputs (DOUBLE_INT_ASM_OP, file);
28039 else
28040 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
28041 k[0] & 0xffffffff, k[1] & 0xffffffff);
28042 fprintf (file, "0x%lx%08lx\n",
28043 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
28044 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
28045 return;
28046 }
28047 else
28048 {
28049 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28050 fputs ("\t.long ", file);
28051 else
28052 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
28053 k[0] & 0xffffffff, k[1] & 0xffffffff);
28054 fprintf (file, "0x%lx,0x%lx\n",
28055 k[0] & 0xffffffff, k[1] & 0xffffffff);
28056 return;
28057 }
28058 }
28059 else if (GET_CODE (x) == CONST_DOUBLE
28060 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
28061 {
28062 long l;
28063
28064 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
28065 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
28066 else
28067 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
28068
28069 if (TARGET_64BIT)
28070 {
28071 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28072 fputs (DOUBLE_INT_ASM_OP, file);
28073 else
28074 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
28075 if (WORDS_BIG_ENDIAN)
28076 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
28077 else
28078 fprintf (file, "0x%lx\n", l & 0xffffffff);
28079 return;
28080 }
28081 else
28082 {
28083 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28084 fputs ("\t.long ", file);
28085 else
28086 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
28087 fprintf (file, "0x%lx\n", l & 0xffffffff);
28088 return;
28089 }
28090 }
28091 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
28092 {
28093 unsigned HOST_WIDE_INT low;
28094 HOST_WIDE_INT high;
28095
28096 low = INTVAL (x) & 0xffffffff;
28097 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
28098
28099 /* TOC entries are always Pmode-sized, so on big-endian targets
28100 smaller integer constants in the TOC need to be padded.
28101 (This is still a win over putting the constants in
28102 a separate constant pool, because then we'd have
28103 to have both a TOC entry _and_ the actual constant.)
28104
28105 For a 32-bit target, CONST_INT values are loaded and shifted
28106 entirely within `low' and can be stored in one TOC entry. */
28107
28108 /* It would be easy to make this work, but it doesn't now. */
28109 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
28110
28111 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
28112 {
28113 low |= high << 32;
28114 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
28115 high = (HOST_WIDE_INT) low >> 32;
28116 low &= 0xffffffff;
28117 }
28118
28119 if (TARGET_64BIT)
28120 {
28121 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28122 fputs (DOUBLE_INT_ASM_OP, file);
28123 else
28124 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
28125 (long) high & 0xffffffff, (long) low & 0xffffffff);
28126 fprintf (file, "0x%lx%08lx\n",
28127 (long) high & 0xffffffff, (long) low & 0xffffffff);
28128 return;
28129 }
28130 else
28131 {
28132 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
28133 {
28134 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28135 fputs ("\t.long ", file);
28136 else
28137 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
28138 (long) high & 0xffffffff, (long) low & 0xffffffff);
28139 fprintf (file, "0x%lx,0x%lx\n",
28140 (long) high & 0xffffffff, (long) low & 0xffffffff);
28141 }
28142 else
28143 {
28144 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28145 fputs ("\t.long ", file);
28146 else
28147 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
28148 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
28149 }
28150 return;
28151 }
28152 }
28153
28154 if (GET_CODE (x) == CONST)
28155 {
28156 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
28157 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
28158
28159 base = XEXP (XEXP (x, 0), 0);
28160 offset = INTVAL (XEXP (XEXP (x, 0), 1));
28161 }
28162
28163 switch (GET_CODE (base))
28164 {
28165 case SYMBOL_REF:
28166 name = XSTR (base, 0);
28167 break;
28168
28169 case LABEL_REF:
28170 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
28171 CODE_LABEL_NUMBER (XEXP (base, 0)));
28172 break;
28173
28174 case CODE_LABEL:
28175 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
28176 break;
28177
28178 default:
28179 gcc_unreachable ();
28180 }
28181
28182 if (TARGET_ELF || TARGET_MINIMAL_TOC)
28183 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
28184 else
28185 {
28186 fputs ("\t.tc ", file);
28187 RS6000_OUTPUT_BASENAME (file, name);
28188
28189 if (offset < 0)
28190 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
28191 else if (offset)
28192 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
28193
28194 /* Mark large TOC symbols on AIX with [TE] so they are mapped
28195 after other TOC symbols, reducing overflow of small TOC access
28196 to [TC] symbols. */
28197 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
28198 ? "[TE]," : "[TC],", file);
28199 }
28200
28201 /* Currently C++ toc references to vtables can be emitted before it
28202 is decided whether the vtable is public or private. If this is
28203 the case, then the linker will eventually complain that there is
28204 a TOC reference to an unknown section. Thus, for vtables only,
28205 we emit the TOC reference to reference the symbol and not the
28206 section. */
28207 if (VTABLE_NAME_P (name))
28208 {
28209 RS6000_OUTPUT_BASENAME (file, name);
28210 if (offset < 0)
28211 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
28212 else if (offset > 0)
28213 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
28214 }
28215 else
28216 output_addr_const (file, x);
28217
28218 #if HAVE_AS_TLS
28219 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
28220 {
28221 switch (SYMBOL_REF_TLS_MODEL (base))
28222 {
28223 case 0:
28224 break;
28225 case TLS_MODEL_LOCAL_EXEC:
28226 fputs ("@le", file);
28227 break;
28228 case TLS_MODEL_INITIAL_EXEC:
28229 fputs ("@ie", file);
28230 break;
28231 /* Use global-dynamic for local-dynamic. */
28232 case TLS_MODEL_GLOBAL_DYNAMIC:
28233 case TLS_MODEL_LOCAL_DYNAMIC:
28234 putc ('\n', file);
28235 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
28236 fputs ("\t.tc .", file);
28237 RS6000_OUTPUT_BASENAME (file, name);
28238 fputs ("[TC],", file);
28239 output_addr_const (file, x);
28240 fputs ("@m", file);
28241 break;
28242 default:
28243 gcc_unreachable ();
28244 }
28245 }
28246 #endif
28247
28248 putc ('\n', file);
28249 }
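/* Rough worked example, assuming a big-endian 64-bit ELF target where
   DOUBLE_INT_ASM_OP expands to "\t.quad\t": for the DFmode constant 1.0
   the target image is k[0] = 0x3ff00000, k[1] = 0x00000000, so the
   DFmode branch above emits

	.quad	0x3ff0000000000000

   underneath the internal "LC" label chosen for LABELNO.  */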
28250 \f
28251 /* Output an assembler pseudo-op to write an ASCII string of N characters
28252 starting at P to FILE.
28253
28254 On the RS/6000, we have to do this using the .byte operation and
28255 write out special characters outside the quoted string.
28256 Also, the assembler is broken; very long strings are truncated,
28257 so we must artificially break them up early. */
28258
28259 void
28260 output_ascii (FILE *file, const char *p, int n)
28261 {
28262 char c;
28263 int i, count_string;
28264 const char *for_string = "\t.byte \"";
28265 const char *for_decimal = "\t.byte ";
28266 const char *to_close = NULL;
28267
28268 count_string = 0;
28269 for (i = 0; i < n; i++)
28270 {
28271 c = *p++;
28272 if (c >= ' ' && c < 0177)
28273 {
28274 if (for_string)
28275 fputs (for_string, file);
28276 putc (c, file);
28277
28278 /* Write two quotes to get one. */
28279 if (c == '"')
28280 {
28281 putc (c, file);
28282 ++count_string;
28283 }
28284
28285 for_string = NULL;
28286 for_decimal = "\"\n\t.byte ";
28287 to_close = "\"\n";
28288 ++count_string;
28289
28290 if (count_string >= 512)
28291 {
28292 fputs (to_close, file);
28293
28294 for_string = "\t.byte \"";
28295 for_decimal = "\t.byte ";
28296 to_close = NULL;
28297 count_string = 0;
28298 }
28299 }
28300 else
28301 {
28302 if (for_decimal)
28303 fputs (for_decimal, file);
28304 fprintf (file, "%d", c);
28305
28306 for_string = "\n\t.byte \"";
28307 for_decimal = ", ";
28308 to_close = "\n";
28309 count_string = 0;
28310 }
28311 }
28312
28313 /* Now close the string if we have written one. Then end the line. */
28314 if (to_close)
28315 fputs (to_close, file);
28316 }
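/* Worked example, traced from the loop above: output_ascii (file,
   "Hi\n", 3) prints the printable prefix as a quoted string and the
   newline as a decimal byte:

	.byte "Hi"
	.byte 10

   The count_string cap of 512 is what breaks very long strings into
   multiple .byte directives for the assembler's benefit.  */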
28317 \f
28318 /* Generate a unique section name for FILENAME for a section type
28319 represented by SECTION_DESC. Output goes into BUF.
28320
28321 SECTION_DESC can be any string, as long as it is different for each
28322 possible section type.
28323
28324 We name the section in the same manner as xlc. The name begins with an
28325 underscore followed by the filename (after stripping any leading directory
28326 names) with the last period replaced by the string SECTION_DESC. If
28327 FILENAME does not contain a period, SECTION_DESC is appended to the end of
28328 the name. */
28329
28330 void
28331 rs6000_gen_section_name (char **buf, const char *filename,
28332 const char *section_desc)
28333 {
28334 const char *q, *after_last_slash, *last_period = 0;
28335 char *p;
28336 int len;
28337
28338 after_last_slash = filename;
28339 for (q = filename; *q; q++)
28340 {
28341 if (*q == '/')
28342 after_last_slash = q + 1;
28343 else if (*q == '.')
28344 last_period = q;
28345 }
28346
28347 len = strlen (after_last_slash) + strlen (section_desc) + 2;
28348 *buf = (char *) xmalloc (len);
28349
28350 p = *buf;
28351 *p++ = '_';
28352
28353 for (q = after_last_slash; *q; q++)
28354 {
28355 if (q == last_period)
28356 {
28357 strcpy (p, section_desc);
28358 p += strlen (section_desc);
28359 break;
28360 }
28361
28362 else if (ISALNUM (*q))
28363 *p++ = *q;
28364 }
28365
28366 if (last_period == 0)
28367 strcpy (p, section_desc);
28368 else
28369 *p = '\0';
28370 }
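/* Small illustration (hypothetical inputs): FILENAME "src/foo.c" with
   SECTION_DESC "bss0" strips the directory, replaces the last period,
   and produces "_foobss0"; a FILENAME without any period simply gets
   SECTION_DESC appended after the alphanumeric characters.  */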
28371 \f
28372 /* Emit profile function. */
28373
28374 void
28375 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
28376 {
28377 /* Non-standard profiling for kernels, which just saves LR then calls
28378 _mcount without worrying about arg saves. The idea is to change
28379 the function prologue as little as possible as it isn't easy to
28380 account for arg save/restore code added just for _mcount. */
28381 if (TARGET_PROFILE_KERNEL)
28382 return;
28383
28384 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28385 {
28386 #ifndef NO_PROFILE_COUNTERS
28387 # define NO_PROFILE_COUNTERS 0
28388 #endif
28389 if (NO_PROFILE_COUNTERS)
28390 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
28391 LCT_NORMAL, VOIDmode, 0);
28392 else
28393 {
28394 char buf[30];
28395 const char *label_name;
28396 rtx fun;
28397
28398 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
28399 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
28400 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
28401
28402 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
28403 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
28404 }
28405 }
28406 else if (DEFAULT_ABI == ABI_DARWIN)
28407 {
28408 const char *mcount_name = RS6000_MCOUNT;
28409 int caller_addr_regno = LR_REGNO;
28410
28411 /* Be conservative and always set this, at least for now. */
28412 crtl->uses_pic_offset_table = 1;
28413
28414 #if TARGET_MACHO
28415 /* For PIC code, set up a stub and collect the caller's address
28416 from r0, which is where the prologue puts it. */
28417 if (MACHOPIC_INDIRECT
28418 && crtl->uses_pic_offset_table)
28419 caller_addr_regno = 0;
28420 #endif
28421 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
28422 LCT_NORMAL, VOIDmode, 1,
28423 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
28424 }
28425 }
28426
28427 /* Write function profiler code. */
28428
28429 void
28430 output_function_profiler (FILE *file, int labelno)
28431 {
28432 char buf[100];
28433
28434 switch (DEFAULT_ABI)
28435 {
28436 default:
28437 gcc_unreachable ();
28438
28439 case ABI_V4:
28440 if (!TARGET_32BIT)
28441 {
28442 warning (0, "no profiling of 64-bit code for this ABI");
28443 return;
28444 }
28445 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
28446 fprintf (file, "\tmflr %s\n", reg_names[0]);
28447 if (NO_PROFILE_COUNTERS)
28448 {
28449 asm_fprintf (file, "\tstw %s,4(%s)\n",
28450 reg_names[0], reg_names[1]);
28451 }
28452 else if (TARGET_SECURE_PLT && flag_pic)
28453 {
28454 if (TARGET_LINK_STACK)
28455 {
28456 char name[32];
28457 get_ppc476_thunk_name (name);
28458 asm_fprintf (file, "\tbl %s\n", name);
28459 }
28460 else
28461 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
28462 asm_fprintf (file, "\tstw %s,4(%s)\n",
28463 reg_names[0], reg_names[1]);
28464 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
28465 asm_fprintf (file, "\taddis %s,%s,",
28466 reg_names[12], reg_names[12]);
28467 assemble_name (file, buf);
28468 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
28469 assemble_name (file, buf);
28470 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
28471 }
28472 else if (flag_pic == 1)
28473 {
28474 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
28475 asm_fprintf (file, "\tstw %s,4(%s)\n",
28476 reg_names[0], reg_names[1]);
28477 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
28478 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
28479 assemble_name (file, buf);
28480 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
28481 }
28482 else if (flag_pic > 1)
28483 {
28484 asm_fprintf (file, "\tstw %s,4(%s)\n",
28485 reg_names[0], reg_names[1]);
28486 /* Now, we need to get the address of the label. */
28487 if (TARGET_LINK_STACK)
28488 {
28489 char name[32];
28490 get_ppc476_thunk_name (name);
28491 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
28492 assemble_name (file, buf);
28493 fputs ("-.\n1:", file);
28494 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
28495 asm_fprintf (file, "\taddi %s,%s,4\n",
28496 reg_names[11], reg_names[11]);
28497 }
28498 else
28499 {
28500 fputs ("\tbcl 20,31,1f\n\t.long ", file);
28501 assemble_name (file, buf);
28502 fputs ("-.\n1:", file);
28503 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
28504 }
28505 asm_fprintf (file, "\tlwz %s,0(%s)\n",
28506 reg_names[0], reg_names[11]);
28507 asm_fprintf (file, "\tadd %s,%s,%s\n",
28508 reg_names[0], reg_names[0], reg_names[11]);
28509 }
28510 else
28511 {
28512 asm_fprintf (file, "\tlis %s,", reg_names[12]);
28513 assemble_name (file, buf);
28514 fputs ("@ha\n", file);
28515 asm_fprintf (file, "\tstw %s,4(%s)\n",
28516 reg_names[0], reg_names[1]);
28517 asm_fprintf (file, "\tla %s,", reg_names[0]);
28518 assemble_name (file, buf);
28519 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
28520 }
28521
28522 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
28523 fprintf (file, "\tbl %s%s\n",
28524 RS6000_MCOUNT, flag_pic ? "@plt" : "");
28525 break;
28526
28527 case ABI_AIX:
28528 case ABI_ELFv2:
28529 case ABI_DARWIN:
28530 /* Don't do anything, done in output_profile_hook (). */
28531 break;
28532 }
28533 }
28534
28535 \f
28536
28537 /* The following variable holds the last issued insn. */
28538
28539 static rtx last_scheduled_insn;
28540
28541 /* The following variable helps to balance the issuing of load and
28542 store instructions.  */
28543
28544 static int load_store_pendulum;
28545
28546 /* Power4 load update and store update instructions are cracked into a
28547 load or store and an integer insn which are executed in the same cycle.
28548 Branches have their own dispatch slot which does not count against the
28549 GCC issue rate, but it changes the program flow so there are no other
28550 instructions to issue in this cycle. */
28551
28552 static int
28553 rs6000_variable_issue_1 (rtx_insn *insn, int more)
28554 {
28555 last_scheduled_insn = insn;
28556 if (GET_CODE (PATTERN (insn)) == USE
28557 || GET_CODE (PATTERN (insn)) == CLOBBER)
28558 {
28559 cached_can_issue_more = more;
28560 return cached_can_issue_more;
28561 }
28562
28563 if (insn_terminates_group_p (insn, current_group))
28564 {
28565 cached_can_issue_more = 0;
28566 return cached_can_issue_more;
28567 }
28568
28569 /* If the insn was not recognized, it has no reservation; issue it
28569 without consuming an issue slot. */
28570 if (recog_memoized (insn) < 0)
28571 return more;
28572
28573 if (rs6000_sched_groups)
28574 {
28575 if (is_microcoded_insn (insn))
28576 cached_can_issue_more = 0;
28577 else if (is_cracked_insn (insn))
28578 cached_can_issue_more = more > 2 ? more - 2 : 0;
28579 else
28580 cached_can_issue_more = more - 1;
28581
28582 return cached_can_issue_more;
28583 }
28584
28585 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
28586 return 0;
28587
28588 cached_can_issue_more = more - 1;
28589 return cached_can_issue_more;
28590 }
28591
28592 static int
28593 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
28594 {
28595 int r = rs6000_variable_issue_1 (insn, more);
28596 if (verbose)
28597 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
28598 return r;
28599 }
28600
28601 /* Adjust the cost of a scheduling dependency. Return the new cost of
28602 a dependency LINK of INSN on DEP_INSN. COST is the current cost. */
28603
28604 static int
28605 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
28606 {
28607 enum attr_type attr_type;
28608
28609 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
28610 return cost;
28611
28612 switch (REG_NOTE_KIND (link))
28613 {
28614 case REG_DEP_TRUE:
28615 {
28616 /* Data dependency; DEP_INSN writes a register that INSN reads
28617 some cycles later. */
28618
28619 /* Separate a load from a narrower, dependent store. */
28620 if (rs6000_sched_groups
28621 && GET_CODE (PATTERN (insn)) == SET
28622 && GET_CODE (PATTERN (dep_insn)) == SET
28623 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
28624 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
28625 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
28626 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
28627 return cost + 14;
28628
28629 attr_type = get_attr_type (insn);
28630
28631 switch (attr_type)
28632 {
28633 case TYPE_JMPREG:
28634 /* Tell the first scheduling pass about the latency between
28635 a mtctr and bctr (and mtlr and br/blr). The first
28636 scheduling pass will not know about this latency since
28637 the mtctr instruction, which has the latency associated
28638 to it, will be generated by reload. */
28639 return 4;
28640 case TYPE_BRANCH:
28641 /* Leave some extra cycles between a compare and its
28642 dependent branch, to inhibit expensive mispredicts. */
28643 if ((rs6000_cpu_attr == CPU_PPC603
28644 || rs6000_cpu_attr == CPU_PPC604
28645 || rs6000_cpu_attr == CPU_PPC604E
28646 || rs6000_cpu_attr == CPU_PPC620
28647 || rs6000_cpu_attr == CPU_PPC630
28648 || rs6000_cpu_attr == CPU_PPC750
28649 || rs6000_cpu_attr == CPU_PPC7400
28650 || rs6000_cpu_attr == CPU_PPC7450
28651 || rs6000_cpu_attr == CPU_PPCE5500
28652 || rs6000_cpu_attr == CPU_PPCE6500
28653 || rs6000_cpu_attr == CPU_POWER4
28654 || rs6000_cpu_attr == CPU_POWER5
28655 || rs6000_cpu_attr == CPU_POWER7
28656 || rs6000_cpu_attr == CPU_POWER8
28657 || rs6000_cpu_attr == CPU_POWER9
28658 || rs6000_cpu_attr == CPU_CELL)
28659 && recog_memoized (dep_insn)
28660 && (INSN_CODE (dep_insn) >= 0))
28661
28662 switch (get_attr_type (dep_insn))
28663 {
28664 case TYPE_CMP:
28665 case TYPE_FPCOMPARE:
28666 case TYPE_CR_LOGICAL:
28667 case TYPE_DELAYED_CR:
28668 return cost + 2;
28669 case TYPE_EXTS:
28670 case TYPE_MUL:
28671 if (get_attr_dot (dep_insn) == DOT_YES)
28672 return cost + 2;
28673 else
28674 break;
28675 case TYPE_SHIFT:
28676 if (get_attr_dot (dep_insn) == DOT_YES
28677 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
28678 return cost + 2;
28679 else
28680 break;
28681 default:
28682 break;
28683 }
28684 break;
28685
28686 case TYPE_STORE:
28687 case TYPE_FPSTORE:
28688 if ((rs6000_cpu == PROCESSOR_POWER6)
28689 && recog_memoized (dep_insn)
28690 && (INSN_CODE (dep_insn) >= 0))
28691 {
28693 if (GET_CODE (PATTERN (insn)) != SET)
28694 /* If this happens, we have to extend this to schedule
28695 optimally. Return default for now. */
28696 return cost;
28697
28698 /* Adjust the cost for the case where the value written
28699 by a fixed point operation is used as the address
28700 gen value on a store. */
28701 switch (get_attr_type (dep_insn))
28702 {
28703 case TYPE_LOAD:
28704 case TYPE_CNTLZ:
28705 {
28706 if (! store_data_bypass_p (dep_insn, insn))
28707 return get_attr_sign_extend (dep_insn)
28708 == SIGN_EXTEND_YES ? 6 : 4;
28709 break;
28710 }
28711 case TYPE_SHIFT:
28712 {
28713 if (! store_data_bypass_p (dep_insn, insn))
28714 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
28715 6 : 3;
28716 break;
28717 }
28718 case TYPE_INTEGER:
28719 case TYPE_ADD:
28720 case TYPE_LOGICAL:
28721 case TYPE_EXTS:
28722 case TYPE_INSERT:
28723 {
28724 if (! store_data_bypass_p (dep_insn, insn))
28725 return 3;
28726 break;
28727 }
28728 case TYPE_STORE:
28729 case TYPE_FPLOAD:
28730 case TYPE_FPSTORE:
28731 {
28732 if (get_attr_update (dep_insn) == UPDATE_YES
28733 && ! store_data_bypass_p (dep_insn, insn))
28734 return 3;
28735 break;
28736 }
28737 case TYPE_MUL:
28738 {
28739 if (! store_data_bypass_p (dep_insn, insn))
28740 return 17;
28741 break;
28742 }
28743 case TYPE_DIV:
28744 {
28745 if (! store_data_bypass_p (dep_insn, insn))
28746 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
28747 break;
28748 }
28749 default:
28750 break;
28751 }
28752 }
28753 break;
28754
28755 case TYPE_LOAD:
28756 if ((rs6000_cpu == PROCESSOR_POWER6)
28757 && recog_memoized (dep_insn)
28758 && (INSN_CODE (dep_insn) >= 0))
28759 {
28761 /* Adjust the cost for the case where the value written
28762 by a fixed point instruction is used within the address
28763 gen portion of a subsequent load(u)(x) */
28764 switch (get_attr_type (dep_insn))
28765 {
28766 case TYPE_LOAD:
28767 case TYPE_CNTLZ:
28768 {
28769 if (set_to_load_agen (dep_insn, insn))
28770 return get_attr_sign_extend (dep_insn)
28771 == SIGN_EXTEND_YES ? 6 : 4;
28772 break;
28773 }
28774 case TYPE_SHIFT:
28775 {
28776 if (set_to_load_agen (dep_insn, insn))
28777 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
28778 6 : 3;
28779 break;
28780 }
28781 case TYPE_INTEGER:
28782 case TYPE_ADD:
28783 case TYPE_LOGICAL:
28784 case TYPE_EXTS:
28785 case TYPE_INSERT:
28786 {
28787 if (set_to_load_agen (dep_insn, insn))
28788 return 3;
28789 break;
28790 }
28791 case TYPE_STORE:
28792 case TYPE_FPLOAD:
28793 case TYPE_FPSTORE:
28794 {
28795 if (get_attr_update (dep_insn) == UPDATE_YES
28796 && set_to_load_agen (dep_insn, insn))
28797 return 3;
28798 break;
28799 }
28800 case TYPE_MUL:
28801 {
28802 if (set_to_load_agen (dep_insn, insn))
28803 return 17;
28804 break;
28805 }
28806 case TYPE_DIV:
28807 {
28808 if (set_to_load_agen (dep_insn, insn))
28809 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
28810 break;
28811 }
28812 default:
28813 break;
28814 }
28815 }
28816 break;
28817
28818 case TYPE_FPLOAD:
28819 if ((rs6000_cpu == PROCESSOR_POWER6)
28820 && get_attr_update (insn) == UPDATE_NO
28821 && recog_memoized (dep_insn)
28822 && (INSN_CODE (dep_insn) >= 0)
28823 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
28824 return 2;
28825
28826 default:
28827 break;
28828 }
28829
28830 /* Fall out to return default cost. */
28831 }
28832 break;
28833
28834 case REG_DEP_OUTPUT:
28835 /* Output dependency; DEP_INSN writes a register that INSN writes some
28836 cycles later. */
28837 if ((rs6000_cpu == PROCESSOR_POWER6)
28838 && recog_memoized (dep_insn)
28839 && (INSN_CODE (dep_insn) >= 0))
28840 {
28841 attr_type = get_attr_type (insn);
28842
28843 switch (attr_type)
28844 {
28845 case TYPE_FP:
28846 if (get_attr_type (dep_insn) == TYPE_FP)
28847 return 1;
28848 break;
28849 case TYPE_FPLOAD:
28850 if (get_attr_update (insn) == UPDATE_NO
28851 && get_attr_type (dep_insn) == TYPE_MFFGPR)
28852 return 2;
28853 break;
28854 default:
28855 break;
28856 }
28857 } /* Fall through: output dependencies not handled above cost 0. */
28858 case REG_DEP_ANTI:
28859 /* Anti dependency; DEP_INSN reads a register that INSN writes some
28860 cycles later. */
28861 return 0;
28862
28863 default:
28864 gcc_unreachable ();
28865 }
28866
28867 return cost;
28868 }
28869
28870 /* Debug version of rs6000_adjust_cost. */
28871
28872 static int
28873 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
28874 int cost)
28875 {
28876 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
28877
28878 if (ret != cost)
28879 {
28880 const char *dep;
28881
28882 switch (REG_NOTE_KIND (link))
28883 {
28884 default: dep = "unknown dependency"; break;
28885 case REG_DEP_TRUE: dep = "data dependency"; break;
28886 case REG_DEP_OUTPUT: dep = "output dependency"; break;
28887 case REG_DEP_ANTI: dep = "anti dependency"; break;
28888 }
28889
28890 fprintf (stderr,
28891 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
28892 "%s, insn:\n", ret, cost, dep);
28893
28894 debug_rtx (insn);
28895 }
28896
28897 return ret;
28898 }
28899
28900 /* Return true if INSN is microcoded, and false otherwise. */
28902
28903 static bool
28904 is_microcoded_insn (rtx_insn *insn)
28905 {
28906 if (!insn || !NONDEBUG_INSN_P (insn)
28907 || GET_CODE (PATTERN (insn)) == USE
28908 || GET_CODE (PATTERN (insn)) == CLOBBER)
28909 return false;
28910
28911 if (rs6000_cpu_attr == CPU_CELL)
28912 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
28913
28914 if (rs6000_sched_groups
28915 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
28916 {
28917 enum attr_type type = get_attr_type (insn);
28918 if ((type == TYPE_LOAD
28919 && get_attr_update (insn) == UPDATE_YES
28920 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
28921 || ((type == TYPE_LOAD || type == TYPE_STORE)
28922 && get_attr_update (insn) == UPDATE_YES
28923 && get_attr_indexed (insn) == INDEXED_YES)
28924 || type == TYPE_MFCR)
28925 return true;
28926 }
28927
28928 return false;
28929 }
28930
28931 /* The function returns true if INSN is cracked into 2 instructions
28932 by the processor (and therefore occupies 2 issue slots). */
28933
28934 static bool
28935 is_cracked_insn (rtx_insn *insn)
28936 {
28937 if (!insn || !NONDEBUG_INSN_P (insn)
28938 || GET_CODE (PATTERN (insn)) == USE
28939 || GET_CODE (PATTERN (insn)) == CLOBBER)
28940 return false;
28941
28942 if (rs6000_sched_groups
28943 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
28944 {
28945 enum attr_type type = get_attr_type (insn);
28946 if ((type == TYPE_LOAD
28947 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28948 && get_attr_update (insn) == UPDATE_NO)
28949 || (type == TYPE_LOAD
28950 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
28951 && get_attr_update (insn) == UPDATE_YES
28952 && get_attr_indexed (insn) == INDEXED_NO)
28953 || (type == TYPE_STORE
28954 && get_attr_update (insn) == UPDATE_YES
28955 && get_attr_indexed (insn) == INDEXED_NO)
28956 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
28957 && get_attr_update (insn) == UPDATE_YES)
28958 || type == TYPE_DELAYED_CR
28959 || (type == TYPE_EXTS
28960 && get_attr_dot (insn) == DOT_YES)
28961 || (type == TYPE_SHIFT
28962 && get_attr_dot (insn) == DOT_YES
28963 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
28964 || (type == TYPE_MUL
28965 && get_attr_dot (insn) == DOT_YES)
28966 || type == TYPE_DIV
28967 || (type == TYPE_INSERT
28968 && get_attr_size (insn) == SIZE_32))
28969 return true;
28970 }
28971
28972 return false;
28973 }
28974
28975 /* The function returns true if INSN can be issued only from
28976 the branch slot. */
28977
28978 static bool
28979 is_branch_slot_insn (rtx_insn *insn)
28980 {
28981 if (!insn || !NONDEBUG_INSN_P (insn)
28982 || GET_CODE (PATTERN (insn)) == USE
28983 || GET_CODE (PATTERN (insn)) == CLOBBER)
28984 return false;
28985
28986 if (rs6000_sched_groups)
28987 {
28988 enum attr_type type = get_attr_type (insn);
28989 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
28990 return true;
28991 return false;
28992 }
28993
28994 return false;
28995 }
28996
28997 /* Return true if OUT_INSN sets a value that is used in the
28998 address generation computation of IN_INSN. */
28999 static bool
29000 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
29001 {
29002 rtx out_set, in_set;
29003
29004 /* For performance reasons, only handle the simple case where
29005 both loads are a single_set. */
29006 out_set = single_set (out_insn);
29007 if (out_set)
29008 {
29009 in_set = single_set (in_insn);
29010 if (in_set)
29011 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
29012 }
29013
29014 return false;
29015 }
29016
29017 /* Try to determine base/offset/size parts of the given MEM.
29018 Return true if successful, false if any of the values could not
29019 be determined.
29020
29021 This function only looks for REG or REG+CONST address forms.
29022 REG+REG address form will return false. */
29023
29024 static bool
29025 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
29026 HOST_WIDE_INT *size)
29027 {
29028 rtx addr_rtx;
29029 if (MEM_SIZE_KNOWN_P (mem))
29030 *size = MEM_SIZE (mem);
29031 else
29032 return false;
29033
29034 addr_rtx = (XEXP (mem, 0));
29035 if (GET_CODE (addr_rtx) == PRE_MODIFY)
29036 addr_rtx = XEXP (addr_rtx, 1);
29037
29038 *offset = 0;
29039 while (GET_CODE (addr_rtx) == PLUS
29040 && CONST_INT_P (XEXP (addr_rtx, 1)))
29041 {
29042 *offset += INTVAL (XEXP (addr_rtx, 1));
29043 addr_rtx = XEXP (addr_rtx, 0);
29044 }
29045 if (!REG_P (addr_rtx))
29046 return false;
29047
29048 *base = addr_rtx;
29049 return true;
29050 }
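/* Illustrative RTL (not from a real dump): a MEM such as
   (mem:DI (plus:DI (reg:DI 9) (const_int 16))) with a known 8-byte size
   yields *BASE = (reg:DI 9), *OFFSET = 16, *SIZE = 8, whereas a REG+REG
   (indexed) address leaves a non-REG base and makes this return false.  */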
29051
29052 /* Return true if the target storage location of MEM1 is adjacent
29053 to the target storage location of MEM2. */
29055
29056 static bool
29057 adjacent_mem_locations (rtx mem1, rtx mem2)
29058 {
29059 rtx reg1, reg2;
29060 HOST_WIDE_INT off1, size1, off2, size2;
29061
29062 if (get_memref_parts (mem1, &reg1, &off1, &size1)
29063 && get_memref_parts (mem2, &reg2, &off2, &size2))
29064 return ((REGNO (reg1) == REGNO (reg2))
29065 && ((off1 + size1 == off2)
29066 || (off2 + size2 == off1)));
29067
29068 return false;
29069 }
29070
29071 /* This function returns true if it can be determined that the two MEM
29072 locations overlap by at least 1 byte based on base reg/offset/size. */
29073
29074 static bool
29075 mem_locations_overlap (rtx mem1, rtx mem2)
29076 {
29077 rtx reg1, reg2;
29078 HOST_WIDE_INT off1, size1, off2, size2;
29079
29080 if (get_memref_parts (mem1, &reg1, &off1, &size1)
29081 && get_memref_parts (mem2, &reg2, &off2, &size2))
29082 return ((REGNO (reg1) == REGNO (reg2))
29083 && (((off1 <= off2) && (off1 + size1 > off2))
29084 || ((off2 <= off1) && (off2 + size2 > off1))));
29085
29086 return false;
29087 }
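/* To make both predicates concrete (hypothetical numbers, common base
   register): a 4-byte access at offset 0 and one at offset 4 are
   adjacent but do not overlap; an 8-byte access at offset 0 does
   overlap a 4-byte access at offset 4.  */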
29088
29089 /* A C statement (sans semicolon) to update the integer scheduling
29090 priority INSN_PRIORITY (INSN). Increase the priority to execute the
29091 INSN earlier, reduce the priority to execute INSN later. Do not
29092 define this macro if you do not need to adjust the scheduling
29093 priorities of insns. */
29094
29095 static int
29096 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
29097 {
29098 rtx load_mem, str_mem;
29099 /* On machines (like the 750) which have asymmetric integer units,
29100 where one integer unit can do multiply and divides and the other
29101 can't, reduce the priority of multiply/divide so it is scheduled
29102 before other integer operations. */
29103
29104 #if 0
29105 if (! INSN_P (insn))
29106 return priority;
29107
29108 if (GET_CODE (PATTERN (insn)) == USE)
29109 return priority;
29110
29111 switch (rs6000_cpu_attr) {
29112 case CPU_PPC750:
29113 switch (get_attr_type (insn))
29114 {
29115 default:
29116 break;
29117
29118 case TYPE_MUL:
29119 case TYPE_DIV:
29120 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
29121 priority, priority);
29122 if (priority >= 0 && priority < 0x01000000)
29123 priority >>= 3;
29124 break;
29125 }
29126 }
29127 #endif
29128
29129 if (insn_must_be_first_in_group (insn)
29130 && reload_completed
29131 && current_sched_info->sched_max_insns_priority
29132 && rs6000_sched_restricted_insns_priority)
29133 {
29134
29135 /* Prioritize insns that can be dispatched only in the first
29136 dispatch slot. */
29137 if (rs6000_sched_restricted_insns_priority == 1)
29138 /* Attach highest priority to insn. This means that in
29139 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
29140 precede 'priority' (critical path) considerations. */
29141 return current_sched_info->sched_max_insns_priority;
29142 else if (rs6000_sched_restricted_insns_priority == 2)
29143 /* Increase priority of insn by a minimal amount. This means that in
29144 haifa-sched.c:ready_sort(), only 'priority' (critical path)
29145 considerations precede dispatch-slot restriction considerations. */
29146 return (priority + 1);
29147 }
29148
29149 if (rs6000_cpu == PROCESSOR_POWER6
29150 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
29151 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
29152 /* Attach highest priority to insn if the scheduler has just issued two
29153 stores and this instruction is a load, or two loads and this instruction
29154 is a store. Power6 wants loads and stores scheduled alternately
29155 when possible */
29156 return current_sched_info->sched_max_insns_priority;
29157
29158 return priority;
29159 }
29160
29161 /* Return true if the instruction is nonpipelined on the Cell. */
29162 static bool
29163 is_nonpipeline_insn (rtx_insn *insn)
29164 {
29165 enum attr_type type;
29166 if (!insn || !NONDEBUG_INSN_P (insn)
29167 || GET_CODE (PATTERN (insn)) == USE
29168 || GET_CODE (PATTERN (insn)) == CLOBBER)
29169 return false;
29170
29171 type = get_attr_type (insn);
29172 if (type == TYPE_MUL
29173 || type == TYPE_DIV
29174 || type == TYPE_SDIV
29175 || type == TYPE_DDIV
29176 || type == TYPE_SSQRT
29177 || type == TYPE_DSQRT
29178 || type == TYPE_MFCR
29179 || type == TYPE_MFCRF
29180 || type == TYPE_MFJMPR)
29181 {
29182 return true;
29183 }
29184 return false;
29185 }
29186
29187
29188 /* Return how many instructions the machine can issue per cycle. */
29189
29190 static int
29191 rs6000_issue_rate (void)
29192 {
29193 /* Unless scheduling for register pressure, use issue rate of 1 for
29194 first scheduling pass to decrease degradation. */
29195 if (!reload_completed && !flag_sched_pressure)
29196 return 1;
29197
29198 switch (rs6000_cpu_attr) {
29199 case CPU_RS64A:
29200 case CPU_PPC601: /* ? */
29201 case CPU_PPC7450:
29202 return 3;
29203 case CPU_PPC440:
29204 case CPU_PPC603:
29205 case CPU_PPC750:
29206 case CPU_PPC7400:
29207 case CPU_PPC8540:
29208 case CPU_PPC8548:
29209 case CPU_CELL:
29210 case CPU_PPCE300C2:
29211 case CPU_PPCE300C3:
29212 case CPU_PPCE500MC:
29213 case CPU_PPCE500MC64:
29214 case CPU_PPCE5500:
29215 case CPU_PPCE6500:
29216 case CPU_TITAN:
29217 return 2;
29218 case CPU_PPC476:
29219 case CPU_PPC604:
29220 case CPU_PPC604E:
29221 case CPU_PPC620:
29222 case CPU_PPC630:
29223 return 4;
29224 case CPU_POWER4:
29225 case CPU_POWER5:
29226 case CPU_POWER6:
29227 case CPU_POWER7:
29228 return 5;
29229 case CPU_POWER8:
29230 case CPU_POWER9:
29231 return 7;
29232 default:
29233 return 1;
29234 }
29235 }
29236
29237 /* Return how many instructions to look ahead for better insn
29238 scheduling. */
29239
29240 static int
29241 rs6000_use_sched_lookahead (void)
29242 {
29243 switch (rs6000_cpu_attr)
29244 {
29245 case CPU_PPC8540:
29246 case CPU_PPC8548:
29247 return 4;
29248
29249 case CPU_CELL:
29250 return (reload_completed ? 8 : 0);
29251
29252 default:
29253 return 0;
29254 }
29255 }
29256
29257 /* We are choosing an insn from the ready queue. Return zero if INSN can be
29258 chosen. */
29259 static int
29260 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
29261 {
29262 if (ready_index == 0)
29263 return 0;
29264
29265 if (rs6000_cpu_attr != CPU_CELL)
29266 return 0;
29267
29268 gcc_assert (insn != NULL_RTX && INSN_P (insn));
29269
29270 if (!reload_completed
29271 || is_nonpipeline_insn (insn)
29272 || is_microcoded_insn (insn))
29273 return 1;
29274
29275 return 0;
29276 }
29277
29278 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
29279 and return true. */
29280
29281 static bool
29282 find_mem_ref (rtx pat, rtx *mem_ref)
29283 {
29284 const char * fmt;
29285 int i, j;
29286
29287 /* stack_tie does not produce any real memory traffic. */
29288 if (tie_operand (pat, VOIDmode))
29289 return false;
29290
29291 if (GET_CODE (pat) == MEM)
29292 {
29293 *mem_ref = pat;
29294 return true;
29295 }
29296
29297 /* Recursively process the pattern. */
29298 fmt = GET_RTX_FORMAT (GET_CODE (pat));
29299
29300 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
29301 {
29302 if (fmt[i] == 'e')
29303 {
29304 if (find_mem_ref (XEXP (pat, i), mem_ref))
29305 return true;
29306 }
29307 else if (fmt[i] == 'E')
29308 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
29309 {
29310 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
29311 return true;
29312 }
29313 }
29314
29315 return false;
29316 }
29317
29318 /* Determine if PAT is a PATTERN of a load insn. */
29319
29320 static bool
29321 is_load_insn1 (rtx pat, rtx *load_mem)
29322 {
29323 if (!pat)
29324 return false;
29325
29326 if (GET_CODE (pat) == SET)
29327 return find_mem_ref (SET_SRC (pat), load_mem);
29328
29329 if (GET_CODE (pat) == PARALLEL)
29330 {
29331 int i;
29332
29333 for (i = 0; i < XVECLEN (pat, 0); i++)
29334 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
29335 return true;
29336 }
29337
29338 return false;
29339 }
29340
29341 /* Determine if INSN loads from memory. */
29342
29343 static bool
29344 is_load_insn (rtx insn, rtx *load_mem)
29345 {
29346 if (!insn || !INSN_P (insn))
29347 return false;
29348
29349 if (CALL_P (insn))
29350 return false;
29351
29352 return is_load_insn1 (PATTERN (insn), load_mem);
29353 }
29354
29355 /* Determine if PAT is a PATTERN of a store insn. */
29356
29357 static bool
29358 is_store_insn1 (rtx pat, rtx *str_mem)
29359 {
29360 if (!pat)
29361 return false;
29362
29363 if (GET_CODE (pat) == SET)
29364 return find_mem_ref (SET_DEST (pat), str_mem);
29365
29366 if (GET_CODE (pat) == PARALLEL)
29367 {
29368 int i;
29369
29370 for (i = 0; i < XVECLEN (pat, 0); i++)
29371 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
29372 return true;
29373 }
29374
29375 return false;
29376 }
29377
29378 /* Determine if INSN stores to memory. */
29379
29380 static bool
29381 is_store_insn (rtx insn, rtx *str_mem)
29382 {
29383 if (!insn || !INSN_P (insn))
29384 return false;
29385
29386 return is_store_insn1 (PATTERN (insn), str_mem);
29387 }
29388
29389 /* Returns whether the dependence between INSN and NEXT is considered
29390 costly by the given target. */
29391
29392 static bool
29393 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
29394 {
29395 rtx insn;
29396 rtx next;
29397 rtx load_mem, str_mem;
29398
29399 /* If the flag is not enabled, no dependence is considered costly;
29400 allow all dependent insns in the same group.
29401 This is the most aggressive option. */
29402 if (rs6000_sched_costly_dep == no_dep_costly)
29403 return false;
29404
29405 /* If the flag is set to 1, a dependence is always considered costly;
29406 do not allow dependent instructions in the same group.
29407 This is the most conservative option. */
29408 if (rs6000_sched_costly_dep == all_deps_costly)
29409 return true;
29410
29411 insn = DEP_PRO (dep);
29412 next = DEP_CON (dep);
29413
29414 if (rs6000_sched_costly_dep == store_to_load_dep_costly
29415 && is_load_insn (next, &load_mem)
29416 && is_store_insn (insn, &str_mem))
29417 /* Prevent load after store in the same group. */
29418 return true;
29419
29420 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
29421 && is_load_insn (next, &load_mem)
29422 && is_store_insn (insn, &str_mem)
29423 && DEP_TYPE (dep) == REG_DEP_TRUE
29424 && mem_locations_overlap (str_mem, load_mem))
29425 /* Prevent load after store in the same group if it is a true
29426 dependence. */
29427 return true;
29428
29429 /* The flag is set to X; dependences with latency >= X are considered costly,
29430 and will not be scheduled in the same group. */
29431 if (rs6000_sched_costly_dep <= max_dep_latency
29432 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
29433 return true;
29434
29435 return false;
29436 }
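
/* Illustrative example: with -msched-costly-dep=2, a dependence of
   cost 3 between insns at distance 0 gives 3 - 0 >= 2, so the two
   insns are kept in separate dispatch groups by the code above. */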
29437
29438 /* Return the next insn after INSN that is found before TAIL is reached,
29439 skipping any "non-active" insns - insns that will not actually occupy
29440 an issue slot. Return NULL_RTX if such an insn is not found. */
29441
29442 static rtx_insn *
29443 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
29444 {
29445 if (insn == NULL_RTX || insn == tail)
29446 return NULL;
29447
29448 while (1)
29449 {
29450 insn = NEXT_INSN (insn);
29451 if (insn == NULL_RTX || insn == tail)
29452 return NULL;
29453
29454 if (CALL_P (insn)
29455 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
29456 || (NONJUMP_INSN_P (insn)
29457 && GET_CODE (PATTERN (insn)) != USE
29458 && GET_CODE (PATTERN (insn)) != CLOBBER
29459 && INSN_CODE (insn) != CODE_FOR_stack_tie))
29460 break;
29461 }
29462 return insn;
29463 }
29464
29465 /* We are about to begin issuing insns for this clock cycle. */
29466
29467 static int
29468 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
29469 rtx_insn **ready ATTRIBUTE_UNUSED,
29470 int *pn_ready ATTRIBUTE_UNUSED,
29471 int clock_var ATTRIBUTE_UNUSED)
29472 {
29473 int n_ready = *pn_ready;
29474
29475 if (sched_verbose)
29476 fprintf (dump, "// rs6000_sched_reorder :\n");
29477
29478 /* Reorder the ready list if the next insn to be issued
29479 is a nonpipeline insn. */
29480 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
29481 {
29482 if (is_nonpipeline_insn (ready[n_ready - 1])
29483 && (recog_memoized (ready[n_ready - 2]) > 0))
29484 /* Simply swap first two insns. */
29485 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
29486 }
29487
29488 if (rs6000_cpu == PROCESSOR_POWER6)
29489 load_store_pendulum = 0;
29490
29491 return rs6000_issue_rate ();
29492 }
29493
29494 /* Like rs6000_sched_reorder, but called after issuing each insn. */
29495
29496 static int
29497 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
29498 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
29499 {
29500 if (sched_verbose)
29501 fprintf (dump, "// rs6000_sched_reorder2 :\n");
29502
29503 /* For Power6, we need to handle some special cases to try and keep the
29504 store queue from overflowing and triggering expensive flushes.
29505
29506 This code monitors how load and store instructions are being issued
29507 and skews the ready list one way or the other to increase the likelihood
29508 that a desired instruction is issued at the proper time.
29509
29510 A couple of things are done. First, we maintain a "load_store_pendulum"
29511 to track the current state of load/store issue.
29512
29513 - If the pendulum is at zero, then no loads or stores have been
29514 issued in the current cycle so we do nothing.
29515
29516 - If the pendulum is 1, then a single load has been issued in this
29517 cycle and we attempt to locate another load in the ready list to
29518 issue with it.
29519
29520 - If the pendulum is -2, then two stores have already been
29521 issued in this cycle, so we increase the priority of the first load
29522 in the ready list to increase its likelihood of being chosen first
29523 in the next cycle.
29524
29525 - If the pendulum is -1, then a single store has been issued in this
29526 cycle and we attempt to locate another store in the ready list to
29527 issue with it, preferring a store to an adjacent memory location to
29528 facilitate store pairing in the store queue.
29529
29530 - If the pendulum is 2, then two loads have already been
29531 issued in this cycle, so we increase the priority of the first store
29532 in the ready list to increase its likelihood of being chosen first
29533 in the next cycle.
29534
29535 - If the pendulum < -2 or > 2, then do nothing.
29536
29537 Note: This code covers the most common scenarios. There exist
29538 non-load/store instructions which make use of the LSU and which
29539 would need to be accounted for to strictly model the behavior
29540 of the machine. Those instructions are currently unaccounted
29541 for to help minimize compile time overhead of this code.
29542 */
29543 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
29544 {
29545 int pos;
29546 int i;
29547 rtx_insn *tmp;
29548 rtx load_mem, str_mem;
29549
29550 if (is_store_insn (last_scheduled_insn, &str_mem))
29551 /* Issuing a store, swing the load_store_pendulum to the left */
29552 load_store_pendulum--;
29553 else if (is_load_insn (last_scheduled_insn, &load_mem))
29554 /* Issuing a load, swing the load_store_pendulum to the right */
29555 load_store_pendulum++;
29556 else
29557 return cached_can_issue_more;
29558
29559 /* If the pendulum is balanced, or there is only one instruction on
29560 the ready list, then all is well, so return. */
29561 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
29562 return cached_can_issue_more;
29563
29564 if (load_store_pendulum == 1)
29565 {
29566 /* A load has been issued in this cycle. Scan the ready list
29567 for another load to issue with it */
29568 pos = *pn_ready - 1;
29569
29570 while (pos >= 0)
29571 {
29572 if (is_load_insn (ready[pos], &load_mem))
29573 {
29574 /* Found a load. Move it to the head of the ready list,
29575 and adjust its priority so that it is more likely to
29576 stay there. */
29577 tmp = ready[pos];
29578 for (i = pos; i < *pn_ready - 1; i++)
29579 ready[i] = ready[i + 1];
29580 ready[*pn_ready - 1] = tmp;
29581
29582 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
29583 INSN_PRIORITY (tmp)++;
29584 break;
29585 }
29586 pos--;
29587 }
29588 }
29589 else if (load_store_pendulum == -2)
29590 {
29591 /* Two stores have been issued in this cycle. Increase the
29592 priority of the first load in the ready list to favor it for
29593 issuing in the next cycle. */
29594 pos = *pn_ready - 1;
29595
29596 while (pos >= 0)
29597 {
29598 if (is_load_insn (ready[pos], &load_mem)
29599 && !sel_sched_p ()
29600 && INSN_PRIORITY_KNOWN (ready[pos]))
29601 {
29602 INSN_PRIORITY (ready[pos])++;
29603
29604 /* Adjust the pendulum to account for the fact that a load
29605 was found and increased in priority. This is to prevent
29606 increasing the priority of multiple loads */
29607 load_store_pendulum--;
29608
29609 break;
29610 }
29611 pos--;
29612 }
29613 }
29614 else if (load_store_pendulum == -1)
29615 {
29616 /* A store has been issued in this cycle. Scan the ready list for
29617 another store to issue with it, preferring a store to an adjacent
29618 memory location */
29619 int first_store_pos = -1;
29620
29621 pos = *pn_ready - 1;
29622
29623 while (pos >= 0)
29624 {
29625 if (is_store_insn (ready[pos], &str_mem))
29626 {
29627 rtx str_mem2;
29628 /* Maintain the index of the first store found on the
29629 list */
29630 if (first_store_pos == -1)
29631 first_store_pos = pos;
29632
29633 if (is_store_insn (last_scheduled_insn, &str_mem2)
29634 && adjacent_mem_locations (str_mem, str_mem2))
29635 {
29636 /* Found an adjacent store. Move it to the head of the
29637 ready list, and adjust its priority so that it is
29638 more likely to stay there. */
29639 tmp = ready[pos];
29640 for (i = pos; i < *pn_ready - 1; i++)
29641 ready[i] = ready[i + 1];
29642 ready[*pn_ready - 1] = tmp;
29643
29644 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
29645 INSN_PRIORITY (tmp)++;
29646
29647 first_store_pos = -1;
29648
29649 break;
29650 }
29651 }
29652 pos--;
29653 }
29654
29655 if (first_store_pos >= 0)
29656 {
29657 /* An adjacent store wasn't found, but a non-adjacent store was,
29658 so move the non-adjacent store to the front of the ready
29659 list, and adjust its priority so that it is more likely to
29660 stay there. */
29661 tmp = ready[first_store_pos];
29662 for (i = first_store_pos; i < *pn_ready - 1; i++)
29663 ready[i] = ready[i + 1];
29664 ready[*pn_ready - 1] = tmp;
29665 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
29666 INSN_PRIORITY (tmp)++;
29667 }
29668 }
29669 else if (load_store_pendulum == 2)
29670 {
29671 /* Two loads have been issued in this cycle. Increase the priority
29672 of the first store in the ready list to favor it for issuing in
29673 the next cycle. */
29674 pos = *pn_ready - 1;
29675
29676 while (pos >= 0)
29677 {
29678 if (is_store_insn (ready[pos], &str_mem)
29679 && !sel_sched_p ()
29680 && INSN_PRIORITY_KNOWN (ready[pos]))
29681 {
29682 INSN_PRIORITY (ready[pos])++;
29683
29684 /* Adjust the pendulum to account for the fact that a store
29685 was found and increased in priority. This is to prevent
29686 increasing the priority of multiple stores */
29687 load_store_pendulum++;
29688
29689 break;
29690 }
29691 pos--;
29692 }
29693 }
29694 }
29695
29696 return cached_can_issue_more;
29697 }
29698
29699 /* Return whether the presence of INSN causes a dispatch group termination
29700 of group WHICH_GROUP.
29701
29702 If WHICH_GROUP == current_group, this function will return true if INSN
29703 causes the termination of the current group (i.e., the dispatch group to
29704 which INSN belongs). This means that INSN will be the last insn in the
29705 group it belongs to.
29706
29707 If WHICH_GROUP == previous_group, this function will return true if INSN
29708 causes the termination of the previous group (i.e., the dispatch group that
29709 precedes the group to which INSN belongs). This means that INSN will be
29710 the first insn in the group it belongs to. */
29711
29712 static bool
29713 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
29714 {
29715 bool first, last;
29716
29717 if (! insn)
29718 return false;
29719
29720 first = insn_must_be_first_in_group (insn);
29721 last = insn_must_be_last_in_group (insn);
29722
29723 if (first && last)
29724 return true;
29725
29726 if (which_group == current_group)
29727 return last;
29728 else if (which_group == previous_group)
29729 return first;
29730
29731 return false;
29732 }
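
/* For example, on POWER4 an isync must be the first insn of its
   dispatch group, so insn_terminates_group_p returns true for it with
   WHICH_GROUP == previous_group: the preceding group necessarily ends
   before the isync. */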
29733
29734
29735 static bool
29736 insn_must_be_first_in_group (rtx_insn *insn)
29737 {
29738 enum attr_type type;
29739
29740 if (!insn
29741 || NOTE_P (insn)
29742 || DEBUG_INSN_P (insn)
29743 || GET_CODE (PATTERN (insn)) == USE
29744 || GET_CODE (PATTERN (insn)) == CLOBBER)
29745 return false;
29746
29747 switch (rs6000_cpu)
29748 {
29749 case PROCESSOR_POWER5:
29750 if (is_cracked_insn (insn))
29751 return true;
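/* FALLTHRU: the POWER4 checks below apply to POWER5 as well. */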
29752 case PROCESSOR_POWER4:
29753 if (is_microcoded_insn (insn))
29754 return true;
29755
29756 if (!rs6000_sched_groups)
29757 return false;
29758
29759 type = get_attr_type (insn);
29760
29761 switch (type)
29762 {
29763 case TYPE_MFCR:
29764 case TYPE_MFCRF:
29765 case TYPE_MTCR:
29766 case TYPE_DELAYED_CR:
29767 case TYPE_CR_LOGICAL:
29768 case TYPE_MTJMPR:
29769 case TYPE_MFJMPR:
29770 case TYPE_DIV:
29771 case TYPE_LOAD_L:
29772 case TYPE_STORE_C:
29773 case TYPE_ISYNC:
29774 case TYPE_SYNC:
29775 return true;
29776 default:
29777 break;
29778 }
29779 break;
29780 case PROCESSOR_POWER6:
29781 type = get_attr_type (insn);
29782
29783 switch (type)
29784 {
29785 case TYPE_EXTS:
29786 case TYPE_CNTLZ:
29787 case TYPE_TRAP:
29788 case TYPE_MUL:
29789 case TYPE_INSERT:
29790 case TYPE_FPCOMPARE:
29791 case TYPE_MFCR:
29792 case TYPE_MTCR:
29793 case TYPE_MFJMPR:
29794 case TYPE_MTJMPR:
29795 case TYPE_ISYNC:
29796 case TYPE_SYNC:
29797 case TYPE_LOAD_L:
29798 case TYPE_STORE_C:
29799 return true;
29800 case TYPE_SHIFT:
29801 if (get_attr_dot (insn) == DOT_NO
29802 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
29803 return true;
29804 else
29805 break;
29806 case TYPE_DIV:
29807 if (get_attr_size (insn) == SIZE_32)
29808 return true;
29809 else
29810 break;
29811 case TYPE_LOAD:
29812 case TYPE_STORE:
29813 case TYPE_FPLOAD:
29814 case TYPE_FPSTORE:
29815 if (get_attr_update (insn) == UPDATE_YES)
29816 return true;
29817 else
29818 break;
29819 default:
29820 break;
29821 }
29822 break;
29823 case PROCESSOR_POWER7:
29824 type = get_attr_type (insn);
29825
29826 switch (type)
29827 {
29828 case TYPE_CR_LOGICAL:
29829 case TYPE_MFCR:
29830 case TYPE_MFCRF:
29831 case TYPE_MTCR:
29832 case TYPE_DIV:
29833 case TYPE_ISYNC:
29834 case TYPE_LOAD_L:
29835 case TYPE_STORE_C:
29836 case TYPE_MFJMPR:
29837 case TYPE_MTJMPR:
29838 return true;
29839 case TYPE_MUL:
29840 case TYPE_SHIFT:
29841 case TYPE_EXTS:
29842 if (get_attr_dot (insn) == DOT_YES)
29843 return true;
29844 else
29845 break;
29846 case TYPE_LOAD:
29847 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
29848 || get_attr_update (insn) == UPDATE_YES)
29849 return true;
29850 else
29851 break;
29852 case TYPE_STORE:
29853 case TYPE_FPLOAD:
29854 case TYPE_FPSTORE:
29855 if (get_attr_update (insn) == UPDATE_YES)
29856 return true;
29857 else
29858 break;
29859 default:
29860 break;
29861 }
29862 break;
29863 case PROCESSOR_POWER8:
29864 case PROCESSOR_POWER9:
29865 type = get_attr_type (insn);
29866
29867 switch (type)
29868 {
29869 case TYPE_CR_LOGICAL:
29870 case TYPE_DELAYED_CR:
29871 case TYPE_MFCR:
29872 case TYPE_MFCRF:
29873 case TYPE_MTCR:
29874 case TYPE_SYNC:
29875 case TYPE_ISYNC:
29876 case TYPE_LOAD_L:
29877 case TYPE_STORE_C:
29878 case TYPE_VECSTORE:
29879 case TYPE_MFJMPR:
29880 case TYPE_MTJMPR:
29881 return true;
29882 case TYPE_SHIFT:
29883 case TYPE_EXTS:
29884 case TYPE_MUL:
29885 if (get_attr_dot (insn) == DOT_YES)
29886 return true;
29887 else
29888 break;
29889 case TYPE_LOAD:
29890 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
29891 || get_attr_update (insn) == UPDATE_YES)
29892 return true;
29893 else
29894 break;
29895 case TYPE_STORE:
29896 if (get_attr_update (insn) == UPDATE_YES
29897 && get_attr_indexed (insn) == INDEXED_YES)
29898 return true;
29899 else
29900 break;
29901 default:
29902 break;
29903 }
29904 break;
29905 default:
29906 break;
29907 }
29908
29909 return false;
29910 }
29911
29912 static bool
29913 insn_must_be_last_in_group (rtx_insn *insn)
29914 {
29915 enum attr_type type;
29916
29917 if (!insn
29918 || NOTE_P (insn)
29919 || DEBUG_INSN_P (insn)
29920 || GET_CODE (PATTERN (insn)) == USE
29921 || GET_CODE (PATTERN (insn)) == CLOBBER)
29922 return false;
29923
29924 switch (rs6000_cpu)
    {
29925 case PROCESSOR_POWER4:
29926 case PROCESSOR_POWER5:
29927 if (is_microcoded_insn (insn))
29928 return true;
29929
29930 if (is_branch_slot_insn (insn))
29931 return true;
29932
29933 break;
29934 case PROCESSOR_POWER6:
29935 type = get_attr_type (insn);
29936
29937 switch (type)
29938 {
29939 case TYPE_EXTS:
29940 case TYPE_CNTLZ:
29941 case TYPE_TRAP:
29942 case TYPE_MUL:
29943 case TYPE_FPCOMPARE:
29944 case TYPE_MFCR:
29945 case TYPE_MTCR:
29946 case TYPE_MFJMPR:
29947 case TYPE_MTJMPR:
29948 case TYPE_ISYNC:
29949 case TYPE_SYNC:
29950 case TYPE_LOAD_L:
29951 case TYPE_STORE_C:
29952 return true;
29953 case TYPE_SHIFT:
29954 if (get_attr_dot (insn) == DOT_NO
29955 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
29956 return true;
29957 else
29958 break;
29959 case TYPE_DIV:
29960 if (get_attr_size (insn) == SIZE_32)
29961 return true;
29962 else
29963 break;
29964 default:
29965 break;
29966 }
29967 break;
29968 case PROCESSOR_POWER7:
29969 type = get_attr_type (insn);
29970
29971 switch (type)
29972 {
29973 case TYPE_ISYNC:
29974 case TYPE_SYNC:
29975 case TYPE_LOAD_L:
29976 case TYPE_STORE_C:
29977 return true;
29978 case TYPE_LOAD:
29979 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
29980 && get_attr_update (insn) == UPDATE_YES)
29981 return true;
29982 else
29983 break;
29984 case TYPE_STORE:
29985 if (get_attr_update (insn) == UPDATE_YES
29986 && get_attr_indexed (insn) == INDEXED_YES)
29987 return true;
29988 else
29989 break;
29990 default:
29991 break;
29992 }
29993 break;
29994 case PROCESSOR_POWER8:
29995 case PROCESSOR_POWER9:
29996 type = get_attr_type (insn);
29997
29998 switch (type)
29999 {
30000 case TYPE_MFCR:
30001 case TYPE_MTCR:
30002 case TYPE_ISYNC:
30003 case TYPE_SYNC:
30004 case TYPE_LOAD_L:
30005 case TYPE_STORE_C:
30006 return true;
30007 case TYPE_LOAD:
30008 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30009 && get_attr_update (insn) == UPDATE_YES)
30010 return true;
30011 else
30012 break;
30013 case TYPE_STORE:
30014 if (get_attr_update (insn) == UPDATE_YES
30015 && get_attr_indexed (insn) == INDEXED_YES)
30016 return true;
30017 else
30018 break;
30019 default:
30020 break;
30021 }
30022 break;
30023 default:
30024 break;
30025 }
30026
30027 return false;
30028 }
30029
30030 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
30031 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
30032
30033 static bool
30034 is_costly_group (rtx *group_insns, rtx next_insn)
30035 {
30036 int i;
30037 int issue_rate = rs6000_issue_rate ();
30038
30039 for (i = 0; i < issue_rate; i++)
30040 {
30041 sd_iterator_def sd_it;
30042 dep_t dep;
30043 rtx insn = group_insns[i];
30044
30045 if (!insn)
30046 continue;
30047
30048 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
30049 {
30050 rtx next = DEP_CON (dep);
30051
30052 if (next == next_insn
30053 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
30054 return true;
30055 }
30056 }
30057
30058 return false;
30059 }
30060
30061 /* Helper for the function redefine_groups.
30062 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
30063 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
30064 to keep it "far" (in a separate group) from GROUP_INSNS, according
30065 to one of the following schemes, depending on the value of the flag
30066 -minsert-sched-nops = X:
30067 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
30068 in order to force NEXT_INSN into a separate group.
30069 (2) X < sched_finish_regroup_exact: insert exactly X nops.
30070 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
30071 insertion (has a group just ended, how many vacant issue slots remain in the
30072 last group, and how many dispatch groups were encountered so far). */
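
/* Worked example: under the sched_finish_regroup_exact scheme on a CPU
   without a group-ending nop, three vacant issue slots before a
   non-branch NEXT_INSN result in two nops being emitted (only a branch
   can occupy the last slot), after which NEXT_INSN starts a new group. */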
30073
30074 static int
30075 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
30076 rtx_insn *next_insn, bool *group_end, int can_issue_more,
30077 int *group_count)
30078 {
30079 rtx nop;
30080 bool force;
30081 int issue_rate = rs6000_issue_rate ();
30082 bool end = *group_end;
30083 int i;
30084
30085 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
30086 return can_issue_more;
30087
30088 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
30089 return can_issue_more;
30090
30091 force = is_costly_group (group_insns, next_insn);
30092 if (!force)
30093 return can_issue_more;
30094
30095 if (sched_verbose > 6)
30096 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
30097 *group_count ,can_issue_more);
30098
30099 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
30100 {
30101 if (*group_end)
30102 can_issue_more = 0;
30103
30104 /* Since only a branch can be issued in the last issue_slot, it is
30105 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
30106 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
30107 in this case the last nop will start a new group and the branch
30108 will be forced to the new group. */
30109 if (can_issue_more && !is_branch_slot_insn (next_insn))
30110 can_issue_more--;
30111
30112 /* Do we have a special group ending nop? */
30113 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
30114 || rs6000_cpu_attr == CPU_POWER8 || rs6000_cpu_attr == CPU_POWER9)
30115 {
30116 nop = gen_group_ending_nop ();
30117 emit_insn_before (nop, next_insn);
30118 can_issue_more = 0;
30119 }
30120 else
30121 while (can_issue_more > 0)
30122 {
30123 nop = gen_nop ();
30124 emit_insn_before (nop, next_insn);
30125 can_issue_more--;
30126 }
30127
30128 *group_end = true;
30129 return 0;
30130 }
30131
30132 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
30133 {
30134 int n_nops = rs6000_sched_insert_nops;
30135
30136 /* Nops can't be issued from the branch slot, so the effective
30137 issue_rate for nops is 'issue_rate - 1'. */
30138 if (can_issue_more == 0)
30139 can_issue_more = issue_rate;
30140 can_issue_more--;
30141 if (can_issue_more == 0)
30142 {
30143 can_issue_more = issue_rate - 1;
30144 (*group_count)++;
30145 end = true;
30146 for (i = 0; i < issue_rate; i++)
30147 {
30148 group_insns[i] = 0;
30149 }
30150 }
30151
30152 while (n_nops > 0)
30153 {
30154 nop = gen_nop ();
30155 emit_insn_before (nop, next_insn);
30156 if (can_issue_more == issue_rate - 1) /* new group begins */
30157 end = false;
30158 can_issue_more--;
30159 if (can_issue_more == 0)
30160 {
30161 can_issue_more = issue_rate - 1;
30162 (*group_count)++;
30163 end = true;
30164 for (i = 0; i < issue_rate; i++)
30165 {
30166 group_insns[i] = 0;
30167 }
30168 }
30169 n_nops--;
30170 }
30171
30172 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
30173 can_issue_more++;
30174
30175 /* Is next_insn going to start a new group? */
30176 *group_end
30177 = (end
30178 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
30179 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
30180 || (can_issue_more < issue_rate
30181 && insn_terminates_group_p (next_insn, previous_group)));
30182 if (*group_end && end)
30183 (*group_count)--;
30184
30185 if (sched_verbose > 6)
30186 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
30187 *group_count, can_issue_more);
30188 return can_issue_more;
30189 }
30190
30191 return can_issue_more;
30192 }
30193
30194 /* This function tries to sync the dispatch groups that the compiler "sees"
30195 with the dispatch groups that the processor dispatcher is expected to
30196 form in practice. It tries to achieve this synchronization by forcing the
30197 estimated processor grouping on the compiler (as opposed to the function
30198 'pad_groups', which tries to force the scheduler's grouping on the processor).
30199
30200 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
30201 examines the (estimated) dispatch groups that will be formed by the processor
30202 dispatcher. It marks these group boundaries to reflect the estimated
30203 processor grouping, overriding the grouping that the scheduler had marked.
30204 Depending on the value of the flag '-minsert-sched-nops' this function can
30205 force certain insns into separate groups or force a certain distance between
30206 them by inserting nops, for example, if there exists a "costly dependence"
30207 between the insns.
30208
30209 The function estimates the group boundaries that the processor will form as
30210 follows: It keeps track of how many vacant issue slots are available after
30211 each insn. A subsequent insn will start a new group if one of the following
30212 4 cases applies:
30213 - no more vacant issue slots remain in the current dispatch group.
30214 - only the last issue slot, which is the branch slot, is vacant, but the next
30215 insn is not a branch.
30216 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
30217 which means that a cracked insn (which occupies two issue slots) can't be
30218 issued in this group.
30219 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
30220 start a new group. */
30221
30222 static int
30223 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
30224 rtx_insn *tail)
30225 {
30226 rtx_insn *insn, *next_insn;
30227 int issue_rate;
30228 int can_issue_more;
30229 int slot, i;
30230 bool group_end;
30231 int group_count = 0;
30232 rtx *group_insns;
30233
30234 /* Initialize. */
30235 issue_rate = rs6000_issue_rate ();
30236 group_insns = XALLOCAVEC (rtx, issue_rate);
30237 for (i = 0; i < issue_rate; i++)
30238 {
30239 group_insns[i] = 0;
30240 }
30241 can_issue_more = issue_rate;
30242 slot = 0;
30243 insn = get_next_active_insn (prev_head_insn, tail);
30244 group_end = false;
30245
30246 while (insn != NULL_RTX)
30247 {
30248 slot = (issue_rate - can_issue_more);
30249 group_insns[slot] = insn;
30250 can_issue_more =
30251 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
30252 if (insn_terminates_group_p (insn, current_group))
30253 can_issue_more = 0;
30254
30255 next_insn = get_next_active_insn (insn, tail);
30256 if (next_insn == NULL_RTX)
30257 return group_count + 1;
30258
30259 /* Is next_insn going to start a new group? */
30260 group_end
30261 = (can_issue_more == 0
30262 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
30263 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
30264 || (can_issue_more < issue_rate
30265 && insn_terminates_group_p (next_insn, previous_group)));
30266
30267 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
30268 next_insn, &group_end, can_issue_more,
30269 &group_count);
30270
30271 if (group_end)
30272 {
30273 group_count++;
30274 can_issue_more = 0;
30275 for (i = 0; i < issue_rate; i++)
30276 {
30277 group_insns[i] = 0;
30278 }
30279 }
30280
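      /* The scheduler marks the insn that starts a dispatch group by
         giving it TImode (see pad_groups below); update the mark on
         NEXT_INSN to match the grouping estimated above. */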
30281 if (GET_MODE (next_insn) == TImode && can_issue_more)
30282 PUT_MODE (next_insn, VOIDmode);
30283 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
30284 PUT_MODE (next_insn, TImode);
30285
30286 insn = next_insn;
30287 if (can_issue_more == 0)
30288 can_issue_more = issue_rate;
30289 } /* while */
30290
30291 return group_count;
30292 }
30293
30294 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
30295 dispatch group boundaries that the scheduler had marked. Pad with nops
30296 any dispatch groups which have vacant issue slots, in order to force the
30297 scheduler's grouping on the processor dispatcher. The function
30298 returns the number of dispatch groups found. */
30299
30300 static int
30301 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
30302 rtx_insn *tail)
30303 {
30304 rtx_insn *insn, *next_insn;
30305 rtx nop;
30306 int issue_rate;
30307 int can_issue_more;
30308 int group_end;
30309 int group_count = 0;
30310
30311 /* Initialize issue_rate. */
30312 issue_rate = rs6000_issue_rate ();
30313 can_issue_more = issue_rate;
30314
30315 insn = get_next_active_insn (prev_head_insn, tail);
30316 next_insn = get_next_active_insn (insn, tail);
30317
30318 while (insn != NULL_RTX)
30319 {
30320 can_issue_more =
30321 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
30322
30323 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
30324
30325 if (next_insn == NULL_RTX)
30326 break;
30327
30328 if (group_end)
30329 {
30330 /* If the scheduler had marked group termination at this location
30331 (between insn and next_insn), and neither insn nor next_insn will
30332 force group termination, pad the group with nops to force group
30333 termination. */
30334 if (can_issue_more
30335 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
30336 && !insn_terminates_group_p (insn, current_group)
30337 && !insn_terminates_group_p (next_insn, previous_group))
30338 {
30339 if (!is_branch_slot_insn (next_insn))
30340 can_issue_more--;
30341
30342 while (can_issue_more)
30343 {
30344 nop = gen_nop ();
30345 emit_insn_before (nop, next_insn);
30346 can_issue_more--;
30347 }
30348 }
30349
30350 can_issue_more = issue_rate;
30351 group_count++;
30352 }
30353
30354 insn = next_insn;
30355 next_insn = get_next_active_insn (insn, tail);
30356 }
30357
30358 return group_count;
30359 }
30360
30361 /* We're beginning a new block. Initialize data structures as necessary. */
30362
30363 static void
30364 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
30365 int sched_verbose ATTRIBUTE_UNUSED,
30366 int max_ready ATTRIBUTE_UNUSED)
30367 {
30368 last_scheduled_insn = NULL_RTX;
30369 load_store_pendulum = 0;
30370 }
30371
30372 /* The following function is called at the end of scheduling BB.
30373 After reload, it inserts nops to enforce insn group bundling. */
30374
30375 static void
30376 rs6000_sched_finish (FILE *dump, int sched_verbose)
30377 {
30378 int n_groups;
30379
30380 if (sched_verbose)
30381 fprintf (dump, "=== Finishing schedule.\n");
30382
30383 if (reload_completed && rs6000_sched_groups)
30384 {
30385 /* Do not run sched_finish hook when selective scheduling enabled. */
30386 if (sel_sched_p ())
30387 return;
30388
30389 if (rs6000_sched_insert_nops == sched_finish_none)
30390 return;
30391
30392 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
30393 n_groups = pad_groups (dump, sched_verbose,
30394 current_sched_info->prev_head,
30395 current_sched_info->next_tail);
30396 else
30397 n_groups = redefine_groups (dump, sched_verbose,
30398 current_sched_info->prev_head,
30399 current_sched_info->next_tail);
30400
30401 if (sched_verbose >= 6)
30402 {
30403 fprintf (dump, "ngroups = %d\n", n_groups);
30404 print_rtl (dump, current_sched_info->prev_head);
30405 fprintf (dump, "Done finish_sched\n");
30406 }
30407 }
30408 }
30409
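/* Saved copy of this backend's global scheduling state, used by the
   context hooks below (notably for selective scheduling) to save and
   restore that state per scheduling stream. */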
30410 struct _rs6000_sched_context
30411 {
30412 short cached_can_issue_more;
30413 rtx last_scheduled_insn;
30414 int load_store_pendulum;
30415 };
30416
30417 typedef struct _rs6000_sched_context rs6000_sched_context_def;
30418 typedef rs6000_sched_context_def *rs6000_sched_context_t;
30419
30420 /* Allocate storage for a new scheduling context. */
30421 static void *
30422 rs6000_alloc_sched_context (void)
30423 {
30424 return xmalloc (sizeof (rs6000_sched_context_def));
30425 }
30426
30427 /* If CLEAN_P is true, initialize _SC with clean data;
30428 otherwise initialize it from the global context. */
30429 static void
30430 rs6000_init_sched_context (void *_sc, bool clean_p)
30431 {
30432 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
30433
30434 if (clean_p)
30435 {
30436 sc->cached_can_issue_more = 0;
30437 sc->last_scheduled_insn = NULL_RTX;
30438 sc->load_store_pendulum = 0;
30439 }
30440 else
30441 {
30442 sc->cached_can_issue_more = cached_can_issue_more;
30443 sc->last_scheduled_insn = last_scheduled_insn;
30444 sc->load_store_pendulum = load_store_pendulum;
30445 }
30446 }
30447
30448 /* Sets the global scheduling context to the one pointed to by _SC. */
30449 static void
30450 rs6000_set_sched_context (void *_sc)
30451 {
30452 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
30453
30454 gcc_assert (sc != NULL);
30455
30456 cached_can_issue_more = sc->cached_can_issue_more;
30457 last_scheduled_insn = sc->last_scheduled_insn;
30458 load_store_pendulum = sc->load_store_pendulum;
30459 }
30460
30461 /* Free _SC. */
30462 static void
30463 rs6000_free_sched_context (void *_sc)
30464 {
30465 gcc_assert (_sc != NULL);
30466
30467 free (_sc);
30468 }
30469
30470 \f
30471 /* Length in units of the trampoline for entering a nested function. */
30472
30473 int
30474 rs6000_trampoline_size (void)
30475 {
30476 int ret = 0;
30477
30478 switch (DEFAULT_ABI)
30479 {
30480 default:
30481 gcc_unreachable ();
30482
30483 case ABI_AIX:
30484 ret = (TARGET_32BIT) ? 12 : 24;
30485 break;
30486
30487 case ABI_ELFv2:
30488 gcc_assert (!TARGET_32BIT);
30489 ret = 32;
30490 break;
30491
30492 case ABI_DARWIN:
30493 case ABI_V4:
30494 ret = (TARGET_32BIT) ? 40 : 48;
30495 break;
30496 }
30497
30498 return ret;
30499 }
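
/* For example, under 32-bit AIX the trampoline is 12 bytes: the
   three-word function descriptor (entry point, TOC pointer, static
   chain) that rs6000_trampoline_init builds below. */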
30500
30501 /* Emit RTL insns to initialize the variable parts of a trampoline.
30502 FNADDR is an RTX for the address of the function's pure code.
30503 CXT is an RTX for the static chain value for the function. */
30504
30505 static void
30506 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
30507 {
30508 int regsize = (TARGET_32BIT) ? 4 : 8;
30509 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
30510 rtx ctx_reg = force_reg (Pmode, cxt);
30511 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
30512
30513 switch (DEFAULT_ABI)
30514 {
30515 default:
30516 gcc_unreachable ();
30517
30518 /* Under AIX, just build the 3-word function descriptor. */
30519 case ABI_AIX:
30520 {
30521 rtx fnmem, fn_reg, toc_reg;
30522
30523 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
30524 error ("You cannot take the address of a nested function if you use "
30525 "the -mno-pointers-to-nested-functions option.");
30526
30527 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
30528 fn_reg = gen_reg_rtx (Pmode);
30529 toc_reg = gen_reg_rtx (Pmode);
30530
30531 /* Macro to shorten the code expansions below. */
30532 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
30533
30534 m_tramp = replace_equiv_address (m_tramp, addr);
30535
30536 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
30537 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
30538 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
30539 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
30540 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
30541
30542 # undef MEM_PLUS
30543 }
30544 break;
30545
30546 /* Under V.4/eabi/darwin and ELFv2, __trampoline_setup does the real work. */
30547 case ABI_ELFv2:
30548 case ABI_DARWIN:
30549 case ABI_V4:
30550 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
30551 LCT_NORMAL, VOIDmode, 4,
30552 addr, Pmode,
30553 GEN_INT (rs6000_trampoline_size ()), SImode,
30554 fnaddr, Pmode,
30555 ctx_reg, Pmode);
30556 break;
30557 }
30558 }
30559
30560 \f
30561 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
30562 identifier as an argument, so the front end shouldn't look it up. */
30563
30564 static bool
30565 rs6000_attribute_takes_identifier_p (const_tree attr_id)
30566 {
30567 return is_attribute_p ("altivec", attr_id);
30568 }
30569
30570 /* Handle the "altivec" attribute. The attribute may have
30571 arguments as follows:
30572
30573 __attribute__((altivec(vector__)))
30574 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
30575 __attribute__((altivec(bool__))) (always followed by 'unsigned')
30576
30577 and may appear more than once (e.g., 'vector bool char') in a
30578 given declaration. */
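
/* For example, 'vector unsigned int' arrives here as an
   'altivec(vector__)' attribute on 'unsigned int'; the SImode case in
   the 'v' switch below rewrites it to unsigned_V4SI_type_node. */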
30579
30580 static tree
30581 rs6000_handle_altivec_attribute (tree *node,
30582 tree name ATTRIBUTE_UNUSED,
30583 tree args,
30584 int flags ATTRIBUTE_UNUSED,
30585 bool *no_add_attrs)
30586 {
30587 tree type = *node, result = NULL_TREE;
30588 machine_mode mode;
30589 int unsigned_p;
30590 char altivec_type
30591 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
30592 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
30593 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
30594 : '?');
30595
30596 while (POINTER_TYPE_P (type)
30597 || TREE_CODE (type) == FUNCTION_TYPE
30598 || TREE_CODE (type) == METHOD_TYPE
30599 || TREE_CODE (type) == ARRAY_TYPE)
30600 type = TREE_TYPE (type);
30601
30602 mode = TYPE_MODE (type);
30603
30604 /* Check for invalid AltiVec type qualifiers. */
30605 if (type == long_double_type_node)
30606 error ("use of %<long double%> in AltiVec types is invalid");
30607 else if (type == boolean_type_node)
30608 error ("use of boolean types in AltiVec types is invalid");
30609 else if (TREE_CODE (type) == COMPLEX_TYPE)
30610 error ("use of %<complex%> in AltiVec types is invalid");
30611 else if (DECIMAL_FLOAT_MODE_P (mode))
30612 error ("use of decimal floating point types in AltiVec types is invalid");
30613 else if (!TARGET_VSX)
30614 {
30615 if (type == long_unsigned_type_node || type == long_integer_type_node)
30616 {
30617 if (TARGET_64BIT)
30618 error ("use of %<long%> in AltiVec types is invalid for "
30619 "64-bit code without -mvsx");
30620 else if (rs6000_warn_altivec_long)
30621 warning (0, "use of %<long%> in AltiVec types is deprecated; "
30622 "use %<int%>");
30623 }
30624 else if (type == long_long_unsigned_type_node
30625 || type == long_long_integer_type_node)
30626 error ("use of %<long long%> in AltiVec types is invalid without "
30627 "-mvsx");
30628 else if (type == double_type_node)
30629 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
30630 }
30631
30632 switch (altivec_type)
30633 {
30634 case 'v':
30635 unsigned_p = TYPE_UNSIGNED (type);
30636 switch (mode)
30637 {
30638 case TImode:
30639 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
30640 break;
30641 case DImode:
30642 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
30643 break;
30644 case SImode:
30645 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
30646 break;
30647 case HImode:
30648 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
30649 break;
30650 case QImode:
30651 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
30652 break;
30653 case SFmode: result = V4SF_type_node; break;
30654 case DFmode: result = V2DF_type_node; break;
30655 /* If the user says 'vector int bool', we may be handed the 'bool'
30656 attribute _before_ the 'vector' attribute, and so select the
30657 proper type in the 'b' case below. */
30658 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
30659 case V2DImode: case V2DFmode:
30660 result = type;
30661 default: break;
30662 }
30663 break;
30664 case 'b':
30665 switch (mode)
30666 {
30667 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
30668 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
30669 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
30670 case QImode: case V16QImode: result = bool_V16QI_type_node;
30671 default: break;
30672 }
30673 break;
30674 case 'p':
30675 switch (mode)
30676 {
30677 case V8HImode: result = pixel_V8HI_type_node;
30678 default: break;
30679 }
30680 default: break;
30681 }
30682
30683 /* Propagate qualifiers attached to the element type
30684 onto the vector type. */
30685 if (result && result != type && TYPE_QUALS (type))
30686 result = build_qualified_type (result, TYPE_QUALS (type));
30687
30688 *no_add_attrs = true; /* No need to hang on to the attribute. */
30689
30690 if (result)
30691 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
30692
30693 return NULL_TREE;
30694 }
30695
30696 /* AltiVec defines four built-in scalar types that serve as vector
30697 elements; we must teach the compiler how to mangle them. */
30698
30699 static const char *
30700 rs6000_mangle_type (const_tree type)
30701 {
30702 type = TYPE_MAIN_VARIANT (type);
30703
30704 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
30705 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
30706 return NULL;
30707
30708 if (type == bool_char_type_node) return "U6__boolc";
30709 if (type == bool_short_type_node) return "U6__bools";
30710 if (type == pixel_type_node) return "u7__pixel";
30711 if (type == bool_int_type_node) return "U6__booli";
30712 if (type == bool_long_type_node) return "U6__booll";
30713
30714 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
30715 "g" for IBM extended double, no matter whether it is long double (using
30716 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
30717 if (TARGET_FLOAT128)
30718 {
30719 if (type == ieee128_float_type_node)
30720 return "U10__float128";
30721
30722 if (type == ibm128_float_type_node)
30723 return "g";
30724
30725 if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
30726 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
30727 }
30728
30729 /* Mangle IBM extended float long double as `g' (__float128) on
30730 powerpc*-linux where long-double-64 previously was the default. */
30731 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
30732 && TARGET_ELF
30733 && TARGET_LONG_DOUBLE_128
30734 && !TARGET_IEEEQUAD)
30735 return "g";
30736
30737 /* For all other types, use normal C++ mangling. */
30738 return NULL;
30739 }
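
/* For example, a parameter of AltiVec type '__pixel' mangles as
   "u7__pixel", so 'void f (__pixel);' becomes _Z1fu7__pixel under the
   usual Itanium C++ mangling. */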
30740
30741 /* Handle a "longcall" or "shortcall" attribute; arguments as in
30742 struct attribute_spec.handler. */
30743
30744 static tree
30745 rs6000_handle_longcall_attribute (tree *node, tree name,
30746 tree args ATTRIBUTE_UNUSED,
30747 int flags ATTRIBUTE_UNUSED,
30748 bool *no_add_attrs)
30749 {
30750 if (TREE_CODE (*node) != FUNCTION_TYPE
30751 && TREE_CODE (*node) != FIELD_DECL
30752 && TREE_CODE (*node) != TYPE_DECL)
30753 {
30754 warning (OPT_Wattributes, "%qE attribute only applies to functions",
30755 name);
30756 *no_add_attrs = true;
30757 }
30758
30759 return NULL_TREE;
30760 }
30761
30762 /* Set longcall attributes on all functions declared when
30763 rs6000_default_long_calls is true. */
30764 static void
30765 rs6000_set_default_type_attributes (tree type)
30766 {
30767 if (rs6000_default_long_calls
30768 && (TREE_CODE (type) == FUNCTION_TYPE
30769 || TREE_CODE (type) == METHOD_TYPE))
30770 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
30771 NULL_TREE,
30772 TYPE_ATTRIBUTES (type));
30773
30774 #if TARGET_MACHO
30775 darwin_set_default_type_attributes (type);
30776 #endif
30777 }
30778
30779 /* Return a reference suitable for calling a function with the
30780 longcall attribute. */
30781
30782 rtx
30783 rs6000_longcall_ref (rtx call_ref)
30784 {
30785 const char *call_name;
30786 tree node;
30787
30788 if (GET_CODE (call_ref) != SYMBOL_REF)
30789 return call_ref;
30790
30791 /* System V adds '.' to the internal name, so skip over any leading dots. */
30792 call_name = XSTR (call_ref, 0);
30793 if (*call_name == '.')
30794 {
30795 while (*call_name == '.')
30796 call_name++;
30797
30798 node = get_identifier (call_name);
30799 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
30800 }
30801
30802 return force_reg (Pmode, call_ref);
30803 }
30804 \f
30805 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
30806 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
30807 #endif
30808
30809 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
30810 struct attribute_spec.handler. */
30811 static tree
30812 rs6000_handle_struct_attribute (tree *node, tree name,
30813 tree args ATTRIBUTE_UNUSED,
30814 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
30815 {
30816 tree *type = NULL;
30817 if (DECL_P (*node))
30818 {
30819 if (TREE_CODE (*node) == TYPE_DECL)
30820 type = &TREE_TYPE (*node);
30821 }
30822 else
30823 type = node;
30824
30825 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
30826 || TREE_CODE (*type) == UNION_TYPE)))
30827 {
30828 warning (OPT_Wattributes, "%qE attribute ignored", name);
30829 *no_add_attrs = true;
30830 }
30831
30832 else if ((is_attribute_p ("ms_struct", name)
30833 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
30834 || ((is_attribute_p ("gcc_struct", name)
30835 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
30836 {
30837 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
30838 name);
30839 *no_add_attrs = true;
30840 }
30841
30842 return NULL_TREE;
30843 }
30844
30845 static bool
30846 rs6000_ms_bitfield_layout_p (const_tree record_type)
30847 {
30848 return ((TARGET_USE_MS_BITFIELD_LAYOUT
30849 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
30850 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
30851 }
30852 \f
30853 #ifdef USING_ELFOS_H
30854
30855 /* A get_unnamed_section callback, used for switching to toc_section. */
30856
30857 static void
30858 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
30859 {
30860 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30861 && TARGET_MINIMAL_TOC
30862 && !TARGET_RELOCATABLE)
30863 {
30864 if (!toc_initialized)
30865 {
30866 toc_initialized = 1;
30867 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
30868 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
30869 fprintf (asm_out_file, "\t.tc ");
30870 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
30871 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
30872 fprintf (asm_out_file, "\n");
30873
30874 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
30875 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
30876 fprintf (asm_out_file, " = .+32768\n");
30877 }
30878 else
30879 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
30880 }
30881 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30882 && !TARGET_RELOCATABLE)
30883 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
30884 else
30885 {
30886 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
30887 if (!toc_initialized)
30888 {
30889 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
30890 fprintf (asm_out_file, " = .+32768\n");
30891 toc_initialized = 1;
30892 }
30893 }
30894 }
30895
30896 /* Implement TARGET_ASM_INIT_SECTIONS. */
30897
30898 static void
30899 rs6000_elf_asm_init_sections (void)
30900 {
30901 toc_section
30902 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
30903
30904 sdata2_section
30905 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
30906 SDATA2_SECTION_ASM_OP);
30907 }
30908
30909 /* Implement TARGET_SELECT_RTX_SECTION. */
30910
30911 static section *
30912 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
30913 unsigned HOST_WIDE_INT align)
30914 {
30915 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
30916 return toc_section;
30917 else
30918 return default_elf_select_rtx_section (mode, x, align);
30919 }
30920 \f
30921 /* For a SYMBOL_REF, set generic flags and then perform some
30922 target-specific processing.
30923
30924 When the AIX ABI is requested on a non-AIX system, replace the
30925 function name with the real name (with a leading .) rather than the
30926 function descriptor name. This saves a lot of overriding code to
30927 read the prefixes. */
30928
30929 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
30930 static void
30931 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
30932 {
30933 default_encode_section_info (decl, rtl, first);
30934
30935 if (first
30936 && TREE_CODE (decl) == FUNCTION_DECL
30937 && !TARGET_AIX
30938 && DEFAULT_ABI == ABI_AIX)
30939 {
30940 rtx sym_ref = XEXP (rtl, 0);
30941 size_t len = strlen (XSTR (sym_ref, 0));
30942 char *str = XALLOCAVEC (char, len + 2);
30943 str[0] = '.';
30944 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
30945 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
30946 }
30947 }
30948
30949 static inline bool
30950 compare_section_name (const char *section, const char *templ)
30951 {
30952 int len;
30953
30954 len = strlen (templ);
30955 return (strncmp (section, templ, len) == 0
30956 && (section[len] == 0 || section[len] == '.'));
30957 }
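
/* For example, compare_section_name (".sdata.foo", ".sdata") is true,
   since the section name may continue after a '.', while
   compare_section_name (".sdata2x", ".sdata2") is false. */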
30958
30959 bool
30960 rs6000_elf_in_small_data_p (const_tree decl)
30961 {
30962 if (rs6000_sdata == SDATA_NONE)
30963 return false;
30964
30965 /* We want to merge strings, so we never consider them small data. */
30966 if (TREE_CODE (decl) == STRING_CST)
30967 return false;
30968
30969 /* Functions are never in the small data area. */
30970 if (TREE_CODE (decl) == FUNCTION_DECL)
30971 return false;
30972
30973 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
30974 {
30975 const char *section = DECL_SECTION_NAME (decl);
30976 if (compare_section_name (section, ".sdata")
30977 || compare_section_name (section, ".sdata2")
30978 || compare_section_name (section, ".gnu.linkonce.s")
30979 || compare_section_name (section, ".sbss")
30980 || compare_section_name (section, ".sbss2")
30981 || compare_section_name (section, ".gnu.linkonce.sb")
30982 || strcmp (section, ".PPC.EMB.sdata0") == 0
30983 || strcmp (section, ".PPC.EMB.sbss0") == 0)
30984 return true;
30985 }
30986 else
30987 {
30988 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
30989
30990 if (size > 0
30991 && size <= g_switch_value
30992 /* If it's not public, and we're not going to reference it there,
30993 there's no need to put it in the small data section. */
30994 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
30995 return true;
30996 }
30997
30998 return false;
30999 }
31000
31001 #endif /* USING_ELFOS_H */
31002 \f
31003 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
31004
31005 static bool
31006 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
31007 {
31008 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
31009 }
31010
31011 /* Do not place thread-local symbols refs in the object blocks. */
31012
31013 static bool
31014 rs6000_use_blocks_for_decl_p (const_tree decl)
31015 {
31016 return !DECL_THREAD_LOCAL_P (decl);
31017 }
31018 \f
31019 /* Return a REG that occurs in ADDR with coefficient 1.
31020 ADDR can be effectively incremented by incrementing REG.
31021
31022 r0 is special and we must not select it as an address
31023 register by this routine since our caller will try to
31024 increment the returned register via an "la" instruction. */
31025
31026 rtx
31027 find_addr_reg (rtx addr)
31028 {
31029 while (GET_CODE (addr) == PLUS)
31030 {
31031 if (GET_CODE (XEXP (addr, 0)) == REG
31032 && REGNO (XEXP (addr, 0)) != 0)
31033 addr = XEXP (addr, 0);
31034 else if (GET_CODE (XEXP (addr, 1)) == REG
31035 && REGNO (XEXP (addr, 1)) != 0)
31036 addr = XEXP (addr, 1);
31037 else if (CONSTANT_P (XEXP (addr, 0)))
31038 addr = XEXP (addr, 1);
31039 else if (CONSTANT_P (XEXP (addr, 1)))
31040 addr = XEXP (addr, 0);
31041 else
31042 gcc_unreachable ();
31043 }
31044 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
31045 return addr;
31046 }
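
/* For example, given the address (plus:SI (reg:SI 9) (const_int 8)),
   find_addr_reg returns (reg:SI 9), which the caller can then bump
   with an "la" instruction as described above. */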
31047
31048 void
31049 rs6000_fatal_bad_address (rtx op)
31050 {
31051 fatal_insn ("bad address", op);
31052 }
31053
31054 #if TARGET_MACHO
31055
31056 typedef struct branch_island_d {
31057 tree function_name;
31058 tree label_name;
31059 int line_number;
31060 } branch_island;
31061
31062
31063 static vec<branch_island, va_gc> *branch_islands;
31064
31065 /* Remember to generate a branch island for far calls to the given
31066 function. */
31067
31068 static void
31069 add_compiler_branch_island (tree label_name, tree function_name,
31070 int line_number)
31071 {
31072 branch_island bi = {function_name, label_name, line_number};
31073 vec_safe_push (branch_islands, bi);
31074 }
31075
31076 /* Generate far-jump branch islands for everything recorded in
31077 branch_islands. Invoked immediately after the last instruction of
31078 the epilogue has been emitted; the branch islands must be appended
31079 to, and contiguous with, the function body. Mach-O stubs are
31080 generated in machopic_output_stub(). */
31081
31082 static void
31083 macho_branch_islands (void)
31084 {
31085 char tmp_buf[512];
31086
31087 while (!vec_safe_is_empty (branch_islands))
31088 {
31089 branch_island *bi = &branch_islands->last ();
31090 const char *label = IDENTIFIER_POINTER (bi->label_name);
31091 const char *name = IDENTIFIER_POINTER (bi->function_name);
31092 char name_buf[512];
31093 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
31094 if (name[0] == '*' || name[0] == '&')
31095 strcpy (name_buf, name+1);
31096 else
31097 {
31098 name_buf[0] = '_';
31099 strcpy (name_buf+1, name);
31100 }
31101 strcpy (tmp_buf, "\n");
31102 strcat (tmp_buf, label);
31103 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
31104 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
31105 dbxout_stabd (N_SLINE, bi->line_number);
31106 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
31107 if (flag_pic)
31108 {
31109 if (TARGET_LINK_STACK)
31110 {
31111 char name[32];
31112 get_ppc476_thunk_name (name);
31113 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
31114 strcat (tmp_buf, name);
31115 strcat (tmp_buf, "\n");
31116 strcat (tmp_buf, label);
31117 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
31118 }
31119 else
31120 {
31121 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
31122 strcat (tmp_buf, label);
31123 strcat (tmp_buf, "_pic\n");
31124 strcat (tmp_buf, label);
31125 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
31126 }
31127
31128 strcat (tmp_buf, "\taddis r11,r11,ha16(");
31129 strcat (tmp_buf, name_buf);
31130 strcat (tmp_buf, " - ");
31131 strcat (tmp_buf, label);
31132 strcat (tmp_buf, "_pic)\n");
31133
31134 strcat (tmp_buf, "\tmtlr r0\n");
31135
31136 strcat (tmp_buf, "\taddi r12,r11,lo16(");
31137 strcat (tmp_buf, name_buf);
31138 strcat (tmp_buf, " - ");
31139 strcat (tmp_buf, label);
31140 strcat (tmp_buf, "_pic)\n");
31141
31142 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
31143 }
31144 else
31145 {
31146 strcat (tmp_buf, ":\nlis r12,hi16(");
31147 strcat (tmp_buf, name_buf);
31148 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
31149 strcat (tmp_buf, name_buf);
31150 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
31151 }
31152 output_asm_insn (tmp_buf, 0);
31153 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
31154 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
31155 dbxout_stabd (N_SLINE, bi->line_number);
31156 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
31157 branch_islands->pop ();
31158 }
31159 }
31160
31161 /* NO_PREVIOUS_DEF checks whether the function name is already
31162 recorded in the branch island list. */
31163
31164 static int
31165 no_previous_def (tree function_name)
31166 {
31167 branch_island *bi;
31168 unsigned ix;
31169
31170 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
31171 if (function_name == bi->function_name)
31172 return 0;
31173 return 1;
31174 }
31175
31176 /* GET_PREV_LABEL gets the label name from the previous definition of
31177 the function. */
31178
31179 static tree
31180 get_prev_label (tree function_name)
31181 {
31182 branch_island *bi;
31183 unsigned ix;
31184
31185 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
31186 if (function_name == bi->function_name)
31187 return bi->label_name;
31188 return NULL_TREE;
31189 }
31190
31191 /* INSN is either a function call or a millicode call. It may have an
31192 unconditional jump in its delay slot.
31193
31194 CALL_DEST is the routine we are calling. */
31195
31196 char *
31197 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
31198 int cookie_operand_number)
31199 {
31200 static char buf[256];
31201 if (darwin_emit_branch_islands
31202 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
31203 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
31204 {
31205 tree labelname;
31206 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
31207
31208 if (no_previous_def (funname))
31209 {
31210 rtx label_rtx = gen_label_rtx ();
31211 char *label_buf, temp_buf[256];
31212 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
31213 CODE_LABEL_NUMBER (label_rtx));
31214 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
31215 labelname = get_identifier (label_buf);
31216 add_compiler_branch_island (labelname, funname, insn_line (insn));
31217 }
31218 else
31219 labelname = get_prev_label (funname);
31220
31221 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
31222 instruction will reach 'foo', otherwise link as 'bl L42'".
31223 "L42" should be a 'branch island', that will do a far jump to
31224 'foo'. Branch islands are generated in
31225 macho_branch_islands(). */
31226 sprintf (buf, "jbsr %%z%d,%.246s",
31227 dest_operand_number, IDENTIFIER_POINTER (labelname));
31228 }
31229 else
31230 sprintf (buf, "bl %%z%d", dest_operand_number);
31231 return buf;
31232 }
31233
31234 /* Generate PIC and indirect symbol stubs. */
31235
31236 void
31237 machopic_output_stub (FILE *file, const char *symb, const char *stub)
31238 {
31239 unsigned int length;
31240 char *symbol_name, *lazy_ptr_name;
31241 char *local_label_0;
31242 static int label = 0;
31243
31244 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
31245 symb = (*targetm.strip_name_encoding) (symb);
31246
31248 length = strlen (symb);
31249 symbol_name = XALLOCAVEC (char, length + 32);
31250 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
31251
31252 lazy_ptr_name = XALLOCAVEC (char, length + 32);
31253 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
31254
31255 if (flag_pic == 2)
31256 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
31257 else
31258 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
31259
31260 if (flag_pic == 2)
31261 {
31262 fprintf (file, "\t.align 5\n");
31263
31264 fprintf (file, "%s:\n", stub);
31265 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31266
31267 label++;
31268 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
31269 sprintf (local_label_0, "\"L%011d$spb\"", label);
31270
31271 fprintf (file, "\tmflr r0\n");
31272 if (TARGET_LINK_STACK)
31273 {
31274 char name[32];
31275 get_ppc476_thunk_name (name);
31276 fprintf (file, "\tbl %s\n", name);
31277 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
31278 }
31279 else
31280 {
31281 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
31282 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
31283 }
31284 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
31285 lazy_ptr_name, local_label_0);
31286 fprintf (file, "\tmtlr r0\n");
31287 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
31288 (TARGET_64BIT ? "ldu" : "lwzu"),
31289 lazy_ptr_name, local_label_0);
31290 fprintf (file, "\tmtctr r12\n");
31291 fprintf (file, "\tbctr\n");
31292 }
31293 else
31294 {
31295 fprintf (file, "\t.align 4\n");
31296
31297 fprintf (file, "%s:\n", stub);
31298 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31299
31300 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
31301 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
31302 (TARGET_64BIT ? "ldu" : "lwzu"),
31303 lazy_ptr_name);
31304 fprintf (file, "\tmtctr r12\n");
31305 fprintf (file, "\tbctr\n");
31306 }
31307
31308 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
31309 fprintf (file, "%s:\n", lazy_ptr_name);
31310 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
31311 fprintf (file, "%sdyld_stub_binding_helper\n",
31312 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
31313 }
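
/* For illustration (hypothetical symbol; the exact label spellings come
   from GEN_SYMBOL_NAME_FOR_SYMBOL and GEN_LAZY_PTR_NAME_FOR_SYMBOL), a
   32-bit -fPIC stub without TARGET_LINK_STACK comes out roughly as:

       L_bar$stub:
               .indirect_symbol _bar
               mflr r0
               bcl 20,31,"L00000000001$spb"
       "L00000000001$spb":
               mflr r11
               addis r11,r11,ha16(L_bar$lazy_ptr-"L00000000001$spb")
               mtlr r0
               lwzu r12,lo16(L_bar$lazy_ptr-"L00000000001$spb")(r11)
               mtctr r12
               bctr
       L_bar$lazy_ptr:
               .indirect_symbol _bar
               .long dyld_stub_binding_helper  */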
31314
31315 /* Legitimize PIC addresses. If the address is already
31316 position-independent, we return ORIG. Newly generated
31317 position-independent addresses go into a reg. This is REG if
31318 nonzero, otherwise we allocate register(s) as necessary. */
31319
31320 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
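
/* SMALL_INT accepts exactly the signed 16-bit range: the +0x8000 bias
   maps [-0x8000, 0x7fff] onto [0, 0xffff], so e.g. SMALL_INT of
   GEN_INT (-0x8000) holds while GEN_INT (0x8000) does not.  */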
31321
31322 rtx
31323 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
31324 rtx reg)
31325 {
31326 rtx base, offset;
31327
31328 if (reg == NULL && ! reload_in_progress && ! reload_completed)
31329 reg = gen_reg_rtx (Pmode);
31330
31331 if (GET_CODE (orig) == CONST)
31332 {
31333 rtx reg_temp;
31334
31335 if (GET_CODE (XEXP (orig, 0)) == PLUS
31336 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
31337 return orig;
31338
31339 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
31340
31341 /* Use a different reg for the intermediate value, as
31342 it will be marked UNCHANGING. */
31343 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
31344 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
31345 Pmode, reg_temp);
31346 offset =
31347 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
31348 Pmode, reg);
31349
31350 if (GET_CODE (offset) == CONST_INT)
31351 {
31352 if (SMALL_INT (offset))
31353 return plus_constant (Pmode, base, INTVAL (offset));
31354 else if (! reload_in_progress && ! reload_completed)
31355 offset = force_reg (Pmode, offset);
31356 else
31357 {
31358 rtx mem = force_const_mem (Pmode, orig);
31359 return machopic_legitimize_pic_address (mem, Pmode, reg);
31360 }
31361 }
31362 return gen_rtx_PLUS (Pmode, base, offset);
31363 }
31364
31365 /* Fall back on generic machopic code. */
31366 return machopic_legitimize_pic_address (orig, mode, reg);
31367 }
31368
31369 /* Output a .machine directive for the Darwin assembler, and call
31370 the generic start_file routine. */
31371
31372 static void
31373 rs6000_darwin_file_start (void)
31374 {
31375 static const struct
31376 {
31377 const char *arg;
31378 const char *name;
31379 HOST_WIDE_INT if_set;
31380 } mapping[] = {
31381 { "ppc64", "ppc64", MASK_64BIT },
31382 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
31383 { "power4", "ppc970", 0 },
31384 { "G5", "ppc970", 0 },
31385 { "7450", "ppc7450", 0 },
31386 { "7400", "ppc7400", MASK_ALTIVEC },
31387 { "G4", "ppc7400", 0 },
31388 { "750", "ppc750", 0 },
31389 { "740", "ppc750", 0 },
31390 { "G3", "ppc750", 0 },
31391 { "604e", "ppc604e", 0 },
31392 { "604", "ppc604", 0 },
31393 { "603e", "ppc603", 0 },
31394 { "603", "ppc603", 0 },
31395 { "601", "ppc601", 0 },
31396 { NULL, "ppc", 0 } };
31397 const char *cpu_id = "";
31398 size_t i;
31399
31400 rs6000_file_start ();
31401 darwin_file_start ();
31402
31403 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
31404
31405 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
31406 cpu_id = rs6000_default_cpu;
31407
31408 if (global_options_set.x_rs6000_cpu_index)
31409 cpu_id = processor_target_table[rs6000_cpu_index].name;
31410
31411 /* Look through the mapping array. Pick the first name that either
31412 matches the argument, has a bit set in IF_SET that is also set
31413 in the target flags, or has a NULL name. */
31414
31415 i = 0;
31416 while (mapping[i].arg != NULL
31417 && strcmp (mapping[i].arg, cpu_id) != 0
31418 && (mapping[i].if_set & rs6000_isa_flags) == 0)
31419 i++;
31420
31421 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
31422 }
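
/* For example, "-mcpu=7400" matches the "7400" entry and emits
   ".machine ppc7400"; a 64-bit compilation hits the first entry via
   MASK_64BIT and emits ".machine ppc64" whatever the -mcpu string was;
   with no match at all, the NULL sentinel yields ".machine ppc".  */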
31423
31424 #endif /* TARGET_MACHO */
31425
31426 #if TARGET_ELF
31427 static int
31428 rs6000_elf_reloc_rw_mask (void)
31429 {
31430 if (flag_pic)
31431 return 3;
31432 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
31433 return 2;
31434 else
31435 return 0;
31436 }
31437
31438 /* Record an element in the table of global constructors. SYMBOL is
31439 a SYMBOL_REF of the function to be called; PRIORITY is a number
31440 between 0 and MAX_INIT_PRIORITY.
31441
31442 This differs from default_named_section_asm_out_constructor in
31443 that we have special handling for -mrelocatable. */
31444
31445 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
31446 static void
31447 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
31448 {
31449 const char *section = ".ctors";
31450 char buf[16];
31451
31452 if (priority != DEFAULT_INIT_PRIORITY)
31453 {
31454 sprintf (buf, ".ctors.%.5u",
31455 /* Invert the numbering so the linker puts us in the proper
31456 order; constructors are run from right to left, and the
31457 linker sorts in increasing order. */
31458 MAX_INIT_PRIORITY - priority);
31459 section = buf;
31460 }
31461
31462 switch_to_section (get_section (section, SECTION_WRITE, NULL));
31463 assemble_align (POINTER_SIZE);
31464
31465 if (TARGET_RELOCATABLE)
31466 {
31467 fputs ("\t.long (", asm_out_file);
31468 output_addr_const (asm_out_file, symbol);
31469 fputs (")@fixup\n", asm_out_file);
31470 }
31471 else
31472 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
31473 }
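
/* Worked example: assuming MAX_INIT_PRIORITY is 65535, priority 101
   yields section ".ctors.65434" and priority 200 yields ".ctors.65335".
   The linker sorts ".ctors.65335" first; since the .ctors array runs
   from its end backwards, the priority-101 entry (nearer the end) is
   executed first, as required.  */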
31474
31475 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
31476 static void
31477 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
31478 {
31479 const char *section = ".dtors";
31480 char buf[16];
31481
31482 if (priority != DEFAULT_INIT_PRIORITY)
31483 {
31484 sprintf (buf, ".dtors.%.5u",
31485 /* Invert the numbering so the linker puts us in the proper
31486 order; destructors are run from right to left, and the
31487 linker sorts in increasing order. */
31488 MAX_INIT_PRIORITY - priority);
31489 section = buf;
31490 }
31491
31492 switch_to_section (get_section (section, SECTION_WRITE, NULL));
31493 assemble_align (POINTER_SIZE);
31494
31495 if (TARGET_RELOCATABLE)
31496 {
31497 fputs ("\t.long (", asm_out_file);
31498 output_addr_const (asm_out_file, symbol);
31499 fputs (")@fixup\n", asm_out_file);
31500 }
31501 else
31502 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
31503 }
31504
31505 void
31506 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
31507 {
31508 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
31509 {
31510 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
31511 ASM_OUTPUT_LABEL (file, name);
31512 fputs (DOUBLE_INT_ASM_OP, file);
31513 rs6000_output_function_entry (file, name);
31514 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
31515 if (DOT_SYMBOLS)
31516 {
31517 fputs ("\t.size\t", file);
31518 assemble_name (file, name);
31519 fputs (",24\n\t.type\t.", file);
31520 assemble_name (file, name);
31521 fputs (",@function\n", file);
31522 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
31523 {
31524 fputs ("\t.globl\t.", file);
31525 assemble_name (file, name);
31526 putc ('\n', file);
31527 }
31528 }
31529 else
31530 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
31531 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
31532 rs6000_output_function_entry (file, name);
31533 fputs (":\n", file);
31534 return;
31535 }
31536
31537 if (TARGET_RELOCATABLE
31538 && !TARGET_SECURE_PLT
31539 && (get_pool_size () != 0 || crtl->profile)
31540 && uses_TOC ())
31541 {
31542 char buf[256];
31543
31544 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
31545
31546 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
31547 fprintf (file, "\t.long ");
31548 assemble_name (file, buf);
31549 putc ('-', file);
31550 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
31551 assemble_name (file, buf);
31552 putc ('\n', file);
31553 }
31554
31555 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
31556 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
31557
31558 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
31559 {
31560 char buf[256];
31561
31562 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
31563
31564 fprintf (file, "\t.quad .TOC.-");
31565 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
31566 assemble_name (file, buf);
31567 putc ('\n', file);
31568 }
31569
31570 if (DEFAULT_ABI == ABI_AIX)
31571 {
31572 const char *desc_name, *orig_name;
31573
31574 orig_name = (*targetm.strip_name_encoding) (name);
31575 desc_name = orig_name;
31576 while (*desc_name == '.')
31577 desc_name++;
31578
31579 if (TREE_PUBLIC (decl))
31580 fprintf (file, "\t.globl %s\n", desc_name);
31581
31582 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
31583 fprintf (file, "%s:\n", desc_name);
31584 fprintf (file, "\t.long %s\n", orig_name);
31585 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
31586 fputs ("\t.long 0\n", file);
31587 fprintf (file, "\t.previous\n");
31588 }
31589 ASM_OUTPUT_LABEL (file, name);
31590 }
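
/* For illustration, a public 64-bit ELFv1 function "foo" (assuming
   DOT_SYMBOLS and no weak binding) is emitted roughly as:

       .section ".opd","aw"
       .align 3
   foo:
       .quad .foo,.TOC.@tocbase,0
       .previous
       .size foo,24
       .type .foo,@function
       .globl .foo
   .foo:

   so "foo" names the function descriptor in .opd while ".foo" labels
   the actual code entry point.  */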
31591
31592 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
31593 static void
31594 rs6000_elf_file_end (void)
31595 {
31596 #ifdef HAVE_AS_GNU_ATTRIBUTE
31597 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
31598 {
31599 if (rs6000_passes_float)
31600 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
31601 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
31602 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
31603 : 2));
31604 if (rs6000_passes_vector)
31605 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
31606 (TARGET_ALTIVEC_ABI ? 2
31607 : TARGET_SPE_ABI ? 3
31608 : 1));
31609 if (rs6000_returns_struct)
31610 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
31611 aix_struct_return ? 2 : 1);
31612 }
31613 #endif
31614 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
31615 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
31616 file_end_indicate_exec_stack ();
31617 #endif
31618
31619 if (flag_split_stack)
31620 file_end_indicate_split_stack ();
31621 }
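
/* For example, a 32-bit SVR4 object built with hard double-precision
   floating point that passes FP values gets ".gnu_attribute 4, 1",
   a soft-float one gets ".gnu_attribute 4, 2", and passing vectors
   under the AltiVec ABI adds ".gnu_attribute 8, 2".  */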
31622 #endif
31623
31624 #if TARGET_XCOFF
31625
31626 #ifndef HAVE_XCOFF_DWARF_EXTRAS
31627 #define HAVE_XCOFF_DWARF_EXTRAS 0
31628 #endif
31629
31630 static enum unwind_info_type
31631 rs6000_xcoff_debug_unwind_info (void)
31632 {
31633 return UI_NONE;
31634 }
31635
31636 static void
31637 rs6000_xcoff_asm_output_anchor (rtx symbol)
31638 {
31639 char buffer[100];
31640
31641 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
31642 SYMBOL_REF_BLOCK_OFFSET (symbol));
31643 fprintf (asm_out_file, "%s", SET_ASM_OP);
31644 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
31645 fprintf (asm_out_file, ",");
31646 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
31647 fprintf (asm_out_file, "\n");
31648 }
31649
31650 static void
31651 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
31652 {
31653 fputs (GLOBAL_ASM_OP, stream);
31654 RS6000_OUTPUT_BASENAME (stream, name);
31655 putc ('\n', stream);
31656 }
31657
31658 /* A get_unnamed_decl callback, used for read-only sections. DIRECTIVE
31659 points to the section string variable. */
31660
31661 static void
31662 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
31663 {
31664 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
31665 *(const char *const *) directive,
31666 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
31667 }
31668
31669 /* Likewise for read-write sections. */
31670
31671 static void
31672 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
31673 {
31674 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
31675 *(const char *const *) directive,
31676 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
31677 }
31678
31679 static void
31680 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
31681 {
31682 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
31683 *(const char *const *) directive,
31684 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
31685 }
31686
31687 /* A get_unnamed_section callback, used for switching to toc_section. */
31688
31689 static void
31690 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
31691 {
31692 if (TARGET_MINIMAL_TOC)
31693 {
31694 /* toc_section is always selected at least once from
31695 rs6000_xcoff_file_start, so this is guaranteed to be
31696 defined exactly once in each file. */
31697 if (!toc_initialized)
31698 {
31699 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
31700 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
31701 toc_initialized = 1;
31702 }
31703 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
31704 (TARGET_32BIT ? "" : ",3"));
31705 }
31706 else
31707 fputs ("\t.toc\n", asm_out_file);
31708 }
31709
31710 /* Implement TARGET_ASM_INIT_SECTIONS. */
31711
31712 static void
31713 rs6000_xcoff_asm_init_sections (void)
31714 {
31715 read_only_data_section
31716 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
31717 &xcoff_read_only_section_name);
31718
31719 private_data_section
31720 = get_unnamed_section (SECTION_WRITE,
31721 rs6000_xcoff_output_readwrite_section_asm_op,
31722 &xcoff_private_data_section_name);
31723
31724 tls_data_section
31725 = get_unnamed_section (SECTION_TLS,
31726 rs6000_xcoff_output_tls_section_asm_op,
31727 &xcoff_tls_data_section_name);
31728
31729 tls_private_data_section
31730 = get_unnamed_section (SECTION_TLS,
31731 rs6000_xcoff_output_tls_section_asm_op,
31732 &xcoff_private_data_section_name);
31733
31734 read_only_private_data_section
31735 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
31736 &xcoff_private_data_section_name);
31737
31738 toc_section
31739 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
31740
31741 readonly_data_section = read_only_data_section;
31742 }
31743
31744 static int
31745 rs6000_xcoff_reloc_rw_mask (void)
31746 {
31747 return 3;
31748 }
31749
31750 static void
31751 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
31752 tree decl ATTRIBUTE_UNUSED)
31753 {
31754 int smclass;
31755 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
31756
31757 if (flags & SECTION_EXCLUDE)
31758 smclass = 4;
31759 else if (flags & SECTION_DEBUG)
31760 {
31761 fprintf (asm_out_file, "\t.dwsect %s\n", name);
31762 return;
31763 }
31764 else if (flags & SECTION_CODE)
31765 smclass = 0;
31766 else if (flags & SECTION_TLS)
31767 smclass = 3;
31768 else if (flags & SECTION_WRITE)
31769 smclass = 2;
31770 else
31771 smclass = 1;
31772
31773 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
31774 (flags & SECTION_CODE) ? "." : "",
31775 name, suffix[smclass], flags & SECTION_ENTSIZE);
31776 }
31777
31778 #define IN_NAMED_SECTION(DECL) \
31779 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
31780 && DECL_SECTION_NAME (DECL) != NULL)
31781
31782 static section *
31783 rs6000_xcoff_select_section (tree decl, int reloc,
31784 unsigned HOST_WIDE_INT align)
31785 {
31786 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
31787 named section. */
31788 if (align > BIGGEST_ALIGNMENT)
31789 {
31790 resolve_unique_section (decl, reloc, true);
31791 if (IN_NAMED_SECTION (decl))
31792 return get_named_section (decl, NULL, reloc);
31793 }
31794
31795 if (decl_readonly_section (decl, reloc))
31796 {
31797 if (TREE_PUBLIC (decl))
31798 return read_only_data_section;
31799 else
31800 return read_only_private_data_section;
31801 }
31802 else
31803 {
31804 #if HAVE_AS_TLS
31805 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
31806 {
31807 if (TREE_PUBLIC (decl))
31808 return tls_data_section;
31809 else if (bss_initializer_p (decl))
31810 {
31811 /* Convert to COMMON to emit in BSS. */
31812 DECL_COMMON (decl) = 1;
31813 return tls_comm_section;
31814 }
31815 else
31816 return tls_private_data_section;
31817 }
31818 else
31819 #endif
31820 if (TREE_PUBLIC (decl))
31821 return data_section;
31822 else
31823 return private_data_section;
31824 }
31825 }
31826
31827 static void
31828 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
31829 {
31830 const char *name;
31831
31832 /* Use select_section for private data and uninitialized data with
31833 alignment <= BIGGEST_ALIGNMENT. */
31834 if (!TREE_PUBLIC (decl)
31835 || DECL_COMMON (decl)
31836 || (DECL_INITIAL (decl) == NULL_TREE
31837 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
31838 || DECL_INITIAL (decl) == error_mark_node
31839 || (flag_zero_initialized_in_bss
31840 && initializer_zerop (DECL_INITIAL (decl))))
31841 return;
31842
31843 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31844 name = (*targetm.strip_name_encoding) (name);
31845 set_decl_section_name (decl, name);
31846 }
31847
31848 /* Select section for constant in constant pool.
31849
31850 On RS/6000, all constants are in the private read-only data area.
31851 However, if this is being placed in the TOC it must be output as a
31852 toc entry. */
31853
31854 static section *
31855 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
31856 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
31857 {
31858 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
31859 return toc_section;
31860 else
31861 return read_only_private_data_section;
31862 }
31863
31864 /* Remove any trailing [DS] or the like from the symbol name. */
31865
31866 static const char *
31867 rs6000_xcoff_strip_name_encoding (const char *name)
31868 {
31869 size_t len;
31870 if (*name == '*')
31871 name++;
31872 len = strlen (name);
31873 if (name[len - 1] == ']')
31874 return ggc_alloc_string (name, len - 4);
31875 else
31876 return name;
31877 }
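
/* E.g. "foo[DS]" becomes "foo", and "*bar[RW]" becomes "bar" after the
   leading '*' is skipped; a name with no trailing ']' is returned
   unchanged.  Note this assumes the bracketed suffix is always four
   characters wide.  */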
31878
31879 /* Section attributes. AIX is always PIC. */
31880
31881 static unsigned int
31882 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
31883 {
31884 unsigned int align;
31885 unsigned int flags = default_section_type_flags (decl, name, reloc);
31886
31887 /* Align to at least UNIT size. */
31888 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
31889 align = MIN_UNITS_PER_WORD;
31890 else
31891 /* Increase alignment of large objects if not already stricter. */
31892 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
31893 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
31894 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
31895
31896 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
31897 }
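
/* For example, a 16-byte-aligned object larger than a word gets
   align = 16, so exact_log2 (16) = 4 is folded into the SECTION_ENTSIZE
   bits of the flags; rs6000_xcoff_asm_named_section above then prints
   that 4 as the csect alignment, e.g. ".csect foo[RW],4".  */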
31898
31899 /* Output at beginning of assembler file.
31900
31901 Initialize the section names for the RS/6000 at this point.
31902
31903 Specify filename, including full path, to assembler.
31904
31905 We want to go into the TOC section so at least one .toc will be emitted.
31906 Also, in order to output proper .bs/.es pairs, we need at least one static
31907 [RW] section emitted.
31908
31909 Finally, declare mcount when profiling to make the assembler happy. */
31910
31911 static void
31912 rs6000_xcoff_file_start (void)
31913 {
31914 rs6000_gen_section_name (&xcoff_bss_section_name,
31915 main_input_filename, ".bss_");
31916 rs6000_gen_section_name (&xcoff_private_data_section_name,
31917 main_input_filename, ".rw_");
31918 rs6000_gen_section_name (&xcoff_read_only_section_name,
31919 main_input_filename, ".ro_");
31920 rs6000_gen_section_name (&xcoff_tls_data_section_name,
31921 main_input_filename, ".tls_");
31922 rs6000_gen_section_name (&xcoff_tbss_section_name,
31923 main_input_filename, ".tbss_[UL]");
31924
31925 fputs ("\t.file\t", asm_out_file);
31926 output_quoted_string (asm_out_file, main_input_filename);
31927 fputc ('\n', asm_out_file);
31928 if (write_symbols != NO_DEBUG)
31929 switch_to_section (private_data_section);
31930 switch_to_section (text_section);
31931 if (profile_flag)
31932 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
31933 rs6000_file_start ();
31934 }
31935
31936 /* Output at end of assembler file.
31937 On the RS/6000, referencing data should automatically pull in text. */
31938
31939 static void
31940 rs6000_xcoff_file_end (void)
31941 {
31942 switch_to_section (text_section);
31943 fputs ("_section_.text:\n", asm_out_file);
31944 switch_to_section (data_section);
31945 fputs (TARGET_32BIT
31946 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
31947 asm_out_file);
31948 }
31949
31950 struct declare_alias_data
31951 {
31952 FILE *file;
31953 bool function_descriptor;
31954 };
31955
31956 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
31957
31958 static bool
31959 rs6000_declare_alias (struct symtab_node *n, void *d)
31960 {
31961 struct declare_alias_data *data = (struct declare_alias_data *)d;
31962 /* The main symbol is output specially, because the varasm machinery does
31963 part of the job for us -- we do not need to emit .globl/.lglobl and such. */
31964 if (!n->alias || n->weakref)
31965 return false;
31966
31967 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
31968 return false;
31969
31970 /* Prevent assemble_alias from trying to use the .set pseudo operation,
31971 which does not behave as the middle-end expects. */
31972 TREE_ASM_WRITTEN (n->decl) = true;
31973
31974 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
31975 char *buffer = (char *) alloca (strlen (name) + 2);
31976 char *p;
31977 int dollar_inside = 0;
31978
31979 strcpy (buffer, name);
31980 p = strchr (buffer, '$');
31981 while (p) {
31982 *p = '_';
31983 dollar_inside++;
31984 p = strchr (p + 1, '$');
31985 }
31986 if (TREE_PUBLIC (n->decl))
31987 {
31988 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
31989 {
31990 if (dollar_inside) {
31991 if (data->function_descriptor)
31992 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
31993 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
31994 }
31995 if (data->function_descriptor)
31996 {
31997 fputs ("\t.globl .", data->file);
31998 RS6000_OUTPUT_BASENAME (data->file, buffer);
31999 putc ('\n', data->file);
32000 }
32001 fputs ("\t.globl ", data->file);
32002 RS6000_OUTPUT_BASENAME (data->file, buffer);
32003 putc ('\n', data->file);
32004 }
32005 #ifdef ASM_WEAKEN_DECL
32006 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
32007 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
32008 #endif
32009 }
32010 else
32011 {
32012 if (dollar_inside)
32013 {
32014 if (data->function_descriptor)
32015 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
32016 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
32017 }
32018 if (data->function_descriptor)
32019 {
32020 fputs ("\t.lglobl .", data->file);
32021 RS6000_OUTPUT_BASENAME (data->file, buffer);
32022 putc ('\n', data->file);
32023 }
32024 fputs ("\t.lglobl ", data->file);
32025 RS6000_OUTPUT_BASENAME (data->file, buffer);
32026 putc ('\n', data->file);
32027 }
32028 if (data->function_descriptor)
32029 fputs (".", data->file);
32030 RS6000_OUTPUT_BASENAME (data->file, buffer);
32031 fputs (":\n", data->file);
32032 return false;
32033 }
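
/* For example, an alias whose assembler name is "f$alias" is written
   out under the '$'-free label "f_alias", together with

       .rename f_alias,"f$alias"

   which tells the AIX assembler the symbol's real name (and likewise
   for the ".f_alias" function descriptor entry point).  */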
32034
32035 /* This macro produces the initial definition of a function name.
32036 On the RS/6000, we need to place an extra '.' in the function name and
32037 output the function descriptor.
32038 Dollar signs are converted to underscores.
32039
32040 The csect for the function will have already been created when
32041 text_section was selected. We do have to go back to that csect, however.
32042
32043 The third and fourth parameters to the .function pseudo-op (16 and 044)
32044 are placeholders which no longer have any use.
32045
32046 Because AIX assembler's .set command has unexpected semantics, we output
32047 all aliases as alternative labels in front of the definition. */
32048
32049 void
32050 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
32051 {
32052 char *buffer = (char *) alloca (strlen (name) + 1);
32053 char *p;
32054 int dollar_inside = 0;
32055 struct declare_alias_data data = {file, false};
32056
32057 strcpy (buffer, name);
32058 p = strchr (buffer, '$');
32059 while (p) {
32060 *p = '_';
32061 dollar_inside++;
32062 p = strchr (p + 1, '$');
32063 }
32064 if (TREE_PUBLIC (decl))
32065 {
32066 if (!RS6000_WEAK || !DECL_WEAK (decl))
32067 {
32068 if (dollar_inside) {
32069 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
32070 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
32071 }
32072 fputs ("\t.globl .", file);
32073 RS6000_OUTPUT_BASENAME (file, buffer);
32074 putc ('\n', file);
32075 }
32076 }
32077 else
32078 {
32079 if (dollar_inside) {
32080 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
32081 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
32082 }
32083 fputs ("\t.lglobl .", file);
32084 RS6000_OUTPUT_BASENAME (file, buffer);
32085 putc ('\n', file);
32086 }
32087 fputs ("\t.csect ", file);
32088 RS6000_OUTPUT_BASENAME (file, buffer);
32089 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
32090 RS6000_OUTPUT_BASENAME (file, buffer);
32091 fputs (":\n", file);
32092 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
32093 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
32094 RS6000_OUTPUT_BASENAME (file, buffer);
32095 fputs (", TOC[tc0], 0\n", file);
32096 in_section = NULL;
32097 switch_to_section (function_section (decl));
32098 putc ('.', file);
32099 RS6000_OUTPUT_BASENAME (file, buffer);
32100 fputs (":\n", file);
32101 data.function_descriptor = true;
32102 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
32103 if (!DECL_IGNORED_P (decl))
32104 {
32105 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
32106 xcoffout_declare_function (file, decl, buffer);
32107 else if (write_symbols == DWARF2_DEBUG)
32108 {
32109 name = (*targetm.strip_name_encoding) (name);
32110 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
32111 }
32112 }
32113 return;
32114 }
32115
32116 /* This macro produces the initial definition of an object (variable) name.
32117 Because AIX assembler's .set command has unexpected semantics, we output
32118 all aliases as alternative labels in front of the definition. */
32119
32120 void
32121 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
32122 {
32123 struct declare_alias_data data = {file, false};
32124 RS6000_OUTPUT_BASENAME (file, name);
32125 fputs (":\n", file);
32126 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
32127 }
32128
32129 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
32130
32131 void
32132 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
32133 {
32134 fputs (integer_asm_op (size, FALSE), file);
32135 assemble_name (file, label);
32136 fputs ("-$", file);
32137 }
32138
32139 /* Output a symbol offset relative to the dbase for the current object.
32140 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
32141 signed offsets.
32142
32143 __gcc_unwind_dbase is embedded in all executables/libraries through
32144 libgcc/config/rs6000/crtdbase.S. */
32145
32146 void
32147 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
32148 {
32149 fputs (integer_asm_op (size, FALSE), file);
32150 assemble_name (file, label);
32151 fputs("-__gcc_unwind_dbase", file);
32152 }
32153
32154 #ifdef HAVE_AS_TLS
32155 static void
32156 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
32157 {
32158 rtx symbol;
32159 int flags;
32160
32161 default_encode_section_info (decl, rtl, first);
32162
32163 /* Careful not to prod global register variables. */
32164 if (!MEM_P (rtl))
32165 return;
32166 symbol = XEXP (rtl, 0);
32167 if (GET_CODE (symbol) != SYMBOL_REF)
32168 return;
32169
32170 flags = SYMBOL_REF_FLAGS (symbol);
32171
32172 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
32173 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
32174
32175 SYMBOL_REF_FLAGS (symbol) = flags;
32176 }
32177 #endif /* HAVE_AS_TLS */
32178 #endif /* TARGET_XCOFF */
32179
32180 /* Return true if INSN should not be copied. */
32181
32182 static bool
32183 rs6000_cannot_copy_insn_p (rtx_insn *insn)
32184 {
32185 return recog_memoized (insn) >= 0
32186 && get_attr_cannot_copy (insn);
32187 }
32188
32189 /* Compute a (partial) cost for rtx X. Return true if the complete
32190 cost has been computed, and false if subexpressions should be
32191 scanned. In either case, *TOTAL contains the cost result. */
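
/* For instance (illustrative), costing (plus (reg) (const_int 42)) with
   outer_code == PLUS finds that 42 satisfies constraint "I" (a signed
   16-bit immediate), so the constant is free -- it folds into a single
   addi.  A constant such as 0x12345678 being SET into a register is
   instead charged COSTS_N_INSNS (1).  */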
32192
32193 static bool
32194 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
32195 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
32196 {
32197 int code = GET_CODE (x);
32198
32199 switch (code)
32200 {
32201 /* On the RS/6000, if it is valid in the insn, it is free. */
32202 case CONST_INT:
32203 if (((outer_code == SET
32204 || outer_code == PLUS
32205 || outer_code == MINUS)
32206 && (satisfies_constraint_I (x)
32207 || satisfies_constraint_L (x)))
32208 || (outer_code == AND
32209 && (satisfies_constraint_K (x)
32210 || (mode == SImode
32211 ? satisfies_constraint_L (x)
32212 : satisfies_constraint_J (x))))
32213 || ((outer_code == IOR || outer_code == XOR)
32214 && (satisfies_constraint_K (x)
32215 || (mode == SImode
32216 ? satisfies_constraint_L (x)
32217 : satisfies_constraint_J (x))))
32218 || outer_code == ASHIFT
32219 || outer_code == ASHIFTRT
32220 || outer_code == LSHIFTRT
32221 || outer_code == ROTATE
32222 || outer_code == ROTATERT
32223 || outer_code == ZERO_EXTRACT
32224 || (outer_code == MULT
32225 && satisfies_constraint_I (x))
32226 || ((outer_code == DIV || outer_code == UDIV
32227 || outer_code == MOD || outer_code == UMOD)
32228 && exact_log2 (INTVAL (x)) >= 0)
32229 || (outer_code == COMPARE
32230 && (satisfies_constraint_I (x)
32231 || satisfies_constraint_K (x)))
32232 || ((outer_code == EQ || outer_code == NE)
32233 && (satisfies_constraint_I (x)
32234 || satisfies_constraint_K (x)
32235 || (mode == SImode
32236 ? satisfies_constraint_L (x)
32237 : satisfies_constraint_J (x))))
32238 || (outer_code == GTU
32239 && satisfies_constraint_I (x))
32240 || (outer_code == LTU
32241 && satisfies_constraint_P (x)))
32242 {
32243 *total = 0;
32244 return true;
32245 }
32246 else if ((outer_code == PLUS
32247 && reg_or_add_cint_operand (x, VOIDmode))
32248 || (outer_code == MINUS
32249 && reg_or_sub_cint_operand (x, VOIDmode))
32250 || ((outer_code == SET
32251 || outer_code == IOR
32252 || outer_code == XOR)
32253 && (INTVAL (x)
32254 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
32255 {
32256 *total = COSTS_N_INSNS (1);
32257 return true;
32258 }
32259 /* FALLTHRU */
32260
32261 case CONST_DOUBLE:
32262 case CONST_WIDE_INT:
32263 case CONST:
32264 case HIGH:
32265 case SYMBOL_REF:
32266 case MEM:
32267 /* When optimizing for size, MEM should be slightly more expensive
32268 than generating the address, e.g., (plus (reg) (const)).
32269 L1 cache latency is about two instructions. */
32270 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
32271 return true;
32272
32273 case LABEL_REF:
32274 *total = 0;
32275 return true;
32276
32277 case PLUS:
32278 case MINUS:
32279 if (FLOAT_MODE_P (mode))
32280 *total = rs6000_cost->fp;
32281 else
32282 *total = COSTS_N_INSNS (1);
32283 return false;
32284
32285 case MULT:
32286 if (GET_CODE (XEXP (x, 1)) == CONST_INT
32287 && satisfies_constraint_I (XEXP (x, 1)))
32288 {
32289 if (INTVAL (XEXP (x, 1)) >= -256
32290 && INTVAL (XEXP (x, 1)) <= 255)
32291 *total = rs6000_cost->mulsi_const9;
32292 else
32293 *total = rs6000_cost->mulsi_const;
32294 }
32295 else if (mode == SFmode)
32296 *total = rs6000_cost->fp;
32297 else if (FLOAT_MODE_P (mode))
32298 *total = rs6000_cost->dmul;
32299 else if (mode == DImode)
32300 *total = rs6000_cost->muldi;
32301 else
32302 *total = rs6000_cost->mulsi;
32303 return false;
32304
32305 case FMA:
32306 if (mode == SFmode)
32307 *total = rs6000_cost->fp;
32308 else
32309 *total = rs6000_cost->dmul;
32310 break;
32311
32312 case DIV:
32313 case MOD:
32314 if (FLOAT_MODE_P (mode))
32315 {
32316 *total = mode == DFmode ? rs6000_cost->ddiv
32317 : rs6000_cost->sdiv;
32318 return false;
32319 }
32320 /* FALLTHRU */
32321
32322 case UDIV:
32323 case UMOD:
32324 if (GET_CODE (XEXP (x, 1)) == CONST_INT
32325 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
32326 {
32327 if (code == DIV || code == MOD)
32328 /* Shift, addze */
32329 *total = COSTS_N_INSNS (2);
32330 else
32331 /* Shift */
32332 *total = COSTS_N_INSNS (1);
32333 }
32334 else
32335 {
32336 if (GET_MODE (XEXP (x, 1)) == DImode)
32337 *total = rs6000_cost->divdi;
32338 else
32339 *total = rs6000_cost->divsi;
32340 }
32341 /* Add in shift and subtract for MOD unless we have a mod instruction. */
32342 if (!TARGET_MODULO && (code == MOD || code == UMOD))
32343 *total += COSTS_N_INSNS (2);
32344 return false;
32345
32346 case CTZ:
32347 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
32348 return false;
32349
32350 case FFS:
32351 *total = COSTS_N_INSNS (4);
32352 return false;
32353
32354 case POPCOUNT:
32355 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
32356 return false;
32357
32358 case PARITY:
32359 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
32360 return false;
32361
32362 case NOT:
32363 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
32364 *total = 0;
32365 else
32366 *total = COSTS_N_INSNS (1);
32367 return false;
32368
32369 case AND:
32370 if (CONST_INT_P (XEXP (x, 1)))
32371 {
32372 rtx left = XEXP (x, 0);
32373 rtx_code left_code = GET_CODE (left);
32374
32375 /* rotate-and-mask: 1 insn. */
32376 if ((left_code == ROTATE
32377 || left_code == ASHIFT
32378 || left_code == LSHIFTRT)
32379 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
32380 {
32381 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
32382 if (!CONST_INT_P (XEXP (left, 1)))
32383 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
32384 *total += COSTS_N_INSNS (1);
32385 return true;
32386 }
32387
32388 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
32389 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
32390 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
32391 || (val & 0xffff) == val
32392 || (val & 0xffff0000) == val
32393 || ((val & 0xffff) == 0 && mode == SImode))
32394 {
32395 *total = rtx_cost (left, mode, AND, 0, speed);
32396 *total += COSTS_N_INSNS (1);
32397 return true;
32398 }
32399
32400 /* 2 insns. */
32401 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
32402 {
32403 *total = rtx_cost (left, mode, AND, 0, speed);
32404 *total += COSTS_N_INSNS (2);
32405 return true;
32406 }
32407 }
32408
32409 *total = COSTS_N_INSNS (1);
32410 return false;
32411
32412 case IOR:
32413 /* FIXME */
32414 *total = COSTS_N_INSNS (1);
32415 return true;
32416
32417 case CLZ:
32418 case XOR:
32419 case ZERO_EXTRACT:
32420 *total = COSTS_N_INSNS (1);
32421 return false;
32422
32423 case ASHIFT:
32424 /* The EXTSWSLI instruction combines a sign extension with a shift. Don't
32425 count the sign extend and the shift separately within the insn. */
32426 if (TARGET_EXTSWSLI && mode == DImode
32427 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
32428 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
32429 {
32430 *total = 0;
32431 return false;
32432 }
32433 /* fall through */
32434
32435 case ASHIFTRT:
32436 case LSHIFTRT:
32437 case ROTATE:
32438 case ROTATERT:
32439 /* Handle mul_highpart. */
32440 if (outer_code == TRUNCATE
32441 && GET_CODE (XEXP (x, 0)) == MULT)
32442 {
32443 if (mode == DImode)
32444 *total = rs6000_cost->muldi;
32445 else
32446 *total = rs6000_cost->mulsi;
32447 return true;
32448 }
32449 else if (outer_code == AND)
32450 *total = 0;
32451 else
32452 *total = COSTS_N_INSNS (1);
32453 return false;
32454
32455 case SIGN_EXTEND:
32456 case ZERO_EXTEND:
32457 if (GET_CODE (XEXP (x, 0)) == MEM)
32458 *total = 0;
32459 else
32460 *total = COSTS_N_INSNS (1);
32461 return false;
32462
32463 case COMPARE:
32464 case NEG:
32465 case ABS:
32466 if (!FLOAT_MODE_P (mode))
32467 {
32468 *total = COSTS_N_INSNS (1);
32469 return false;
32470 }
32471 /* FALLTHRU */
32472
32473 case FLOAT:
32474 case UNSIGNED_FLOAT:
32475 case FIX:
32476 case UNSIGNED_FIX:
32477 case FLOAT_TRUNCATE:
32478 *total = rs6000_cost->fp;
32479 return false;
32480
32481 case FLOAT_EXTEND:
32482 if (mode == DFmode)
32483 *total = rs6000_cost->sfdf_convert;
32484 else
32485 *total = rs6000_cost->fp;
32486 return false;
32487
32488 case UNSPEC:
32489 switch (XINT (x, 1))
32490 {
32491 case UNSPEC_FRSP:
32492 *total = rs6000_cost->fp;
32493 return true;
32494
32495 default:
32496 break;
32497 }
32498 break;
32499
32500 case CALL:
32501 case IF_THEN_ELSE:
32502 if (!speed)
32503 {
32504 *total = COSTS_N_INSNS (1);
32505 return true;
32506 }
32507 else if (FLOAT_MODE_P (mode)
32508 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
32509 {
32510 *total = rs6000_cost->fp;
32511 return false;
32512 }
32513 break;
32514
32515 case NE:
32516 case EQ:
32517 case GTU:
32518 case LTU:
32519 /* Carry bit requires mode == Pmode.
32520 NEG or PLUS already counted so only add one. */
32521 if (mode == Pmode
32522 && (outer_code == NEG || outer_code == PLUS))
32523 {
32524 *total = COSTS_N_INSNS (1);
32525 return true;
32526 }
32527 if (outer_code == SET)
32528 {
32529 if (XEXP (x, 1) == const0_rtx)
32530 {
32531 if (TARGET_ISEL && !TARGET_MFCRF)
32532 *total = COSTS_N_INSNS (8);
32533 else
32534 *total = COSTS_N_INSNS (2);
32535 return true;
32536 }
32537 else
32538 {
32539 *total = COSTS_N_INSNS (3);
32540 return false;
32541 }
32542 }
32543 /* FALLTHRU */
32544
32545 case GT:
32546 case LT:
32547 case UNORDERED:
32548 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
32549 {
32550 if (TARGET_ISEL && !TARGET_MFCRF)
32551 *total = COSTS_N_INSNS (8);
32552 else
32553 *total = COSTS_N_INSNS (2);
32554 return true;
32555 }
32556 /* CC COMPARE. */
32557 if (outer_code == COMPARE)
32558 {
32559 *total = 0;
32560 return true;
32561 }
32562 break;
32563
32564 default:
32565 break;
32566 }
32567
32568 return false;
32569 }
32570
32571 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
32572
32573 static bool
32574 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
32575 int opno, int *total, bool speed)
32576 {
32577 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
32578
32579 fprintf (stderr,
32580 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
32581 "opno = %d, total = %d, speed = %s, x:\n",
32582 ret ? "complete" : "scan inner",
32583 GET_MODE_NAME (mode),
32584 GET_RTX_NAME (outer_code),
32585 opno,
32586 *total,
32587 speed ? "true" : "false");
32588
32589 debug_rtx (x);
32590
32591 return ret;
32592 }
32593
32594 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
32595
32596 static int
32597 rs6000_debug_address_cost (rtx x, machine_mode mode,
32598 addr_space_t as, bool speed)
32599 {
32600 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
32601
32602 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
32603 ret, speed ? "true" : "false");
32604 debug_rtx (x);
32605
32606 return ret;
32607 }
32608
32609
32610 /* A C expression returning the cost of moving data from a register of class
32611 FROM to one of class TO. */
32612
32613 static int
32614 rs6000_register_move_cost (machine_mode mode,
32615 reg_class_t from, reg_class_t to)
32616 {
32617 int ret;
32618
32619 if (TARGET_DEBUG_COST)
32620 dbg_cost_ctrl++;
32621
32622 /* Moves from/to GENERAL_REGS. */
32623 if (reg_classes_intersect_p (to, GENERAL_REGS)
32624 || reg_classes_intersect_p (from, GENERAL_REGS))
32625 {
32626 reg_class_t rclass = from;
32627
32628 if (! reg_classes_intersect_p (to, GENERAL_REGS))
32629 rclass = to;
32630
32631 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
32632 ret = (rs6000_memory_move_cost (mode, rclass, false)
32633 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
32634
32635 /* It's more expensive to move CR_REGS than CR0_REGS because of the
32636 shift. */
32637 else if (rclass == CR_REGS)
32638 ret = 4;
32639
32640 /* For those processors that have slow LR/CTR moves, make them more
32641 expensive than memory in order to bias spills to memory. */
32642 else if ((rs6000_cpu == PROCESSOR_POWER6
32643 || rs6000_cpu == PROCESSOR_POWER7
32644 || rs6000_cpu == PROCESSOR_POWER8
32645 || rs6000_cpu == PROCESSOR_POWER9)
32646 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
32647 ret = 6 * hard_regno_nregs[0][mode];
32648
32649 else
32650 /* A move will cost one instruction per GPR moved. */
32651 ret = 2 * hard_regno_nregs[0][mode];
32652 }
32653
32654 /* If we have VSX, we can easily move between FPR or Altivec registers. */
32655 else if (VECTOR_MEM_VSX_P (mode)
32656 && reg_classes_intersect_p (to, VSX_REGS)
32657 && reg_classes_intersect_p (from, VSX_REGS))
32658 ret = 2 * hard_regno_nregs[32][mode];
32659
32660 /* Moving between two similar registers is just one instruction. */
32661 else if (reg_classes_intersect_p (to, from))
32662 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
32663
32664 /* Everything else has to go through GENERAL_REGS. */
32665 else
32666 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
32667 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
32668
32669 if (TARGET_DEBUG_COST)
32670 {
32671 if (dbg_cost_ctrl == 1)
32672 fprintf (stderr,
32673 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
32674 ret, GET_MODE_NAME (mode), reg_class_names[from],
32675 reg_class_names[to]);
32676 dbg_cost_ctrl--;
32677 }
32678
32679 return ret;
32680 }
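
/* For example: a GPR-to-GPR copy of an SImode value costs
   2 * hard_regno_nregs[0][SImode] = 2; moving a GPR to a CR field
   costs 4 because of the mask shuffling; and a GPR<->FPR transfer is
   priced as a memory store plus load, since no cheap direct path is
   assumed here.  */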
32681
32682 /* A C expression returning the cost of moving data of MODE from a register to
32683 or from memory. */
32684
32685 static int
32686 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
32687 bool in ATTRIBUTE_UNUSED)
32688 {
32689 int ret;
32690
32691 if (TARGET_DEBUG_COST)
32692 dbg_cost_ctrl++;
32693
32694 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
32695 ret = 4 * hard_regno_nregs[0][mode];
32696 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
32697 || reg_classes_intersect_p (rclass, VSX_REGS)))
32698 ret = 4 * hard_regno_nregs[32][mode];
32699 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
32700 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
32701 else
32702 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
32703
32704 if (TARGET_DEBUG_COST)
32705 {
32706 if (dbg_cost_ctrl == 1)
32707 fprintf (stderr,
32708 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
32709 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
32710 dbg_cost_ctrl--;
32711 }
32712
32713 return ret;
32714 }
32715
32716 /* Returns the decl of a target-specific builtin that implements the
32717 reciprocal of the function FNDECL, or NULL_TREE if not available. */
32718
32719 static tree
32720 rs6000_builtin_reciprocal (tree fndecl)
32721 {
32722 switch (DECL_FUNCTION_CODE (fndecl))
32723 {
32724 case VSX_BUILTIN_XVSQRTDP:
32725 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
32726 return NULL_TREE;
32727
32728 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
32729
32730 case VSX_BUILTIN_XVSQRTSP:
32731 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
32732 return NULL_TREE;
32733
32734 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
32735
32736 default:
32737 return NULL_TREE;
32738 }
32739 }
32740
32741 /* Load up a constant. If the mode is a vector mode, splat the value across
32742 all of the vector elements. */
32743
32744 static rtx
32745 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
32746 {
32747 rtx reg;
32748
32749 if (mode == SFmode || mode == DFmode)
32750 {
32751 rtx d = const_double_from_real_value (dconst, mode);
32752 reg = force_reg (mode, d);
32753 }
32754 else if (mode == V4SFmode)
32755 {
32756 rtx d = const_double_from_real_value (dconst, SFmode);
32757 rtvec v = gen_rtvec (4, d, d, d, d);
32758 reg = gen_reg_rtx (mode);
32759 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
32760 }
32761 else if (mode == V2DFmode)
32762 {
32763 rtx d = const_double_from_real_value (dconst, DFmode);
32764 rtvec v = gen_rtvec (2, d, d);
32765 reg = gen_reg_rtx (mode);
32766 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
32767 }
32768 else
32769 gcc_unreachable ();
32770
32771 return reg;
32772 }
32773
32774 /* Generate an FMA instruction. */
32775
32776 static void
32777 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
32778 {
32779 machine_mode mode = GET_MODE (target);
32780 rtx dst;
32781
32782 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
32783 gcc_assert (dst != NULL);
32784
32785 if (dst != target)
32786 emit_move_insn (target, dst);
32787 }
32788
32789 /* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
32790
32791 static void
32792 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
32793 {
32794 machine_mode mode = GET_MODE (target);
32795 rtx dst;
32796
32797 /* Altivec does not support fms directly;
32798 generate in terms of fma in that case. */
32799 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
32800 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
32801 else
32802 {
32803 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
32804 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
32805 }
32806 gcc_assert (dst != NULL);
32807
32808 if (dst != target)
32809 emit_move_insn (target, dst);
32810 }
32811
32812 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
32813
32814 static void
32815 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
32816 {
32817 machine_mode mode = GET_MODE (dst);
32818 rtx r;
32819
32820 /* This is a tad more complicated, since the fnma_optab is for
32821 a different expression: fma(-m1, m2, a), which is the same
32822 thing except in the case of signed zeros.
32823
32824 Fortunately we know that if FMA is supported that FNMSUB is
32825 also supported in the ISA. Just expand it directly. */
32826
32827 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
32828
32829 r = gen_rtx_NEG (mode, a);
32830 r = gen_rtx_FMA (mode, m1, m2, r);
32831 r = gen_rtx_NEG (mode, r);
32832 emit_insn (gen_rtx_SET (dst, r));
32833 }
32834
32835 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
32836 add a reg_note saying that this was a division. Support both scalar and
32837 vector divide. Assumes no trapping math and finite arguments. */
32838
32839 void
32840 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
32841 {
32842 machine_mode mode = GET_MODE (dst);
32843 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
32844 int i;
32845
32846 /* Low precision estimates guarantee 5 bits of accuracy. High
32847 precision estimates guarantee 14 bits of accuracy. SFmode
32848 requires 23 bits of accuracy. DFmode requires 52 bits of
32849 accuracy. Each pass at least doubles the accuracy, leading
32850 to the following. */
32851 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
32852 if (mode == DFmode || mode == V2DFmode)
32853 passes++;
32854
32855 enum insn_code code = optab_handler (smul_optab, mode);
32856 insn_gen_fn gen_mul = GEN_FCN (code);
32857
32858 gcc_assert (code != CODE_FOR_nothing);
32859
32860 one = rs6000_load_constant_and_splat (mode, dconst1);
32861
32862 /* x0 = 1./d estimate */
32863 x0 = gen_reg_rtx (mode);
32864 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
32865 UNSPEC_FRES)));
32866
32867 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
32868 if (passes > 1) {
32869
32870 /* e0 = 1. - d * x0 */
32871 e0 = gen_reg_rtx (mode);
32872 rs6000_emit_nmsub (e0, d, x0, one);
32873
32874 /* x1 = x0 + e0 * x0 */
32875 x1 = gen_reg_rtx (mode);
32876 rs6000_emit_madd (x1, e0, x0, x0);
32877
32878 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
32879 ++i, xprev = xnext, eprev = enext) {
32880
32881 /* enext = eprev * eprev */
32882 enext = gen_reg_rtx (mode);
32883 emit_insn (gen_mul (enext, eprev, eprev));
32884
32885 /* xnext = xprev + enext * xprev */
32886 xnext = gen_reg_rtx (mode);
32887 rs6000_emit_madd (xnext, enext, xprev, xprev);
32888 }
32889
32890 } else
32891 xprev = x0;
32892
32893 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
32894
32895 /* u = n * xprev */
32896 u = gen_reg_rtx (mode);
32897 emit_insn (gen_mul (u, n, xprev));
32898
32899 /* v = n - (d * u) */
32900 v = gen_reg_rtx (mode);
32901 rs6000_emit_nmsub (v, d, u, n);
32902
32903 /* dst = (v * xprev) + u */
32904 rs6000_emit_madd (dst, v, xprev, u);
32905
32906 if (note_p)
32907 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
32908 }
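
/* A scalar sketch of the sequence above (illustrative only; "fres"
   stands in for the hardware reciprocal-estimate instruction, and the
   number of refinement steps is abbreviated -- see "passes" above):

     double swdiv (double n, double d)
     {
       double x = fres (d);        // x0: ~5 (or 14) bits correct
       double e = 1.0 - d * x;     // e0               (fnmsub)
       x = x + e * x;              // x1 = x0 + e0*x0  (fmadd)
       e = e * e;
       x = x + e * x;              // x2, and so on per pass
       double u = n * x;           // last pass folds in the numerator
       double v = n - d * u;       // residual         (fnmsub)
       return u + v * x;           // dst = u + v*x    (fmadd)
     }  */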
32909
32910 /* Newton-Raphson approximation of single/double-precision floating point
32911 rsqrt. Assumes no trapping math and finite arguments. */
32912
32913 void
32914 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
32915 {
32916 machine_mode mode = GET_MODE (src);
32917 rtx x0 = gen_reg_rtx (mode);
32918 rtx y = gen_reg_rtx (mode);
32919
32920 /* Low precision estimates guarantee 5 bits of accuracy. High
32921 precision estimates guarantee 14 bits of accuracy. SFmode
32922 requires 23 bits of accuracy. DFmode requires 52 bits of
32923 accuracy. Each pass at least doubles the accuracy, leading
32924 to the following. */
32925 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
32926 if (mode == DFmode || mode == V2DFmode)
32927 passes++;
32928
32929 REAL_VALUE_TYPE dconst3_2;
32930 int i;
32931 rtx halfthree;
32932 enum insn_code code = optab_handler (smul_optab, mode);
32933 insn_gen_fn gen_mul = GEN_FCN (code);
32934
32935 gcc_assert (code != CODE_FOR_nothing);
32936
32937 /* Load up the constant 1.5 either as a scalar, or as a vector. */
32938 real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
32939 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
32940
32941 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
32942
32943 /* x0 = rsqrt estimate */
32944 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
32945 UNSPEC_RSQRT)));
32946
32947 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
32948 if (!recip)
32949 {
32950 rtx zero = force_reg (mode, CONST0_RTX (mode));
32951 rtx target = emit_conditional_move (x0, GT, src, zero, mode,
32952 x0, zero, mode, 0);
32953 if (target != x0)
32954 emit_move_insn (x0, target);
32955 }
32956
32957 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
32958 rs6000_emit_msub (y, src, halfthree, src);
32959
32960 for (i = 0; i < passes; i++)
32961 {
32962 rtx x1 = gen_reg_rtx (mode);
32963 rtx u = gen_reg_rtx (mode);
32964 rtx v = gen_reg_rtx (mode);
32965
32966 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
32967 emit_insn (gen_mul (u, x0, x0));
32968 rs6000_emit_nmsub (v, y, u, halfthree);
32969 emit_insn (gen_mul (x1, x0, v));
32970 x0 = x1;
32971 }
32972
32973 /* If not reciprocal, multiply by src to produce sqrt. */
32974 if (!recip)
32975 emit_insn (gen_mul (dst, src, x0));
32976 else
32977 emit_move_insn (dst, x0);
32978
32979 return;
32980 }
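
/* A scalar sketch of the iteration above (illustrative only; "frsqrte"
   stands in for the hardware estimate, "passes" as computed above):

     double rsqrt (double s)
     {
       double x = frsqrte (s);          // initial estimate
       double y = 1.5 * s - s;          // y = 0.5*s, using only the 1.5 constant
       for (int i = 0; i < passes; i++)
         x = x * (1.5 - y * (x * x));   // x_{i+1} = x_i*(1.5 - 0.5*s*x_i^2)
       return x;                        // multiplied by s afterwards for sqrt
     }  */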
32981
32982 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
32983 (Power7) targets. DST is the target, and SRC is the argument operand. */
32984
32985 void
32986 rs6000_emit_popcount (rtx dst, rtx src)
32987 {
32988 machine_mode mode = GET_MODE (dst);
32989 rtx tmp1, tmp2;
32990
32991 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
32992 if (TARGET_POPCNTD)
32993 {
32994 if (mode == SImode)
32995 emit_insn (gen_popcntdsi2 (dst, src));
32996 else
32997 emit_insn (gen_popcntddi2 (dst, src));
32998 return;
32999 }
33000
33001 tmp1 = gen_reg_rtx (mode);
33002
33003 if (mode == SImode)
33004 {
33005 emit_insn (gen_popcntbsi2 (tmp1, src));
33006 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
33007 NULL_RTX, 0);
33008 tmp2 = force_reg (SImode, tmp2);
33009 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
33010 }
33011 else
33012 {
33013 emit_insn (gen_popcntbdi2 (tmp1, src));
33014 tmp2 = expand_mult (DImode, tmp1,
33015 GEN_INT ((HOST_WIDE_INT)
33016 0x01010101 << 32 | 0x01010101),
33017 NULL_RTX, 0);
33018 tmp2 = force_reg (DImode, tmp2);
33019 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
33020 }
33021 }
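
/* Worked example of the popcntb path, for SImode src = 0xff00ff00:
   popcntb gives per-byte counts 0x08000800; multiplying by 0x01010101
   accumulates all four counts into the top byte, and the final shift
   right by 24 leaves the popcount, 16.  */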
33022
33023
33024 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
33025 target, and SRC is the argument operand. */
33026
33027 void
33028 rs6000_emit_parity (rtx dst, rtx src)
33029 {
33030 machine_mode mode = GET_MODE (dst);
33031 rtx tmp;
33032
33033 tmp = gen_reg_rtx (mode);
33034
33035 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
33036 if (TARGET_CMPB)
33037 {
33038 if (mode == SImode)
33039 {
33040 emit_insn (gen_popcntbsi2 (tmp, src));
33041 emit_insn (gen_paritysi2_cmpb (dst, tmp));
33042 }
33043 else
33044 {
33045 emit_insn (gen_popcntbdi2 (tmp, src));
33046 emit_insn (gen_paritydi2_cmpb (dst, tmp));
33047 }
33048 return;
33049 }
33050
33051 if (mode == SImode)
33052 {
33053 /* Is mult+shift >= shift+xor+shift+xor? */
33054 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
33055 {
33056 rtx tmp1, tmp2, tmp3, tmp4;
33057
33058 tmp1 = gen_reg_rtx (SImode);
33059 emit_insn (gen_popcntbsi2 (tmp1, src));
33060
33061 tmp2 = gen_reg_rtx (SImode);
33062 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
33063 tmp3 = gen_reg_rtx (SImode);
33064 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
33065
33066 tmp4 = gen_reg_rtx (SImode);
33067 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
33068 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
33069 }
33070 else
33071 rs6000_emit_popcount (tmp, src);
33072 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
33073 }
33074 else
33075 {
33076 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
33077 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
33078 {
33079 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
33080
33081 tmp1 = gen_reg_rtx (DImode);
33082 emit_insn (gen_popcntbdi2 (tmp1, src));
33083
33084 tmp2 = gen_reg_rtx (DImode);
33085 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
33086 tmp3 = gen_reg_rtx (DImode);
33087 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
33088
33089 tmp4 = gen_reg_rtx (DImode);
33090 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
33091 tmp5 = gen_reg_rtx (DImode);
33092 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
33093
33094 tmp6 = gen_reg_rtx (DImode);
33095 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
33096 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
33097 }
33098 else
33099 rs6000_emit_popcount (tmp, src);
33100 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
33101 }
33102 }
33103
33104 /* Expand an Altivec constant permutation for little endian mode.
33105 There are two issues: First, the two input operands must be
33106 swapped so that together they form a double-wide array in LE
33107 order. Second, the vperm instruction has surprising behavior
33108 in LE mode: it interprets the elements of the source vectors
33109 in BE mode ("left to right") and interprets the elements of
33110 the destination vector in LE mode ("right to left"). To
33111 correct for this, we must subtract each element of the permute
33112 control vector from 31.
33113
33114 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
33115 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
33116 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
33117 serve as the permute control vector. Then, in BE mode,
33118
33119 vperm 9,10,11,12
33120
33121 places the desired result in vr9. However, in LE mode the
33122 vector contents will be
33123
33124 vr10 = 00000003 00000002 00000001 00000000
33125 vr11 = 00000007 00000006 00000005 00000004
33126
33127 The result of the vperm using the same permute control vector is
33128
33129 vr9 = 05000000 07000000 01000000 03000000
33130
33131 That is, the leftmost 4 bytes of vr10 are interpreted as the
33132 source for the rightmost 4 bytes of vr9, and so on.
33133
33134 If we change the permute control vector to
33135
33136 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
33137
33138 and issue
33139
33140 vperm 9,11,10,12
33141
33142 we get the desired
33143
33144 vr9 = 00000006 00000004 00000002 00000000. */
33145
33146 void
33147 altivec_expand_vec_perm_const_le (rtx operands[4])
33148 {
33149 unsigned int i;
33150 rtx perm[16];
33151 rtx constv, unspec;
33152 rtx target = operands[0];
33153 rtx op0 = operands[1];
33154 rtx op1 = operands[2];
33155 rtx sel = operands[3];
33156
33157 /* Unpack and adjust the constant selector. */
33158 for (i = 0; i < 16; ++i)
33159 {
33160 rtx e = XVECEXP (sel, 0, i);
33161 unsigned int elt = 31 - (INTVAL (e) & 31);
33162 perm[i] = GEN_INT (elt);
33163 }
33164
33165 /* Expand to a permute, swapping the inputs and using the
33166 adjusted selector. */
33167 if (!REG_P (op0))
33168 op0 = force_reg (V16QImode, op0);
33169 if (!REG_P (op1))
33170 op1 = force_reg (V16QImode, op1);
33171
33172 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
33173 constv = force_reg (V16QImode, constv);
33174 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
33175 UNSPEC_VPERM);
33176 if (!REG_P (target))
33177 {
33178 rtx tmp = gen_reg_rtx (V16QImode);
33179 emit_move_insn (tmp, unspec);
33180 unspec = tmp;
33181 }
33182
33183 emit_move_insn (target, unspec);
33184 }
33185
33186 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
33187 permute control vector. But here it's not a constant, so we must
33188 generate a vector NAND or NOR to do the adjustment. */
33189
33190 void
33191 altivec_expand_vec_perm_le (rtx operands[4])
33192 {
33193 rtx notx, iorx, unspec;
33194 rtx target = operands[0];
33195 rtx op0 = operands[1];
33196 rtx op1 = operands[2];
33197 rtx sel = operands[3];
33198 rtx tmp = target;
33199 rtx norreg = gen_reg_rtx (V16QImode);
33200 machine_mode mode = GET_MODE (target);
33201
33202 /* Get everything in regs so the pattern matches. */
33203 if (!REG_P (op0))
33204 op0 = force_reg (mode, op0);
33205 if (!REG_P (op1))
33206 op1 = force_reg (mode, op1);
33207 if (!REG_P (sel))
33208 sel = force_reg (V16QImode, sel);
33209 if (!REG_P (target))
33210 tmp = gen_reg_rtx (mode);
33211
33212 /* Invert the selector with a VNAND if available, else a VNOR.
33213 The VNAND is preferred for future fusion opportunities. */
33214 notx = gen_rtx_NOT (V16QImode, sel);
33215 iorx = (TARGET_P8_VECTOR
33216 ? gen_rtx_IOR (V16QImode, notx, notx)
33217 : gen_rtx_AND (V16QImode, notx, notx));
33218 emit_insn (gen_rtx_SET (norreg, iorx));
33219
33220 /* Permute with operands reversed and adjusted selector. */
33221 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
33222 UNSPEC_VPERM);
33223
33224 /* Copy into target, possibly by way of a register. */
33225 if (!REG_P (target))
33226 {
33227 emit_move_insn (tmp, unspec);
33228 unspec = tmp;
33229 }
33230
33231 emit_move_insn (target, unspec);
33232 }
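
/* Illustrative check, not from the original sources: the single vector
   NOT above is equivalent to the constant case's "subtract each element
   from 31", because vperm only examines the low five bits of each
   selector byte and, modulo 32, ~e == 31 - e.  */

static int
rs6000_inverted_selector_ok_p (unsigned char e)
{
  return ((~e) & 31) == 31 - (e & 31);
}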
33233
33234 /* Expand an Altivec constant permutation. Return true if we match
33235 an efficient implementation; false to fall back to VPERM. */
33236
33237 bool
33238 altivec_expand_vec_perm_const (rtx operands[4])
33239 {
33240 struct altivec_perm_insn {
33241 HOST_WIDE_INT mask;
33242 enum insn_code impl;
33243 unsigned char perm[16];
33244 };
33245 static const struct altivec_perm_insn patterns[] = {
33246 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
33247 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
33248 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
33249 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
33250 { OPTION_MASK_ALTIVEC,
33251 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
33252 : CODE_FOR_altivec_vmrglb_direct),
33253 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
33254 { OPTION_MASK_ALTIVEC,
33255 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
33256 : CODE_FOR_altivec_vmrglh_direct),
33257 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
33258 { OPTION_MASK_ALTIVEC,
33259 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
33260 : CODE_FOR_altivec_vmrglw_direct),
33261 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
33262 { OPTION_MASK_ALTIVEC,
33263 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
33264 : CODE_FOR_altivec_vmrghb_direct),
33265 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
33266 { OPTION_MASK_ALTIVEC,
33267 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
33268 : CODE_FOR_altivec_vmrghh_direct),
33269 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
33270 { OPTION_MASK_ALTIVEC,
33271 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
33272 : CODE_FOR_altivec_vmrghw_direct),
33273 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
33274 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
33275 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
33276 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
33277 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
33278 };
33279
33280 unsigned int i, j, elt, which;
33281 unsigned char perm[16];
33282 rtx target, op0, op1, sel, x;
33283 bool one_vec;
33284
33285 target = operands[0];
33286 op0 = operands[1];
33287 op1 = operands[2];
33288 sel = operands[3];
33289
33290 /* Unpack the constant selector. */
33291 for (i = which = 0; i < 16; ++i)
33292 {
33293 rtx e = XVECEXP (sel, 0, i);
33294 elt = INTVAL (e) & 31;
33295 which |= (elt < 16 ? 1 : 2);
33296 perm[i] = elt;
33297 }
33298
33299 /* Simplify the constant selector based on operands. */
33300 switch (which)
33301 {
33302 default:
33303 gcc_unreachable ();
33304
33305 case 3:
33306 one_vec = false;
33307 if (!rtx_equal_p (op0, op1))
33308 break;
33309 /* FALLTHRU */
33310
33311 case 2:
33312 for (i = 0; i < 16; ++i)
33313 perm[i] &= 15;
33314 op0 = op1;
33315 one_vec = true;
33316 break;
33317
33318 case 1:
33319 op1 = op0;
33320 one_vec = true;
33321 break;
33322 }
33323
33324 /* Look for splat patterns. */
33325 if (one_vec)
33326 {
33327 elt = perm[0];
33328
33329 for (i = 0; i < 16; ++i)
33330 if (perm[i] != elt)
33331 break;
33332 if (i == 16)
33333 {
33334 if (!BYTES_BIG_ENDIAN)
33335 elt = 15 - elt;
33336 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
33337 return true;
33338 }
33339
33340 if (elt % 2 == 0)
33341 {
33342 for (i = 0; i < 16; i += 2)
33343 if (perm[i] != elt || perm[i + 1] != elt + 1)
33344 break;
33345 if (i == 16)
33346 {
33347 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
33348 x = gen_reg_rtx (V8HImode);
33349 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
33350 GEN_INT (field)));
33351 emit_move_insn (target, gen_lowpart (V16QImode, x));
33352 return true;
33353 }
33354 }
33355
33356 if (elt % 4 == 0)
33357 {
33358 for (i = 0; i < 16; i += 4)
33359 if (perm[i] != elt
33360 || perm[i + 1] != elt + 1
33361 || perm[i + 2] != elt + 2
33362 || perm[i + 3] != elt + 3)
33363 break;
33364 if (i == 16)
33365 {
33366 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
33367 x = gen_reg_rtx (V4SImode);
33368 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
33369 GEN_INT (field)));
33370 emit_move_insn (target, gen_lowpart (V16QImode, x));
33371 return true;
33372 }
33373 }
33374 }
33375
33376 /* Look for merge and pack patterns. */
33377 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
33378 {
33379 bool swapped;
33380
33381 if ((patterns[j].mask & rs6000_isa_flags) == 0)
33382 continue;
33383
33384 elt = patterns[j].perm[0];
33385 if (perm[0] == elt)
33386 swapped = false;
33387 else if (perm[0] == elt + 16)
33388 swapped = true;
33389 else
33390 continue;
33391 for (i = 1; i < 16; ++i)
33392 {
33393 elt = patterns[j].perm[i];
33394 if (swapped)
33395 elt = (elt >= 16 ? elt - 16 : elt + 16);
33396 else if (one_vec && elt >= 16)
33397 elt -= 16;
33398 if (perm[i] != elt)
33399 break;
33400 }
33401 if (i == 16)
33402 {
33403 enum insn_code icode = patterns[j].impl;
33404 machine_mode omode = insn_data[icode].operand[0].mode;
33405 machine_mode imode = insn_data[icode].operand[1].mode;
33406
33407 /* For little-endian, don't use vpkuwum and vpkuhum if the
33408 underlying vector type is not V4SI or V8HI, respectively.
33409 For example, using vpkuwum with a V8HI picks up the even
33410 halfwords (BE numbering), whereas the even halfwords (LE
33411 numbering) are what we need.  */
33412 if (!BYTES_BIG_ENDIAN
33413 && icode == CODE_FOR_altivec_vpkuwum_direct
33414 && ((GET_CODE (op0) == REG
33415 && GET_MODE (op0) != V4SImode)
33416 || (GET_CODE (op0) == SUBREG
33417 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
33418 continue;
33419 if (!BYTES_BIG_ENDIAN
33420 && icode == CODE_FOR_altivec_vpkuhum_direct
33421 && ((GET_CODE (op0) == REG
33422 && GET_MODE (op0) != V8HImode)
33423 || (GET_CODE (op0) == SUBREG
33424 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
33425 continue;
33426
33427 /* For little-endian, the two input operands must be swapped
33428 (or swapped back) to ensure proper right-to-left numbering
33429 from 0 to 2N-1. */
33430 if (swapped ^ !BYTES_BIG_ENDIAN)
33431 std::swap (op0, op1);
33432 if (imode != V16QImode)
33433 {
33434 op0 = gen_lowpart (imode, op0);
33435 op1 = gen_lowpart (imode, op1);
33436 }
33437 if (omode == V16QImode)
33438 x = target;
33439 else
33440 x = gen_reg_rtx (omode);
33441 emit_insn (GEN_FCN (icode) (x, op0, op1));
33442 if (omode != V16QImode)
33443 emit_move_insn (target, gen_lowpart (V16QImode, x));
33444 return true;
33445 }
33446 }
33447
33448 if (!BYTES_BIG_ENDIAN)
33449 {
33450 altivec_expand_vec_perm_const_le (operands);
33451 return true;
33452 }
33453
33454 return false;
33455 }
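
/* Illustrative sketch, not from the original sources: the byte-splat
   test at the top of the one_vec case above, as a standalone predicate.
   A selector is a vspltb candidate exactly when all sixteen entries
   name the same byte; the expander then mirrors the index for little
   endian before emitting the splat.  */

static bool
rs6000_byte_splat_p (const unsigned char perm[16])
{
  unsigned int i;

  for (i = 1; i < 16; i++)
    if (perm[i] != perm[0])
      return false;
  return true;
}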
33456
33457 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
33458 Return true if we match an efficient implementation. */
33459
33460 static bool
33461 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
33462 unsigned char perm0, unsigned char perm1)
33463 {
33464 rtx x;
33465
33466 /* If both selectors come from the same operand, fold to single op. */
33467 if ((perm0 & 2) == (perm1 & 2))
33468 {
33469 if (perm0 & 2)
33470 op0 = op1;
33471 else
33472 op1 = op0;
33473 }
33474 /* If both operands are equal, fold to simpler permutation. */
33475 if (rtx_equal_p (op0, op1))
33476 {
33477 perm0 = perm0 & 1;
33478 perm1 = (perm1 & 1) + 2;
33479 }
33480 /* If the first selector comes from the second operand, swap. */
33481 else if (perm0 & 2)
33482 {
33483 if (perm1 & 2)
33484 return false;
33485 perm0 -= 2;
33486 perm1 += 2;
33487 std::swap (op0, op1);
33488 }
33489 /* If the second selector does not come from the second operand, fail. */
33490 else if ((perm1 & 2) == 0)
33491 return false;
33492
33493 /* Success! */
33494 if (target != NULL)
33495 {
33496 machine_mode vmode, dmode;
33497 rtvec v;
33498
33499 vmode = GET_MODE (target);
33500 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
33501 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
33502 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
33503 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
33504 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
33505 emit_insn (gen_rtx_SET (target, x));
33506 }
33507 return true;
33508 }
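
/* For example (illustrative): perm0 = 1 and perm1 = 2 index the
   four-element concatenation { op0[0], op0[1], op1[0], op1[1] }, so the
   emitted vec_select yields { op0[1], op1[0] }.  A selector pair such
   as perm0 = 3, perm1 = 1 is first canonicalized by the swap above.  */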
33509
33510 bool
33511 rs6000_expand_vec_perm_const (rtx operands[4])
33512 {
33513 rtx target, op0, op1, sel;
33514 unsigned char perm0, perm1;
33515
33516 target = operands[0];
33517 op0 = operands[1];
33518 op1 = operands[2];
33519 sel = operands[3];
33520
33521 /* Unpack the constant selector. */
33522 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
33523 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
33524
33525 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
33526 }
33527
33528 /* Test whether a constant permutation is supported. */
33529
33530 static bool
33531 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
33532 const unsigned char *sel)
33533 {
33534 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
33535 if (TARGET_ALTIVEC)
33536 return true;
33537
33538 /* Check for ps_merge* or evmerge* insns. */
33539 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
33540 || (TARGET_SPE && vmode == V2SImode))
33541 {
33542 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
33543 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
33544 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
33545 }
33546
33547 return false;
33548 }
33549
33550 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
33551
33552 static void
33553 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
33554 machine_mode vmode, unsigned nelt, rtx perm[])
33555 {
33556 machine_mode imode;
33557 rtx x;
33558
33559 imode = vmode;
33560 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
33561 {
33562 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
33563 imode = mode_for_vector (imode, nelt);
33564 }
33565
33566 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
33567 x = expand_vec_perm (vmode, op0, op1, x, target);
33568 if (x != target)
33569 emit_move_insn (target, x);
33570 }
33571
33572 /* Expand an extract even operation. */
33573
33574 void
33575 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
33576 {
33577 machine_mode vmode = GET_MODE (target);
33578 unsigned i, nelt = GET_MODE_NUNITS (vmode);
33579 rtx perm[16];
33580
33581 for (i = 0; i < nelt; i++)
33582 perm[i] = GEN_INT (i * 2);
33583
33584 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
33585 }
33586
33587 /* Expand a vector interleave operation. */
33588
33589 void
33590 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
33591 {
33592 machine_mode vmode = GET_MODE (target);
33593 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
33594 rtx perm[16];
33595
33596 high = (highp ? 0 : nelt / 2);
33597 for (i = 0; i < nelt / 2; i++)
33598 {
33599 perm[i * 2] = GEN_INT (i + high);
33600 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
33601 }
33602
33603 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
33604 }
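
/* For example (illustrative), with V4SI inputs the extract-even
   selector built above is { 0, 2, 4, 6 }, taking the even elements of
   the concatenation of OP0 and OP1, while the interleave selector is
   { 0, 4, 1, 5 } when HIGHP and { 2, 6, 3, 7 } otherwise.  */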
33605
33606 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
33607 void
33608 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
33609 {
33610 HOST_WIDE_INT hwi_scale (scale);
33611 REAL_VALUE_TYPE r_pow;
33612 rtvec v = rtvec_alloc (2);
33613 rtx elt;
33614 rtx scale_vec = gen_reg_rtx (V2DFmode);
33615 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
33616 elt = const_double_from_real_value (r_pow, DFmode);
33617 RTVEC_ELT (v, 0) = elt;
33618 RTVEC_ELT (v, 1) = elt;
33619 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
33620 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
33621 }
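
/* For example (illustrative), SCALE = 3 makes real_powi compute 2.0**3,
   so the constant vector is { 8.0, 8.0 } and the multiply scales each
   lane of SRC by eight.  */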
33622
33623 /* Return an RTX representing where to find the function value of a
33624 function returning MODE. */
33625 static rtx
33626 rs6000_complex_function_value (machine_mode mode)
33627 {
33628 unsigned int regno;
33629 rtx r1, r2;
33630 machine_mode inner = GET_MODE_INNER (mode);
33631 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
33632
33633 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
33634 regno = FP_ARG_RETURN;
33635 else
33636 {
33637 regno = GP_ARG_RETURN;
33638
33639 /* 32-bit is OK since it'll go in r3/r4. */
33640 if (TARGET_32BIT && inner_bytes >= 4)
33641 return gen_rtx_REG (mode, regno);
33642 }
33643
33644 if (inner_bytes >= 8)
33645 return gen_rtx_REG (mode, regno);
33646
33647 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
33648 const0_rtx);
33649 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
33650 GEN_INT (inner_bytes));
33651 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
33652 }
33653
33654 /* Return an rtx describing a return value of MODE as a PARALLEL
33655 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
33656 stride REG_STRIDE. */
33657
33658 static rtx
33659 rs6000_parallel_return (machine_mode mode,
33660 int n_elts, machine_mode elt_mode,
33661 unsigned int regno, unsigned int reg_stride)
33662 {
33663 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
33664
33665 int i;
33666 for (i = 0; i < n_elts; i++)
33667 {
33668 rtx r = gen_rtx_REG (elt_mode, regno);
33669 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
33670 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
33671 regno += reg_stride;
33672 }
33673
33674 return par;
33675 }
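
/* For example (illustrative), splitting a DImode value for the 32-bit
   ABI with -mpowerpc64 produces
     (parallel [(expr_list (reg:SI 3) (const_int 0))
                (expr_list (reg:SI 4) (const_int 4))])
   i.e. two SImode pieces in r3 and r4 at byte offsets 0 and 4,
   assuming GP_ARG_RETURN is r3.  */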
33676
33677 /* Target hook for TARGET_FUNCTION_VALUE.
33678
33679 On the SPE, both FPs and vectors are returned in r3.
33680
33681 On RS/6000 an integer value is in r3 and a floating-point value is in
33682 fp1, unless -msoft-float. */
33683
33684 static rtx
33685 rs6000_function_value (const_tree valtype,
33686 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
33687 bool outgoing ATTRIBUTE_UNUSED)
33688 {
33689 machine_mode mode;
33690 unsigned int regno;
33691 machine_mode elt_mode;
33692 int n_elts;
33693
33694 /* Special handling for structs in darwin64. */
33695 if (TARGET_MACHO
33696 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
33697 {
33698 CUMULATIVE_ARGS valcum;
33699 rtx valret;
33700
33701 valcum.words = 0;
33702 valcum.fregno = FP_ARG_MIN_REG;
33703 valcum.vregno = ALTIVEC_ARG_MIN_REG;
33704 /* Do a trial code generation as if this were going to be passed as
33705 an argument; if any part goes in memory, we return NULL. */
33706 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
33707 if (valret)
33708 return valret;
33709 /* Otherwise fall through to standard ABI rules. */
33710 }
33711
33712 mode = TYPE_MODE (valtype);
33713
33714 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
33715 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
33716 {
33717 int first_reg, n_regs;
33718
33719 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
33720 {
33721 /* _Decimal128 must use even/odd register pairs. */
33722 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
33723 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
33724 }
33725 else
33726 {
33727 first_reg = ALTIVEC_ARG_RETURN;
33728 n_regs = 1;
33729 }
33730
33731 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
33732 }
33733
33734 /* Some return value types need to be split for the 32-bit ABI with -mpowerpc64. */
33735 if (TARGET_32BIT && TARGET_POWERPC64)
33736 switch (mode)
33737 {
33738 default:
33739 break;
33740 case DImode:
33741 case SCmode:
33742 case DCmode:
33743 case TCmode:
33744 int count = GET_MODE_SIZE (mode) / 4;
33745 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
33746 }
33747
33748 if ((INTEGRAL_TYPE_P (valtype)
33749 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
33750 || POINTER_TYPE_P (valtype))
33751 mode = TARGET_32BIT ? SImode : DImode;
33752
33753 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
33754 /* _Decimal128 must use an even/odd register pair. */
33755 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
33756 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS
33757 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
33758 regno = FP_ARG_RETURN;
33759 else if (TREE_CODE (valtype) == COMPLEX_TYPE
33760 && targetm.calls.split_complex_arg)
33761 return rs6000_complex_function_value (mode);
33762 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
33763 return register is used in both cases, and we won't see V2DImode/V2DFmode
33764 for pure altivec, combine the two cases. */
33765 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
33766 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
33767 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
33768 regno = ALTIVEC_ARG_RETURN;
33769 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
33770 && (mode == DFmode || mode == DCmode
33771 || FLOAT128_IBM_P (mode) || mode == TCmode))
33772 return spe_build_register_parallel (mode, GP_ARG_RETURN);
33773 else
33774 regno = GP_ARG_RETURN;
33775
33776 return gen_rtx_REG (mode, regno);
33777 }
33778
33779 /* Define how to find the value returned by a library function
33780 assuming the value has mode MODE. */
33781 rtx
33782 rs6000_libcall_value (machine_mode mode)
33783 {
33784 unsigned int regno;
33785
33786 /* A long long return value needs to be split for the 32-bit ABI with -mpowerpc64. */
33787 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
33788 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
33789
33790 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
33791 /* _Decimal128 must use an even/odd register pair. */
33792 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
33793 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
33794 && TARGET_HARD_FLOAT && TARGET_FPRS
33795 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
33796 regno = FP_ARG_RETURN;
33797 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
33798 return register is used in both cases, and we won't see V2DImode/V2DFmode
33799 for pure altivec, combine the two cases. */
33800 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
33801 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
33802 regno = ALTIVEC_ARG_RETURN;
33803 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
33804 return rs6000_complex_function_value (mode);
33805 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
33806 && (mode == DFmode || mode == DCmode
33807 || FLOAT128_IBM_P (mode) || mode == TCmode))
33808 return spe_build_register_parallel (mode, GP_ARG_RETURN);
33809 else
33810 regno = GP_ARG_RETURN;
33811
33812 return gen_rtx_REG (mode, regno);
33813 }
33814
33815
33816 /* Return true if we use LRA instead of the reload pass. */
33817 static bool
33818 rs6000_lra_p (void)
33819 {
33820 return rs6000_lra_flag;
33821 }
33822
33823 /* Given FROM and TO register numbers, say whether this elimination is allowed.
33824 Frame pointer elimination is automatically handled.
33825
33826 For the RS/6000, if frame pointer elimination is being done, we would like
33827 to convert ap into fp, not sp.
33828
33829 We need r30 if -mminimal-toc was specified, and there are constant pool
33830 references. */
33831
33832 static bool
33833 rs6000_can_eliminate (const int from, const int to)
33834 {
33835 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
33836 ? ! frame_pointer_needed
33837 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
33838 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
33839 : true);
33840 }
33841
33842 /* Define the offset between two registers, FROM to be eliminated and its
33843 replacement TO, at the start of a routine. */
33844 HOST_WIDE_INT
33845 rs6000_initial_elimination_offset (int from, int to)
33846 {
33847 rs6000_stack_t *info = rs6000_stack_info ();
33848 HOST_WIDE_INT offset;
33849
33850 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
33851 offset = info->push_p ? 0 : -info->total_size;
33852 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
33853 {
33854 offset = info->push_p ? 0 : -info->total_size;
33855 if (FRAME_GROWS_DOWNWARD)
33856 offset += info->fixed_size + info->vars_size + info->parm_size;
33857 }
33858 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
33859 offset = FRAME_GROWS_DOWNWARD
33860 ? info->fixed_size + info->vars_size + info->parm_size
33861 : 0;
33862 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
33863 offset = info->total_size;
33864 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
33865 offset = info->push_p ? info->total_size : 0;
33866 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
33867 offset = 0;
33868 else
33869 gcc_unreachable ();
33870
33871 return offset;
33872 }
33873
33874 static rtx
33875 rs6000_dwarf_register_span (rtx reg)
33876 {
33877 rtx parts[8];
33878 int i, words;
33879 unsigned regno = REGNO (reg);
33880 machine_mode mode = GET_MODE (reg);
33881
33882 if (TARGET_SPE
33883 && regno < 32
33884 && (SPE_VECTOR_MODE (GET_MODE (reg))
33885 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
33886 && mode != SFmode && mode != SDmode && mode != SCmode)))
33887 ;
33888 else
33889 return NULL_RTX;
33890
33891 regno = REGNO (reg);
33892
33893 /* The duality of the SPE register size wreaks all kinds of havoc.
33894 This is a way of distinguishing r0 in 32-bits from r0 in
33895 64-bits. */
33896 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
33897 gcc_assert (words <= 4);
33898 for (i = 0; i < words; i++, regno++)
33899 {
33900 if (BYTES_BIG_ENDIAN)
33901 {
33902 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
33903 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
33904 }
33905 else
33906 {
33907 parts[2 * i] = gen_rtx_REG (SImode, regno);
33908 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
33909 }
33910 }
33911
33912 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
33913 }
33914
33915 /* Fill in sizes for SPE register high parts in table used by unwinder. */
33916
33917 static void
33918 rs6000_init_dwarf_reg_sizes_extra (tree address)
33919 {
33920 if (TARGET_SPE)
33921 {
33922 int i;
33923 machine_mode mode = TYPE_MODE (char_type_node);
33924 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
33925 rtx mem = gen_rtx_MEM (BLKmode, addr);
33926 rtx value = gen_int_mode (4, mode);
33927
33928 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
33929 {
33930 int column = DWARF_REG_TO_UNWIND_COLUMN
33931 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
33932 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
33933
33934 emit_move_insn (adjust_address (mem, mode, offset), value);
33935 }
33936 }
33937
33938 if (TARGET_MACHO && ! TARGET_ALTIVEC)
33939 {
33940 int i;
33941 machine_mode mode = TYPE_MODE (char_type_node);
33942 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
33943 rtx mem = gen_rtx_MEM (BLKmode, addr);
33944 rtx value = gen_int_mode (16, mode);
33945
33946 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
33947 The unwinder still needs to know the size of Altivec registers. */
33948
33949 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
33950 {
33951 int column = DWARF_REG_TO_UNWIND_COLUMN
33952 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
33953 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
33954
33955 emit_move_insn (adjust_address (mem, mode, offset), value);
33956 }
33957 }
33958 }
33959
33960 /* Map internal gcc register numbers to debug format register numbers.
33961 FORMAT specifies the type of debug register number to use:
33962 0 -- debug information, except for frame-related sections
33963 1 -- DWARF .debug_frame section
33964 2 -- DWARF .eh_frame section */
33965
33966 unsigned int
33967 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
33968 {
33969 /* We never use the GCC internal number for SPE high registers.
33970 Those are mapped to the 1200..1231 range for all debug formats. */
33971 if (SPE_HIGH_REGNO_P (regno))
33972 return regno - FIRST_SPE_HIGH_REGNO + 1200;
33973
33974 /* Except for the above, we use the internal number for non-DWARF
33975 debug information, and also for .eh_frame. */
33976 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
33977 return regno;
33978
33979 /* On some platforms, we use the standard DWARF register
33980 numbering for .debug_info and .debug_frame. */
33981 #ifdef RS6000_USE_DWARF_NUMBERING
33982 if (regno <= 63)
33983 return regno;
33984 if (regno == LR_REGNO)
33985 return 108;
33986 if (regno == CTR_REGNO)
33987 return 109;
33988 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
33989 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
33990 The actual code emitted saves the whole of CR, so we map CR2_REGNO
33991 to the DWARF reg for CR. */
33992 if (format == 1 && regno == CR2_REGNO)
33993 return 64;
33994 if (CR_REGNO_P (regno))
33995 return regno - CR0_REGNO + 86;
33996 if (regno == CA_REGNO)
33997 return 101; /* XER */
33998 if (ALTIVEC_REGNO_P (regno))
33999 return regno - FIRST_ALTIVEC_REGNO + 1124;
34000 if (regno == VRSAVE_REGNO)
34001 return 356;
34002 if (regno == VSCR_REGNO)
34003 return 67;
34004 if (regno == SPE_ACC_REGNO)
34005 return 99;
34006 if (regno == SPEFSCR_REGNO)
34007 return 612;
34008 #endif
34009 return regno;
34010 }
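
/* For example (illustrative), under RS6000_USE_DWARF_NUMBERING the GPRs
   and FPRs (0..63) keep their internal numbers, the link register maps
   to 108, CTR to 109 and CR0 to 86, while SPE high registers always
   land in the 1200..1231 range regardless of FORMAT.  */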
34011
34012 /* Target hook for eh_return_filter_mode. */
34013 static machine_mode
34014 rs6000_eh_return_filter_mode (void)
34015 {
34016 return TARGET_32BIT ? SImode : word_mode;
34017 }
34018
34019 /* Target hook for scalar_mode_supported_p. */
34020 static bool
34021 rs6000_scalar_mode_supported_p (machine_mode mode)
34022 {
34023 /* -m32 does not support TImode. This is the default, from
34024 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
34025 same ABI as for -m32. But default_scalar_mode_supported_p allows
34026 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
34027 for -mpowerpc64. */
34028 if (TARGET_32BIT && mode == TImode)
34029 return false;
34030
34031 if (DECIMAL_FLOAT_MODE_P (mode))
34032 return default_decimal_float_supported_p ();
34033 else if (TARGET_FLOAT128 && (mode == KFmode || mode == IFmode))
34034 return true;
34035 else
34036 return default_scalar_mode_supported_p (mode);
34037 }
34038
34039 /* Target hook for vector_mode_supported_p. */
34040 static bool
34041 rs6000_vector_mode_supported_p (machine_mode mode)
34042 {
34043
34044 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
34045 return true;
34046
34047 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
34048 return true;
34049
34050 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
34051 128-bit, the compiler might try to widen IEEE 128-bit to IBM
34052 double-double. */
34053 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
34054 return true;
34055
34056 else
34057 return false;
34058 }
34059
34060 /* Target hook for c_mode_for_suffix. */
34061 static machine_mode
34062 rs6000_c_mode_for_suffix (char suffix)
34063 {
34064 if (TARGET_FLOAT128)
34065 {
34066 if (suffix == 'q' || suffix == 'Q')
34067 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
34068
34069 /* At the moment, we are not defining a suffix for IBM extended double.
34070 If/when the default for -mabi=ieeelongdouble is changed, and we want
34071 to support __ibm128 constants in legacy library code, we may need to
34072 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
34073 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
34074 __float80 constants. */
34075 }
34076
34077 return VOIDmode;
34078 }
34079
34080 /* Target hook for invalid_arg_for_unprototyped_fn. */
34081 static const char *
34082 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
34083 {
34084 return (!rs6000_darwin64_abi
34085 && typelist == 0
34086 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
34087 && (funcdecl == NULL_TREE
34088 || (TREE_CODE (funcdecl) == FUNCTION_DECL
34089 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
34090 ? N_("AltiVec argument passed to unprototyped function")
34091 : NULL;
34092 }
34093
34094 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
34095 setup by using __stack_chk_fail_local hidden function instead of
34096 calling __stack_chk_fail directly. Otherwise it is better to call
34097 __stack_chk_fail directly. */
34098
34099 static tree ATTRIBUTE_UNUSED
34100 rs6000_stack_protect_fail (void)
34101 {
34102 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
34103 ? default_hidden_stack_protect_fail ()
34104 : default_external_stack_protect_fail ();
34105 }
34106
34107 void
34108 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
34109 int num_operands ATTRIBUTE_UNUSED)
34110 {
34111 if (rs6000_warn_cell_microcode)
34112 {
34113 const char *temp;
34114 int insn_code_number = recog_memoized (insn);
34115 location_t location = INSN_LOCATION (insn);
34116
34117 /* Punt on insns we cannot recognize. */
34118 if (insn_code_number < 0)
34119 return;
34120
34121 temp = get_insn_template (insn_code_number, insn);
34122
34123 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
34124 warning_at (location, OPT_mwarn_cell_microcode,
34125 "emitting microcode insn %s\t[%s] #%d",
34126 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
34127 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
34128 warning_at (location, OPT_mwarn_cell_microcode,
34129 "emitting conditional microcode insn %s\t[%s] #%d",
34130 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
34131 }
34132 }
34133
34134 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
34135
34136 #if TARGET_ELF
34137 static unsigned HOST_WIDE_INT
34138 rs6000_asan_shadow_offset (void)
34139 {
34140 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
34141 }
34142 #endif
34143 \f
34144 /* Mask options that we want to support inside of attribute((target)) and
34145 #pragma GCC target operations. Note, we do not include things like
34146 64/32-bit, endianness, hard/soft floating point, etc. that would have
34147 different calling sequences. */
34148
34149 struct rs6000_opt_mask {
34150 const char *name; /* option name */
34151 HOST_WIDE_INT mask; /* mask to set */
34152 bool invert; /* invert sense of mask */
34153 bool valid_target; /* option is a target option */
34154 };
34155
34156 static struct rs6000_opt_mask const rs6000_opt_masks[] =
34157 {
34158 { "altivec", OPTION_MASK_ALTIVEC, false, true },
34159 { "cmpb", OPTION_MASK_CMPB, false, true },
34160 { "crypto", OPTION_MASK_CRYPTO, false, true },
34161 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
34162 { "dlmzb", OPTION_MASK_DLMZB, false, true },
34163 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
34164 false, true },
34165 { "float128", OPTION_MASK_FLOAT128, false, true },
34166 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
34167 { "fprnd", OPTION_MASK_FPRND, false, true },
34168 { "hard-dfp", OPTION_MASK_DFP, false, true },
34169 { "htm", OPTION_MASK_HTM, false, true },
34170 { "isel", OPTION_MASK_ISEL, false, true },
34171 { "mfcrf", OPTION_MASK_MFCRF, false, true },
34172 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
34173 { "modulo", OPTION_MASK_MODULO, false, true },
34174 { "mulhw", OPTION_MASK_MULHW, false, true },
34175 { "multiple", OPTION_MASK_MULTIPLE, false, true },
34176 { "popcntb", OPTION_MASK_POPCNTB, false, true },
34177 { "popcntd", OPTION_MASK_POPCNTD, false, true },
34178 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
34179 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
34180 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
34181 { "power9-dform", OPTION_MASK_P9_DFORM, false, true },
34182 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
34183 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
34184 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
34185 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
34186 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
34187 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
34188 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
34189 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
34190 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
34191 { "string", OPTION_MASK_STRING, false, true },
34192 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
34193 { "update", OPTION_MASK_NO_UPDATE, true , true },
34194 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
34195 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
34196 { "vsx", OPTION_MASK_VSX, false, true },
34197 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
34198 #ifdef OPTION_MASK_64BIT
34199 #if TARGET_AIX_OS
34200 { "aix64", OPTION_MASK_64BIT, false, false },
34201 { "aix32", OPTION_MASK_64BIT, true, false },
34202 #else
34203 { "64", OPTION_MASK_64BIT, false, false },
34204 { "32", OPTION_MASK_64BIT, true, false },
34205 #endif
34206 #endif
34207 #ifdef OPTION_MASK_EABI
34208 { "eabi", OPTION_MASK_EABI, false, false },
34209 #endif
34210 #ifdef OPTION_MASK_LITTLE_ENDIAN
34211 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
34212 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
34213 #endif
34214 #ifdef OPTION_MASK_RELOCATABLE
34215 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
34216 #endif
34217 #ifdef OPTION_MASK_STRICT_ALIGN
34218 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
34219 #endif
34220 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
34221 { "string", OPTION_MASK_STRING, false, false },
34222 };
34223
34224 /* Builtin mask mapping for printing the flags. */
34225 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
34226 {
34227 { "altivec", RS6000_BTM_ALTIVEC, false, false },
34228 { "vsx", RS6000_BTM_VSX, false, false },
34229 { "spe", RS6000_BTM_SPE, false, false },
34230 { "paired", RS6000_BTM_PAIRED, false, false },
34231 { "fre", RS6000_BTM_FRE, false, false },
34232 { "fres", RS6000_BTM_FRES, false, false },
34233 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
34234 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
34235 { "popcntd", RS6000_BTM_POPCNTD, false, false },
34236 { "cell", RS6000_BTM_CELL, false, false },
34237 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
34238 { "crypto", RS6000_BTM_CRYPTO, false, false },
34239 { "htm", RS6000_BTM_HTM, false, false },
34240 { "hard-dfp", RS6000_BTM_DFP, false, false },
34241 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
34242 { "long-double-128", RS6000_BTM_LDBL128, false, false },
34243 };
34244
34245 /* Option variables that we want to support inside attribute((target)) and
34246 #pragma GCC target operations. */
34247
34248 struct rs6000_opt_var {
34249 const char *name; /* option name */
34250 size_t global_offset; /* offset of the option in global_options. */
34251 size_t target_offset; /* offset of the option in target options. */
34252 };
34253
34254 static struct rs6000_opt_var const rs6000_opt_vars[] =
34255 {
34256 { "friz",
34257 offsetof (struct gcc_options, x_TARGET_FRIZ),
34258 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
34259 { "avoid-indexed-addresses",
34260 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
34261 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
34262 { "paired",
34263 offsetof (struct gcc_options, x_rs6000_paired_float),
34264 offsetof (struct cl_target_option, x_rs6000_paired_float), },
34265 { "longcall",
34266 offsetof (struct gcc_options, x_rs6000_default_long_calls),
34267 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
34268 { "optimize-swaps",
34269 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
34270 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
34271 { "allow-movmisalign",
34272 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
34273 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
34274 { "allow-df-permute",
34275 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
34276 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
34277 { "sched-groups",
34278 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
34279 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
34280 { "always-hint",
34281 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
34282 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
34283 { "align-branch-targets",
34284 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
34285 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
34286 { "vectorize-builtins",
34287 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
34288 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
34289 { "tls-markers",
34290 offsetof (struct gcc_options, x_tls_markers),
34291 offsetof (struct cl_target_option, x_tls_markers), },
34292 { "sched-prolog",
34293 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
34294 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
34295 { "sched-epilog",
34296 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
34297 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
34298 { "gen-cell-microcode",
34299 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
34300 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
34301 { "warn-cell-microcode",
34302 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
34303 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
34304 };
34305
34306 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
34307 parsing. Return true if there were no errors. */
34308
34309 static bool
34310 rs6000_inner_target_options (tree args, bool attr_p)
34311 {
34312 bool ret = true;
34313
34314 if (args == NULL_TREE)
34315 ;
34316
34317 else if (TREE_CODE (args) == STRING_CST)
34318 {
34319 char *p = ASTRDUP (TREE_STRING_POINTER (args));
34320 char *q;
34321
34322 while ((q = strtok (p, ",")) != NULL)
34323 {
34324 bool error_p = false;
34325 bool not_valid_p = false;
34326 const char *cpu_opt = NULL;
34327
34328 p = NULL;
34329 if (strncmp (q, "cpu=", 4) == 0)
34330 {
34331 int cpu_index = rs6000_cpu_name_lookup (q+4);
34332 if (cpu_index >= 0)
34333 rs6000_cpu_index = cpu_index;
34334 else
34335 {
34336 error_p = true;
34337 cpu_opt = q+4;
34338 }
34339 }
34340 else if (strncmp (q, "tune=", 5) == 0)
34341 {
34342 int tune_index = rs6000_cpu_name_lookup (q+5);
34343 if (tune_index >= 0)
34344 rs6000_tune_index = tune_index;
34345 else
34346 {
34347 error_p = true;
34348 cpu_opt = q+5;
34349 }
34350 }
34351 else
34352 {
34353 size_t i;
34354 bool invert = false;
34355 char *r = q;
34356
34357 error_p = true;
34358 if (strncmp (r, "no-", 3) == 0)
34359 {
34360 invert = true;
34361 r += 3;
34362 }
34363
34364 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
34365 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
34366 {
34367 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
34368
34369 if (!rs6000_opt_masks[i].valid_target)
34370 not_valid_p = true;
34371 else
34372 {
34373 error_p = false;
34374 rs6000_isa_flags_explicit |= mask;
34375
34376 /* VSX needs altivec, so -mvsx automagically sets
34377 altivec and disables -mavoid-indexed-addresses. */
34378 if (!invert)
34379 {
34380 if (mask == OPTION_MASK_VSX)
34381 {
34382 mask |= OPTION_MASK_ALTIVEC;
34383 TARGET_AVOID_XFORM = 0;
34384 }
34385 }
34386
34387 if (rs6000_opt_masks[i].invert)
34388 invert = !invert;
34389
34390 if (invert)
34391 rs6000_isa_flags &= ~mask;
34392 else
34393 rs6000_isa_flags |= mask;
34394 }
34395 break;
34396 }
34397
34398 if (error_p && !not_valid_p)
34399 {
34400 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
34401 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
34402 {
34403 size_t j = rs6000_opt_vars[i].global_offset;
34404 *((int *) ((char *)&global_options + j)) = !invert;
34405 error_p = false;
34406 not_valid_p = false;
34407 break;
34408 }
34409 }
34410 }
34411
34412 if (error_p)
34413 {
34414 const char *eprefix, *esuffix;
34415
34416 ret = false;
34417 if (attr_p)
34418 {
34419 eprefix = "__attribute__((__target__(";
34420 esuffix = ")))";
34421 }
34422 else
34423 {
34424 eprefix = "#pragma GCC target ";
34425 esuffix = "";
34426 }
34427
34428 if (cpu_opt)
34429 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
34430 q, esuffix);
34431 else if (not_valid_p)
34432 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
34433 else
34434 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
34435 }
34436 }
34437 }
34438
34439 else if (TREE_CODE (args) == TREE_LIST)
34440 {
34441 do
34442 {
34443 tree value = TREE_VALUE (args);
34444 if (value)
34445 {
34446 bool ret2 = rs6000_inner_target_options (value, attr_p);
34447 if (!ret2)
34448 ret = false;
34449 }
34450 args = TREE_CHAIN (args);
34451 }
34452 while (args != NULL_TREE);
34453 }
34454
34455 else
34456 gcc_unreachable ();
34457
34458 return ret;
34459 }
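
/* Example usage (illustrative): each comma-separated token of
     #pragma GCC target ("cpu=power7,vsx,no-isel")
   or of the equivalent __attribute__((__target__("..."))) form is
   handled above; "cpu=" and "tune=" select a processor table entry,
   while the remaining names are looked up in rs6000_opt_masks and
   rs6000_opt_vars, with a "no-" prefix inverting the sense.  */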
34460
34461 /* Print out the target options as a list for -mdebug=target. */
34462
34463 static void
34464 rs6000_debug_target_options (tree args, const char *prefix)
34465 {
34466 if (args == NULL_TREE)
34467 fprintf (stderr, "%s<NULL>", prefix);
34468
34469 else if (TREE_CODE (args) == STRING_CST)
34470 {
34471 char *p = ASTRDUP (TREE_STRING_POINTER (args));
34472 char *q;
34473
34474 while ((q = strtok (p, ",")) != NULL)
34475 {
34476 p = NULL;
34477 fprintf (stderr, "%s\"%s\"", prefix, q);
34478 prefix = ", ";
34479 }
34480 }
34481
34482 else if (TREE_CODE (args) == TREE_LIST)
34483 {
34484 do
34485 {
34486 tree value = TREE_VALUE (args);
34487 if (value)
34488 {
34489 rs6000_debug_target_options (value, prefix);
34490 prefix = ", ";
34491 }
34492 args = TREE_CHAIN (args);
34493 }
34494 while (args != NULL_TREE);
34495 }
34496
34497 else
34498 gcc_unreachable ();
34499
34500 return;
34501 }
34502
34503 \f
34504 /* Hook to validate attribute((target("..."))). */
34505
34506 static bool
34507 rs6000_valid_attribute_p (tree fndecl,
34508 tree ARG_UNUSED (name),
34509 tree args,
34510 int flags)
34511 {
34512 struct cl_target_option cur_target;
34513 bool ret;
34514 tree old_optimize = build_optimization_node (&global_options);
34515 tree new_target, new_optimize;
34516 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
34517
34518 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
34519
34520 if (TARGET_DEBUG_TARGET)
34521 {
34522 tree tname = DECL_NAME (fndecl);
34523 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
34524 if (tname)
34525 fprintf (stderr, "function: %.*s\n",
34526 (int) IDENTIFIER_LENGTH (tname),
34527 IDENTIFIER_POINTER (tname));
34528 else
34529 fprintf (stderr, "function: unknown\n");
34530
34531 fprintf (stderr, "args:");
34532 rs6000_debug_target_options (args, " ");
34533 fprintf (stderr, "\n");
34534
34535 if (flags)
34536 fprintf (stderr, "flags: 0x%x\n", flags);
34537
34538 fprintf (stderr, "--------------------\n");
34539 }
34540
34541 old_optimize = build_optimization_node (&global_options);
34542 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
34543
34544 /* If the function changed the optimization levels as well as setting target
34545 options, start with the optimizations specified. */
34546 if (func_optimize && func_optimize != old_optimize)
34547 cl_optimization_restore (&global_options,
34548 TREE_OPTIMIZATION (func_optimize));
34549
34550 /* The target attributes may also change some optimization flags, so update
34551 the optimization options if necessary. */
34552 cl_target_option_save (&cur_target, &global_options);
34553 rs6000_cpu_index = rs6000_tune_index = -1;
34554 ret = rs6000_inner_target_options (args, true);
34555
34556 /* Set up any additional state. */
34557 if (ret)
34558 {
34559 ret = rs6000_option_override_internal (false);
34560 new_target = build_target_option_node (&global_options);
34561 }
34562 else
34563 new_target = NULL;
34564
34565 new_optimize = build_optimization_node (&global_options);
34566
34567 if (!new_target)
34568 ret = false;
34569
34570 else if (fndecl)
34571 {
34572 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
34573
34574 if (old_optimize != new_optimize)
34575 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
34576 }
34577
34578 cl_target_option_restore (&global_options, &cur_target);
34579
34580 if (old_optimize != new_optimize)
34581 cl_optimization_restore (&global_options,
34582 TREE_OPTIMIZATION (old_optimize));
34583
34584 return ret;
34585 }
34586
34587 \f
34588 /* Hook to validate the current #pragma GCC target and set the state, and
34589 update the macros based on what was changed. If ARGS is NULL, then
34590 POP_TARGET is used to reset the options. */
34591
34592 bool
34593 rs6000_pragma_target_parse (tree args, tree pop_target)
34594 {
34595 tree prev_tree = build_target_option_node (&global_options);
34596 tree cur_tree;
34597 struct cl_target_option *prev_opt, *cur_opt;
34598 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
34599 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
34600
34601 if (TARGET_DEBUG_TARGET)
34602 {
34603 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
34604 fprintf (stderr, "args:");
34605 rs6000_debug_target_options (args, " ");
34606 fprintf (stderr, "\n");
34607
34608 if (pop_target)
34609 {
34610 fprintf (stderr, "pop_target:\n");
34611 debug_tree (pop_target);
34612 }
34613 else
34614 fprintf (stderr, "pop_target: <NULL>\n");
34615
34616 fprintf (stderr, "--------------------\n");
34617 }
34618
34619 if (! args)
34620 {
34621 cur_tree = ((pop_target)
34622 ? pop_target
34623 : target_option_default_node);
34624 cl_target_option_restore (&global_options,
34625 TREE_TARGET_OPTION (cur_tree));
34626 }
34627 else
34628 {
34629 rs6000_cpu_index = rs6000_tune_index = -1;
34630 if (!rs6000_inner_target_options (args, false)
34631 || !rs6000_option_override_internal (false)
34632 || (cur_tree = build_target_option_node (&global_options))
34633 == NULL_TREE)
34634 {
34635 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
34636 fprintf (stderr, "invalid pragma\n");
34637
34638 return false;
34639 }
34640 }
34641
34642 target_option_current_node = cur_tree;
34643
34644 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
34645 change the macros that are defined. */
34646 if (rs6000_target_modify_macros_ptr)
34647 {
34648 prev_opt = TREE_TARGET_OPTION (prev_tree);
34649 prev_bumask = prev_opt->x_rs6000_builtin_mask;
34650 prev_flags = prev_opt->x_rs6000_isa_flags;
34651
34652 cur_opt = TREE_TARGET_OPTION (cur_tree);
34653 cur_flags = cur_opt->x_rs6000_isa_flags;
34654 cur_bumask = cur_opt->x_rs6000_builtin_mask;
34655
34656 diff_bumask = (prev_bumask ^ cur_bumask);
34657 diff_flags = (prev_flags ^ cur_flags);
34658
34659 if ((diff_flags != 0) || (diff_bumask != 0))
34660 {
34661 /* Delete old macros. */
34662 rs6000_target_modify_macros_ptr (false,
34663 prev_flags & diff_flags,
34664 prev_bumask & diff_bumask);
34665
34666 /* Define new macros. */
34667 rs6000_target_modify_macros_ptr (true,
34668 cur_flags & diff_flags,
34669 cur_bumask & diff_bumask);
34670 }
34671 }
34672
34673 return true;
34674 }
34675
34676 \f
34677 /* Remember the last target of rs6000_set_current_function. */
34678 static GTY(()) tree rs6000_previous_fndecl;
34679
34680 /* Establish appropriate back-end context for processing the function
34681 FNDECL. The argument might be NULL to indicate processing at top
34682 level, outside of any function scope. */
34683 static void
34684 rs6000_set_current_function (tree fndecl)
34685 {
34686 tree old_tree = (rs6000_previous_fndecl
34687 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
34688 : NULL_TREE);
34689
34690 tree new_tree = (fndecl
34691 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
34692 : NULL_TREE);
34693
34694 if (TARGET_DEBUG_TARGET)
34695 {
34696 bool print_final = false;
34697 fprintf (stderr, "\n==================== rs6000_set_current_function");
34698
34699 if (fndecl)
34700 fprintf (stderr, ", fndecl %s (%p)",
34701 (DECL_NAME (fndecl)
34702 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
34703 : "<unknown>"), (void *)fndecl);
34704
34705 if (rs6000_previous_fndecl)
34706 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
34707
34708 fprintf (stderr, "\n");
34709 if (new_tree)
34710 {
34711 fprintf (stderr, "\nnew fndecl target specific options:\n");
34712 debug_tree (new_tree);
34713 print_final = true;
34714 }
34715
34716 if (old_tree)
34717 {
34718 fprintf (stderr, "\nold fndecl target specific options:\n");
34719 debug_tree (old_tree);
34720 print_final = true;
34721 }
34722
34723 if (print_final)
34724 fprintf (stderr, "--------------------\n");
34725 }
34726
34727 /* Only change the context if the function changes. This hook is called
34728 several times in the course of compiling a function, and we don't want to
34729 slow things down too much or call target_reinit when it isn't safe. */
34730 if (fndecl && fndecl != rs6000_previous_fndecl)
34731 {
34732 rs6000_previous_fndecl = fndecl;
34733 if (old_tree == new_tree)
34734 ;
34735
34736 else if (new_tree && new_tree != target_option_default_node)
34737 {
34738 cl_target_option_restore (&global_options,
34739 TREE_TARGET_OPTION (new_tree));
34740 if (TREE_TARGET_GLOBALS (new_tree))
34741 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
34742 else
34743 TREE_TARGET_GLOBALS (new_tree)
34744 = save_target_globals_default_opts ();
34745 }
34746
34747 else if (old_tree && old_tree != target_option_default_node)
34748 {
34749 new_tree = target_option_current_node;
34750 cl_target_option_restore (&global_options,
34751 TREE_TARGET_OPTION (new_tree));
34752 if (TREE_TARGET_GLOBALS (new_tree))
34753 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
34754 else if (new_tree == target_option_default_node)
34755 restore_target_globals (&default_target_globals);
34756 else
34757 TREE_TARGET_GLOBALS (new_tree)
34758 = save_target_globals_default_opts ();
34759 }
34760 }
34761 }
34762
34763 \f
34764 /* Save the current options */
34765
34766 static void
34767 rs6000_function_specific_save (struct cl_target_option *ptr,
34768 struct gcc_options *opts)
34769 {
34770 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
34771 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
34772 }
34773
34774 /* Restore the current options */
34775
34776 static void
34777 rs6000_function_specific_restore (struct gcc_options *opts,
34778 struct cl_target_option *ptr)
34779
34780 {
34781 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
34782 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
34783 (void) rs6000_option_override_internal (false);
34784 }
34785
34786 /* Print the current options */
34787
34788 static void
34789 rs6000_function_specific_print (FILE *file, int indent,
34790 struct cl_target_option *ptr)
34791 {
34792 rs6000_print_isa_options (file, indent, "Isa options set",
34793 ptr->x_rs6000_isa_flags);
34794
34795 rs6000_print_isa_options (file, indent, "Isa options explicit",
34796 ptr->x_rs6000_isa_flags_explicit);
34797 }
34798
34799 /* Helper function to print the current isa or misc options on a line. */
34800
34801 static void
34802 rs6000_print_options_internal (FILE *file,
34803 int indent,
34804 const char *string,
34805 HOST_WIDE_INT flags,
34806 const char *prefix,
34807 const struct rs6000_opt_mask *opts,
34808 size_t num_elements)
34809 {
34810 size_t i;
34811 size_t start_column = 0;
34812 size_t cur_column;
34813 size_t max_column = 76;
34814 const char *comma = "";
34815
34816 if (indent)
34817 start_column += fprintf (file, "%*s", indent, "");
34818
34819 if (!flags)
34820 {
34821 fprintf (file, DEBUG_FMT_S, string, "<none>");
34822 return;
34823 }
34824
34825 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
34826
34827 /* Print the various mask options. */
34828 cur_column = start_column;
34829 for (i = 0; i < num_elements; i++)
34830 {
34831 if ((flags & opts[i].mask) != 0)
34832 {
34833 const char *no_str = opts[i].invert ? "no-" : "";
34834 size_t len = (strlen (comma)
34835 + strlen (prefix)
34836 + strlen (no_str)
34837 + strlen (opts[i].name));
34838
34839 cur_column += len;
34840 if (cur_column > max_column)
34841 {
34842 fprintf (file, ", \\\n%*s", (int)start_column, "");
34843 cur_column = start_column + len;
34844 comma = "";
34845 }
34846
34847 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
34848 opts[i].name);
34849 flags &= ~ opts[i].mask;
34850 comma = ", ";
34851 }
34852 }
34853
34854 fputs ("\n", file);
34855 }
34856
34857 /* Helper function to print the current isa options on a line. */
34858
34859 static void
34860 rs6000_print_isa_options (FILE *file, int indent, const char *string,
34861 HOST_WIDE_INT flags)
34862 {
34863 rs6000_print_options_internal (file, indent, string, flags, "-m",
34864 &rs6000_opt_masks[0],
34865 ARRAY_SIZE (rs6000_opt_masks));
34866 }
34867
34868 static void
34869 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
34870 HOST_WIDE_INT flags)
34871 {
34872 rs6000_print_options_internal (file, indent, string, flags, "",
34873 &rs6000_builtin_mask_names[0],
34874 ARRAY_SIZE (rs6000_builtin_mask_names));
34875 }
34876
34877 \f
34878 /* Hook to determine if one function can safely inline another. */
34879
34880 static bool
34881 rs6000_can_inline_p (tree caller, tree callee)
34882 {
34883 bool ret = false;
34884 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
34885 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
34886
34887 /* If callee has no option attributes, then it is ok to inline. */
34888 if (!callee_tree)
34889 ret = true;
34890
34891 /* If caller has no option attributes, but callee does, then it is not ok
34892 to inline. */
34893 else if (!caller_tree)
34894 ret = false;
34895
34896 else
34897 {
34898 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
34899 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
34900
34901 /* Callee's options should be a subset of the caller's, i.e. a vsx function
34902 can inline an altivec function but a non-vsx function can't inline a
34903 vsx function. */
34904 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
34905 == callee_opts->x_rs6000_isa_flags)
34906 ret = true;
34907 }
34908
34909 if (TARGET_DEBUG_TARGET)
34910 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
34911 (DECL_NAME (caller)
34912 ? IDENTIFIER_POINTER (DECL_NAME (caller))
34913 : "<unknown>"),
34914 (DECL_NAME (callee)
34915 ? IDENTIFIER_POINTER (DECL_NAME (callee))
34916 : "<unknown>"),
34917 (ret ? "can" : "cannot"));
34918
34919 return ret;
34920 }
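
/* For example, with the subset test above (flag names from rs6000.opt,
   shown for illustration): a caller whose ISA flags include both VSX
   and AltiVec can inline a callee that only uses AltiVec:

	caller = OPTION_MASK_VSX | OPTION_MASK_ALTIVEC;
	callee = OPTION_MASK_ALTIVEC;
	(caller & callee) == callee	--> true, inlining allowed

   but not the reverse, since ANDing a VSX callee's flags with a
   non-VSX caller's flags drops the VSX bit.  */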
34921 \f
34922 /* Allocate a stack temp and fixup the address so it meets the particular
34923 memory requirements (either offsettable or REG+REG addressing). */
34924
34925 rtx
34926 rs6000_allocate_stack_temp (machine_mode mode,
34927 bool offsettable_p,
34928 bool reg_reg_p)
34929 {
34930 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
34931 rtx addr = XEXP (stack, 0);
34932 int strict_p = (reload_in_progress || reload_completed);
34933
34934 if (!legitimate_indirect_address_p (addr, strict_p))
34935 {
34936 if (offsettable_p
34937 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
34938 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
34939
34940 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
34941 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
34942 }
34943
34944 return stack;
34945 }
34946
34947 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
34948 to such a form to deal with memory reference instructions like STFIWX that
34949 only take reg+reg addressing. */
34950
34951 rtx
34952 rs6000_address_for_fpconvert (rtx x)
34953 {
34954 int strict_p = (reload_in_progress || reload_completed);
34955 rtx addr;
34956
34957 gcc_assert (MEM_P (x));
34958 addr = XEXP (x, 0);
34959 if (! legitimate_indirect_address_p (addr, strict_p)
34960 && ! legitimate_indexed_address_p (addr, strict_p))
34961 {
34962 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
34963 {
34964 rtx reg = XEXP (addr, 0);
34965 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
34966 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
34967 gcc_assert (REG_P (reg));
34968 emit_insn (gen_add3_insn (reg, reg, size_rtx));
34969 addr = reg;
34970 }
34971 else if (GET_CODE (addr) == PRE_MODIFY)
34972 {
34973 rtx reg = XEXP (addr, 0);
34974 rtx expr = XEXP (addr, 1);
34975 gcc_assert (REG_P (reg));
34976 gcc_assert (GET_CODE (expr) == PLUS);
34977 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
34978 addr = reg;
34979 }
34980
34981 x = replace_equiv_address (x, copy_addr_to_reg (addr));
34982 }
34983
34984 return x;
34985 }
34986
34987 /* Given a memory reference, if it is not in the form for altivec memory
34988 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
34989 convert to the altivec format. */
34990
34991 rtx
34992 rs6000_address_for_altivec (rtx x)
34993 {
34994 gcc_assert (MEM_P (x));
34995 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
34996 {
34997 rtx addr = XEXP (x, 0);
34998 int strict_p = (reload_in_progress || reload_completed);
34999
35000 if (!legitimate_indexed_address_p (addr, strict_p)
35001 && !legitimate_indirect_address_p (addr, strict_p))
35002 addr = copy_to_mode_reg (Pmode, addr);
35003
35004 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
35005 x = change_address (x, GET_MODE (x), addr);
35006 }
35007
35008 return x;
35009 }
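
/* For example (host arithmetic, assuming a 64-bit Pmode):

	addr	   = 0x0000000010000008
	addr & -16 = 0x0000000010000000

   mirroring the AltiVec convention that lvx/stvx ignore the low four
   bits of the effective address and always access an aligned 16-byte
   quadword.  */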
35010
35011 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
35012
35013 On the RS/6000, all integer constants are acceptable, though most won't be
35014 valid for particular insns. Only easy FP constants are acceptable. */
35015
35016 static bool
35017 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
35018 {
35019 if (TARGET_ELF && tls_referenced_p (x))
35020 return false;
35021
35022 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
35023 || GET_MODE (x) == VOIDmode
35024 || (TARGET_POWERPC64 && mode == DImode)
35025 || easy_fp_constant (x, mode)
35026 || easy_vector_constant (x, mode));
35027 }
35028
35029 \f
35030 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
35031
35032 static bool
35033 chain_already_loaded (rtx_insn *last)
35034 {
35035 for (; last != NULL; last = PREV_INSN (last))
35036 {
35037 if (NONJUMP_INSN_P (last))
35038 {
35039 rtx patt = PATTERN (last);
35040
35041 if (GET_CODE (patt) == SET)
35042 {
35043 rtx lhs = XEXP (patt, 0);
35044
35045 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
35046 return true;
35047 }
35048 }
35049 }
35050 return false;
35051 }
35052
35053 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
35054
35055 void
35056 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
35057 {
35058 const bool direct_call_p
35059 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
35060 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
35061 rtx toc_load = NULL_RTX;
35062 rtx toc_restore = NULL_RTX;
35063 rtx func_addr;
35064 rtx abi_reg = NULL_RTX;
35065 rtx call[4];
35066 int n_call;
35067 rtx insn;
35068
35069 /* Handle longcall attributes. */
35070 if (INTVAL (cookie) & CALL_LONG)
35071 func_desc = rs6000_longcall_ref (func_desc);
35072
35073 /* Handle indirect calls. */
35074 if (GET_CODE (func_desc) != SYMBOL_REF
35075 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
35076 {
35077 /* Save the TOC into its reserved slot before the call,
35078 and prepare to restore it after the call. */
35079 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
35080 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
35081 rtx stack_toc_mem = gen_frame_mem (Pmode,
35082 gen_rtx_PLUS (Pmode, stack_ptr,
35083 stack_toc_offset));
35084 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
35085 gen_rtvec (1, stack_toc_offset),
35086 UNSPEC_TOCSLOT);
35087 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
35088
35089 /* Can we optimize saving the TOC in the prologue or
35090 do we need to do it at every call? */
35091 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
35092 cfun->machine->save_toc_in_prologue = true;
35093 else
35094 {
35095 MEM_VOLATILE_P (stack_toc_mem) = 1;
35096 emit_move_insn (stack_toc_mem, toc_reg);
35097 }
35098
35099 if (DEFAULT_ABI == ABI_ELFv2)
35100 {
35101 /* A function pointer in the ELFv2 ABI is just a plain address, but
35102 the ABI requires it to be loaded into r12 before the call. */
35103 func_addr = gen_rtx_REG (Pmode, 12);
35104 emit_move_insn (func_addr, func_desc);
35105 abi_reg = func_addr;
35106 }
35107 else
35108 {
35109 /* A function pointer under AIX is a pointer to a data area whose
35110 first word contains the actual address of the function, whose
35111 second word contains a pointer to its TOC, and whose third word
35112 contains a value to place in the static chain register (r11).
35113 Note that if we load the static chain, our "trampoline" need
35114 not have any executable code. */
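
/* Viewed as a C structure, the data area referenced by func_desc has
   roughly this shape, a sketch only; each field is Pmode-sized and
   the precise layout is dictated by the AIX ABI:

	struct aix_func_desc
	{
	  void *code_addr;	   word 1: address of the function
	  void *toc;		   word 2: callee's TOC pointer
	  void *static_chain;	   word 3: value loaded into r11
	};  */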
35115
35116 /* Load up address of the actual function. */
35117 func_desc = force_reg (Pmode, func_desc);
35118 func_addr = gen_reg_rtx (Pmode);
35119 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
35120
35121 /* Prepare to load the TOC of the called function. Note that the
35122 TOC load must happen immediately before the actual call so
35123 that unwinding the TOC registers works correctly. See the
35124 comment in frob_update_context. */
35125 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
35126 rtx func_toc_mem = gen_rtx_MEM (Pmode,
35127 gen_rtx_PLUS (Pmode, func_desc,
35128 func_toc_offset));
35129 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
35130
35131 /* If we have a static chain, load it up. But, if the call was
35132 originally direct, the 3rd word has not been written since no
35133 trampoline has been built, so we ought not to load it, lest we
35134 overwrite a static chain value. */
35135 if (!direct_call_p
35136 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
35137 && !chain_already_loaded (get_current_sequence ()->next->last))
35138 {
35139 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
35140 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
35141 rtx func_sc_mem = gen_rtx_MEM (Pmode,
35142 gen_rtx_PLUS (Pmode, func_desc,
35143 func_sc_offset));
35144 emit_move_insn (sc_reg, func_sc_mem);
35145 abi_reg = sc_reg;
35146 }
35147 }
35148 }
35149 else
35150 {
35151 /* Direct calls use the TOC: for local calls, the callee will
35152 assume the TOC register is set; for non-local calls, the
35153 PLT stub needs the TOC register. */
35154 abi_reg = toc_reg;
35155 func_addr = func_desc;
35156 }
35157
35158 /* Create the call. */
35159 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
35160 if (value != NULL_RTX)
35161 call[0] = gen_rtx_SET (value, call[0]);
35162 n_call = 1;
35163
35164 if (toc_load)
35165 call[n_call++] = toc_load;
35166 if (toc_restore)
35167 call[n_call++] = toc_restore;
35168
35169 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
35170
35171 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
35172 insn = emit_call_insn (insn);
35173
35174 /* Mention all registers defined by the ABI to hold information
35175 as uses in CALL_INSN_FUNCTION_USAGE. */
35176 if (abi_reg)
35177 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
35178 }
35179
35180 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
35181
35182 void
35183 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
35184 {
35185 rtx call[2];
35186 rtx insn;
35187
35188 gcc_assert (INTVAL (cookie) == 0);
35189
35190 /* Create the call. */
35191 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
35192 if (value != NULL_RTX)
35193 call[0] = gen_rtx_SET (value, call[0]);
35194
35195 call[1] = simple_return_rtx;
35196
35197 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
35198 insn = emit_call_insn (insn);
35199
35200 /* Note use of the TOC register. */
35201 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
35202 /* We need to also mark a use of the link register since the function we
35203 sibling-call to will use it to return to our caller. */
35204 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
35205 }
35206
35207 /* Return whether we need to always update the saved TOC pointer when we update
35208 the stack pointer. */
35209
35210 static bool
35211 rs6000_save_toc_in_prologue_p (void)
35212 {
35213 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
35214 }
35215
35216 #ifdef HAVE_GAS_HIDDEN
35217 # define USE_HIDDEN_LINKONCE 1
35218 #else
35219 # define USE_HIDDEN_LINKONCE 0
35220 #endif
35221
35222 /* Fills in the label name that should be used for a 476 link stack thunk. */
35223
35224 void
35225 get_ppc476_thunk_name (char name[32])
35226 {
35227 gcc_assert (TARGET_LINK_STACK);
35228
35229 if (USE_HIDDEN_LINKONCE)
35230 sprintf (name, "__ppc476.get_thunk");
35231 else
35232 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
35233 }
35234
35235 /* This function emits the simple thunk routine that is used to preserve
35236 the link stack on the 476 cpu. */
35237
35238 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
35239 static void
35240 rs6000_code_end (void)
35241 {
35242 char name[32];
35243 tree decl;
35244
35245 if (!TARGET_LINK_STACK)
35246 return;
35247
35248 get_ppc476_thunk_name (name);
35249
35250 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
35251 build_function_type_list (void_type_node, NULL_TREE));
35252 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
35253 NULL_TREE, void_type_node);
35254 TREE_PUBLIC (decl) = 1;
35255 TREE_STATIC (decl) = 1;
35256
35257 #if RS6000_WEAK
35258 if (USE_HIDDEN_LINKONCE)
35259 {
35260 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
35261 targetm.asm_out.unique_section (decl, 0);
35262 switch_to_section (get_named_section (decl, NULL, 0));
35263 DECL_WEAK (decl) = 1;
35264 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
35265 targetm.asm_out.globalize_label (asm_out_file, name);
35266 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
35267 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
35268 }
35269 else
35270 #endif
35271 {
35272 switch_to_section (text_section);
35273 ASM_OUTPUT_LABEL (asm_out_file, name);
35274 }
35275
35276 DECL_INITIAL (decl) = make_node (BLOCK);
35277 current_function_decl = decl;
35278 allocate_struct_function (decl, false);
35279 init_function_start (decl);
35280 first_function_block_is_cold = false;
35281 /* Make sure unwind info is emitted for the thunk if needed. */
35282 final_start_function (emit_barrier (), asm_out_file, 1);
35283
35284 fputs ("\tblr\n", asm_out_file);
35285
35286 final_end_function ();
35287 init_insn_lengths ();
35288 free_after_compilation (cfun);
35289 set_cfun (NULL);
35290 current_function_decl = NULL;
35291 }
35292
35293 /* Add r30 to hard reg set if the prologue sets it up and it is not
35294 pic_offset_table_rtx. */
35295
35296 static void
35297 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
35298 {
35299 if (!TARGET_SINGLE_PIC_BASE
35300 && TARGET_TOC
35301 && TARGET_MINIMAL_TOC
35302 && get_pool_size () != 0)
35303 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
35304 if (cfun->machine->split_stack_argp_used)
35305 add_to_hard_reg_set (&set->set, Pmode, 12);
35306 }
35307
35308 \f
35309 /* Helper function for rs6000_split_logical to emit a logical instruction after
35310 splitting the operation into single GPR registers.
35311
35312 DEST is the destination register.
35313 OP1 and OP2 are the input source registers.
35314 CODE is the base operation (AND, IOR, XOR, NOT).
35315 MODE is the machine mode.
35316 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
35317 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
35318 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
35319
35320 static void
35321 rs6000_split_logical_inner (rtx dest,
35322 rtx op1,
35323 rtx op2,
35324 enum rtx_code code,
35325 machine_mode mode,
35326 bool complement_final_p,
35327 bool complement_op1_p,
35328 bool complement_op2_p)
35329 {
35330 rtx bool_rtx;
35331
35332 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
35333 if (op2 && GET_CODE (op2) == CONST_INT
35334 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
35335 && !complement_final_p && !complement_op1_p && !complement_op2_p)
35336 {
35337 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
35338 HOST_WIDE_INT value = INTVAL (op2) & mask;
35339
35340 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
35341 if (code == AND)
35342 {
35343 if (value == 0)
35344 {
35345 emit_insn (gen_rtx_SET (dest, const0_rtx));
35346 return;
35347 }
35348
35349 else if (value == mask)
35350 {
35351 if (!rtx_equal_p (dest, op1))
35352 emit_insn (gen_rtx_SET (dest, op1));
35353 return;
35354 }
35355 }
35356
35357 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
35358 into separate ORI/ORIS or XORI/XORIS instructions. */
35359 else if (code == IOR || code == XOR)
35360 {
35361 if (value == 0)
35362 {
35363 if (!rtx_equal_p (dest, op1))
35364 emit_insn (gen_rtx_SET (dest, op1));
35365 return;
35366 }
35367 }
35368 }
35369
35370 if (code == AND && mode == SImode
35371 && !complement_final_p && !complement_op1_p && !complement_op2_p)
35372 {
35373 emit_insn (gen_andsi3 (dest, op1, op2));
35374 return;
35375 }
35376
35377 if (complement_op1_p)
35378 op1 = gen_rtx_NOT (mode, op1);
35379
35380 if (complement_op2_p)
35381 op2 = gen_rtx_NOT (mode, op2);
35382
35383 /* For canonical RTL, if only one arm is inverted it is the first. */
35384 if (!complement_op1_p && complement_op2_p)
35385 std::swap (op1, op2);
35386
35387 bool_rtx = ((code == NOT)
35388 ? gen_rtx_NOT (mode, op1)
35389 : gen_rtx_fmt_ee (code, mode, op1, op2));
35390
35391 if (complement_final_p)
35392 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
35393
35394 emit_insn (gen_rtx_SET (dest, bool_rtx));
35395 }
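
/* As an example of the canonicalization above, splitting an
   and-with-complement produces RTL such as

	(set (reg:SI d)
	     (and:SI (not:SI (reg:SI a)) (reg:SI b)))

   with the inverted arm first, which the machine description can match
   directly with the PowerPC andc instruction.  */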
35396
35397 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
35398 operations are split immediately during RTL generation to allow for more
35399 optimizations of the AND/IOR/XOR.
35400
35401 OPERANDS is an array containing the destination and two input operands.
35402 CODE is the base operation (AND, IOR, XOR, NOT).
35403 MODE is the machine mode.
35404 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
35405 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
35406 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
35409
35410 static void
35411 rs6000_split_logical_di (rtx operands[3],
35412 enum rtx_code code,
35413 bool complement_final_p,
35414 bool complement_op1_p,
35415 bool complement_op2_p)
35416 {
35417 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
35418 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
35419 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
35420 enum hi_lo { hi = 0, lo = 1 };
35421 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
35422 size_t i;
35423
35424 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
35425 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
35426 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
35427 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
35428
35429 if (code == NOT)
35430 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
35431 else
35432 {
35433 if (GET_CODE (operands[2]) != CONST_INT)
35434 {
35435 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
35436 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
35437 }
35438 else
35439 {
35440 HOST_WIDE_INT value = INTVAL (operands[2]);
35441 HOST_WIDE_INT value_hi_lo[2];
35442
35443 gcc_assert (!complement_final_p);
35444 gcc_assert (!complement_op1_p);
35445 gcc_assert (!complement_op2_p);
35446
35447 value_hi_lo[hi] = value >> 32;
35448 value_hi_lo[lo] = value & lower_32bits;
35449
35450 for (i = 0; i < 2; i++)
35451 {
35452 HOST_WIDE_INT sub_value = value_hi_lo[i];
35453
35454 if (sub_value & sign_bit)
35455 sub_value |= upper_32bits;
35456
35457 op2_hi_lo[i] = GEN_INT (sub_value);
35458
35459 /* If this is an AND instruction, check to see if we need to load
35460 the value in a register. */
35461 if (code == AND && sub_value != -1 && sub_value != 0
35462 && !and_operand (op2_hi_lo[i], SImode))
35463 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
35464 }
35465 }
35466 }
35467
35468 for (i = 0; i < 2; i++)
35469 {
35470 /* Split large IOR/XOR operations. */
35471 if ((code == IOR || code == XOR)
35472 && GET_CODE (op2_hi_lo[i]) == CONST_INT
35473 && !complement_final_p
35474 && !complement_op1_p
35475 && !complement_op2_p
35476 && !logical_const_operand (op2_hi_lo[i], SImode))
35477 {
35478 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
35479 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
35480 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
35481 rtx tmp = gen_reg_rtx (SImode);
35482
35483 /* Make sure the constant is sign extended. */
35484 if ((hi_16bits & sign_bit) != 0)
35485 hi_16bits |= upper_32bits;
35486
35487 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
35488 code, SImode, false, false, false);
35489
35490 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
35491 code, SImode, false, false, false);
35492 }
35493 else
35494 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
35495 code, SImode, complement_final_p,
35496 complement_op1_p, complement_op2_p);
35497 }
35498
35499 return;
35500 }
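
/* The constant handling above behaves like this standalone sketch
   (plain C, with long long standing in for HOST_WIDE_INT and no RTL
   machinery; each 32-bit half is re-sign-extended so that it is a
   valid SImode constant):

	void
	split_di_constant (long long value, long long *hi, long long *lo)
	{
	  *hi = value >> 32;
	  *lo = value & 0xffffffffLL;
	  if (*hi & 0x80000000LL)
	    *hi |= ~0xffffffffLL;
	  if (*lo & 0x80000000LL)
	    *lo |= ~0xffffffffLL;
	}

   For instance, 0x123456789LL splits into hi = 0x1 and lo = 0x23456789,
   while 0x80000000LL splits into hi = 0 and lo = 0xffffffff80000000,
   the sign-extended SImode view of 0x80000000.  */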
35501
35502 /* Split the insns that make up boolean operations operating on multiple GPR
35503 registers. The boolean MD patterns ensure that the inputs either are
35504 exactly the same as the output registers, or there is no overlap.
35505
35506 OPERANDS is an array containing the destination and two input operands.
35507 CODE is the base operation (AND, IOR, XOR, NOT).
35508 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
35509 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
35510 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
35511
35512 void
35513 rs6000_split_logical (rtx operands[3],
35514 enum rtx_code code,
35515 bool complement_final_p,
35516 bool complement_op1_p,
35517 bool complement_op2_p)
35518 {
35519 machine_mode mode = GET_MODE (operands[0]);
35520 machine_mode sub_mode;
35521 rtx op0, op1, op2;
35522 int sub_size, regno0, regno1, nregs, i;
35523
35524 /* If this is DImode, use the specialized version that can run before
35525 register allocation. */
35526 if (mode == DImode && !TARGET_POWERPC64)
35527 {
35528 rs6000_split_logical_di (operands, code, complement_final_p,
35529 complement_op1_p, complement_op2_p);
35530 return;
35531 }
35532
35533 op0 = operands[0];
35534 op1 = operands[1];
35535 op2 = (code == NOT) ? NULL_RTX : operands[2];
35536 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
35537 sub_size = GET_MODE_SIZE (sub_mode);
35538 regno0 = REGNO (op0);
35539 regno1 = REGNO (op1);
35540
35541 gcc_assert (reload_completed);
35542 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
35543 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
35544
35545 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
35546 gcc_assert (nregs > 1);
35547
35548 if (op2 && REG_P (op2))
35549 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
35550
35551 for (i = 0; i < nregs; i++)
35552 {
35553 int offset = i * sub_size;
35554 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
35555 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
35556 rtx sub_op2 = ((code == NOT)
35557 ? NULL_RTX
35558 : simplify_subreg (sub_mode, op2, mode, offset));
35559
35560 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
35561 complement_final_p, complement_op1_p,
35562 complement_op2_p);
35563 }
35564
35565 return;
35566 }
35567
35568 \f
35569 /* Return true if the peephole2 can combine an addis instruction with a load
35570 that uses an offset, allowing the two to be fused together on a
35571 power8. */
35572
35573 bool
35574 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
35575 rtx addis_value, /* addis value. */
35576 rtx target, /* target register that is loaded. */
35577 rtx mem) /* bottom part of the memory addr. */
35578 {
35579 rtx addr;
35580 rtx base_reg;
35581
35582 /* Validate arguments. */
35583 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
35584 return false;
35585
35586 if (!base_reg_operand (target, GET_MODE (target)))
35587 return false;
35588
35589 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
35590 return false;
35591
35592 /* Allow sign/zero extension. */
35593 if (GET_CODE (mem) == ZERO_EXTEND
35594 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
35595 mem = XEXP (mem, 0);
35596
35597 if (!MEM_P (mem))
35598 return false;
35599
35600 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
35601 return false;
35602
35603 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
35604 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
35605 return false;
35606
35607 /* Validate that the register used to load the high value is either the
35608 register being loaded, or we can safely replace its use.
35609
35610 This function is only called from the peephole2 pass and we assume that
35611 there are 2 instructions in the peephole (addis and load), so we want to
35612 check if the target register was not used in the memory address and the
35613 register to hold the addis result is dead after the peephole. */
35614 if (REGNO (addis_reg) != REGNO (target))
35615 {
35616 if (reg_mentioned_p (target, mem))
35617 return false;
35618
35619 if (!peep2_reg_dead_p (2, addis_reg))
35620 return false;
35621
35622 /* If the target register being loaded is the stack pointer, we must
35623 avoid loading any other value into it, even temporarily. */
35624 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
35625 return false;
35626 }
35627
35628 base_reg = XEXP (addr, 0);
35629 return REGNO (addis_reg) == REGNO (base_reg);
35630 }
35631
35632 /* During the peephole2 pass, adjust and expand the insns for a load fusion
35633 sequence. We adjust the addis register to use the target register. If the
35634 load sign extends, we adjust the code to do the zero extending load, and an
35635 explicit sign extension later since the fusion only covers zero extending
35636 loads.
35637
35638 The operands are:
35639 operands[0] register set with addis (to be replaced with target)
35640 operands[1] value set via addis
35641 operands[2] target register being loaded
35642 operands[3] D-form memory reference using operands[0]. */
35643
35644 void
35645 expand_fusion_gpr_load (rtx *operands)
35646 {
35647 rtx addis_value = operands[1];
35648 rtx target = operands[2];
35649 rtx orig_mem = operands[3];
35650 rtx new_addr, new_mem, orig_addr, offset;
35651 enum rtx_code plus_or_lo_sum;
35652 machine_mode target_mode = GET_MODE (target);
35653 machine_mode extend_mode = target_mode;
35654 machine_mode ptr_mode = Pmode;
35655 enum rtx_code extend = UNKNOWN;
35656
35657 if (GET_CODE (orig_mem) == ZERO_EXTEND
35658 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
35659 {
35660 extend = GET_CODE (orig_mem);
35661 orig_mem = XEXP (orig_mem, 0);
35662 target_mode = GET_MODE (orig_mem);
35663 }
35664
35665 gcc_assert (MEM_P (orig_mem));
35666
35667 orig_addr = XEXP (orig_mem, 0);
35668 plus_or_lo_sum = GET_CODE (orig_addr);
35669 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
35670
35671 offset = XEXP (orig_addr, 1);
35672 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
35673 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
35674
35675 if (extend != UNKNOWN)
35676 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
35677
35678 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
35679 UNSPEC_FUSION_GPR);
35680 emit_insn (gen_rtx_SET (target, new_mem));
35681
35682 if (extend == SIGN_EXTEND)
35683 {
35684 int sub_off = ((BYTES_BIG_ENDIAN)
35685 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
35686 : 0);
35687 rtx sign_reg
35688 = simplify_subreg (target_mode, target, extend_mode, sub_off);
35689
35690 emit_insn (gen_rtx_SET (target,
35691 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
35692 }
35693
35694 return;
35695 }
35696
35697 /* Emit the addis instruction that will be part of a fused instruction
35698 sequence. */
35699
35700 void
35701 emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
35702 const char *mode_name)
35703 {
35704 rtx fuse_ops[10];
35705 char insn_template[80];
35706 const char *addis_str = NULL;
35707 const char *comment_str = ASM_COMMENT_START;
35708
35709 if (*comment_str == ' ')
35710 comment_str++;
35711
35712 /* Emit the addis instruction. */
35713 fuse_ops[0] = target;
35714 if (satisfies_constraint_L (addis_value))
35715 {
35716 fuse_ops[1] = addis_value;
35717 addis_str = "lis %0,%v1";
35718 }
35719
35720 else if (GET_CODE (addis_value) == PLUS)
35721 {
35722 rtx op0 = XEXP (addis_value, 0);
35723 rtx op1 = XEXP (addis_value, 1);
35724
35725 if (REG_P (op0) && CONST_INT_P (op1)
35726 && satisfies_constraint_L (op1))
35727 {
35728 fuse_ops[1] = op0;
35729 fuse_ops[2] = op1;
35730 addis_str = "addis %0,%1,%v2";
35731 }
35732 }
35733
35734 else if (GET_CODE (addis_value) == HIGH)
35735 {
35736 rtx value = XEXP (addis_value, 0);
35737 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
35738 {
35739 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
35740 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
35741 if (TARGET_ELF)
35742 addis_str = "addis %0,%2,%1@toc@ha";
35743
35744 else if (TARGET_XCOFF)
35745 addis_str = "addis %0,%1@u(%2)";
35746
35747 else
35748 gcc_unreachable ();
35749 }
35750
35751 else if (GET_CODE (value) == PLUS)
35752 {
35753 rtx op0 = XEXP (value, 0);
35754 rtx op1 = XEXP (value, 1);
35755
35756 if (GET_CODE (op0) == UNSPEC
35757 && XINT (op0, 1) == UNSPEC_TOCREL
35758 && CONST_INT_P (op1))
35759 {
35760 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
35761 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
35762 fuse_ops[3] = op1;
35763 if (TARGET_ELF)
35764 addis_str = "addis %0,%2,%1+%3@toc@ha";
35765
35766 else if (TARGET_XCOFF)
35767 addis_str = "addis %0,%1+%3@u(%2)";
35768
35769 else
35770 gcc_unreachable ();
35771 }
35772 }
35773
35774 else if (satisfies_constraint_L (value))
35775 {
35776 fuse_ops[1] = value;
35777 addis_str = "lis %0,%v1";
35778 }
35779
35780 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
35781 {
35782 fuse_ops[1] = value;
35783 addis_str = "lis %0,%1@ha";
35784 }
35785 }
35786
35787 if (!addis_str)
35788 fatal_insn ("Could not generate addis value for fusion", addis_value);
35789
35790 sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
35791 comment, mode_name);
35792 output_asm_insn (insn_template, fuse_ops);
35793 }
35794
35795 /* Emit a D-form load or store instruction that is the second instruction
35796 of a fusion sequence. */
35797
35798 void
35799 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
35800 const char *insn_str)
35801 {
35802 rtx fuse_ops[10];
35803 char insn_template[80];
35804
35805 fuse_ops[0] = load_store_reg;
35806 fuse_ops[1] = addis_reg;
35807
35808 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
35809 {
35810 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
35811 fuse_ops[2] = offset;
35812 output_asm_insn (insn_template, fuse_ops);
35813 }
35814
35815 else if (GET_CODE (offset) == UNSPEC
35816 && XINT (offset, 1) == UNSPEC_TOCREL)
35817 {
35818 if (TARGET_ELF)
35819 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
35820
35821 else if (TARGET_XCOFF)
35822 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
35823
35824 else
35825 gcc_unreachable ();
35826
35827 fuse_ops[2] = XVECEXP (offset, 0, 0);
35828 output_asm_insn (insn_template, fuse_ops);
35829 }
35830
35831 else if (GET_CODE (offset) == PLUS
35832 && GET_CODE (XEXP (offset, 0)) == UNSPEC
35833 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
35834 && CONST_INT_P (XEXP (offset, 1)))
35835 {
35836 rtx tocrel_unspec = XEXP (offset, 0);
35837 if (TARGET_ELF)
35838 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
35839
35840 else if (TARGET_XCOFF)
35841 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
35842
35843 else
35844 gcc_unreachable ();
35845
35846 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
35847 fuse_ops[3] = XEXP (offset, 1);
35848 output_asm_insn (insn_template, fuse_ops);
35849 }
35850
35851 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
35852 {
35853 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
35854
35855 fuse_ops[2] = offset;
35856 output_asm_insn (insn_template, fuse_ops);
35857 }
35858
35859 else
35860 fatal_insn ("Unable to generate load/store offset for fusion", offset);
35861
35862 return;
35863 }
35864
35865 /* Wrap a TOC address that can be fused to indicate that special fusion
35866 processing is needed. */
35867
35868 rtx
35869 fusion_wrap_memory_address (rtx old_mem)
35870 {
35871 rtx old_addr = XEXP (old_mem, 0);
35872 rtvec v = gen_rtvec (1, old_addr);
35873 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
35874 return replace_equiv_address_nv (old_mem, new_addr, false);
35875 }
35876
35877 /* Given an address, convert it into the addis and load offset parts. Addresses
35878 created during the peephole2 process look like:
35879 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
35880 (unspec [(...)] UNSPEC_TOCREL))
35881
35882 Addresses created via toc fusion look like:
35883 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS) */
35884
35885 static void
35886 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
35887 {
35888 rtx hi, lo;
35889
35890 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
35891 {
35892 lo = XVECEXP (addr, 0, 0);
35893 hi = gen_rtx_HIGH (Pmode, lo);
35894 }
35895 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
35896 {
35897 hi = XEXP (addr, 0);
35898 lo = XEXP (addr, 1);
35899 }
35900 else
35901 gcc_unreachable ();
35902
35903 *p_hi = hi;
35904 *p_lo = lo;
35905 }
35906
35907 /* Return a string to fuse an addis instruction with a gpr load into the same
35908 register that the addis instruction set. The address that is used
35909 is the logical address that was formed during peephole2:
35910 (lo_sum (high) (low-part))
35911
35912 Or the address is the TOC address that is wrapped before register allocation:
35913 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
35914
35915 The code is complicated, so we call output_asm_insn directly, and just
35916 return "". */
35917
35918 const char *
35919 emit_fusion_gpr_load (rtx target, rtx mem)
35920 {
35921 rtx addis_value;
35922 rtx addr;
35923 rtx load_offset;
35924 const char *load_str = NULL;
35925 const char *mode_name = NULL;
35926 machine_mode mode;
35927
35928 if (GET_CODE (mem) == ZERO_EXTEND)
35929 mem = XEXP (mem, 0);
35930
35931 gcc_assert (REG_P (target) && MEM_P (mem));
35932
35933 addr = XEXP (mem, 0);
35934 fusion_split_address (addr, &addis_value, &load_offset);
35935
35936 /* Now emit the load instruction to the same register. */
35937 mode = GET_MODE (mem);
35938 switch (mode)
35939 {
35940 case QImode:
35941 mode_name = "char";
35942 load_str = "lbz";
35943 break;
35944
35945 case HImode:
35946 mode_name = "short";
35947 load_str = "lhz";
35948 break;
35949
35950 case SImode:
35951 case SFmode:
35952 mode_name = (mode == SFmode) ? "float" : "int";
35953 load_str = "lwz";
35954 break;
35955
35956 case DImode:
35957 case DFmode:
35958 gcc_assert (TARGET_POWERPC64);
35959 mode_name = (mode == DFmode) ? "double" : "long";
35960 load_str = "ld";
35961 break;
35962
35963 default:
35964 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
35965 }
35966
35967 /* Emit the addis instruction. */
35968 emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);
35969
35970 /* Emit the D-form load instruction. */
35971 emit_fusion_load_store (target, target, load_offset, load_str);
35972
35973 return "";
35974 }
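
/* As a concrete example, fusing a TOC-relative SImode load on an ELF
   target typically emits a pair along these lines (register number
   illustrative):

	addis 9,2,sym@toc@ha	# gpr load fusion, type int
	lwz 9,sym@toc@l(9)

   where the load targets the same register that the addis set, as
   power8 load fusion requires.  */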
35975 \f
35976
35977 /* Return true if the peephole2 can combine an addis instruction with a
35978 memory operation (load or store). This form of fusion was added with the
35979 ISA 3.0 (power9) hardware. */
35980
35981 bool
35982 fusion_p9_p (rtx addis_reg, /* register set via addis. */
35983 rtx addis_value, /* addis value. */
35984 rtx dest, /* destination (memory or register). */
35985 rtx src) /* source (register or memory). */
35986 {
35987 rtx addr, mem, offset;
35988 machine_mode mode = GET_MODE (src);
35989
35990 /* Validate arguments. */
35991 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
35992 return false;
35993
35994 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
35995 return false;
35996
35997 /* Ignore extend operations that are part of the load. */
35998 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
35999 src = XEXP (src, 0);
36000
36001 /* Test for memory<-register or register<-memory. */
36002 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
36003 {
36004 if (!MEM_P (dest))
36005 return false;
36006
36007 mem = dest;
36008 }
36009
36010 else if (MEM_P (src))
36011 {
36012 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
36013 return false;
36014
36015 mem = src;
36016 }
36017
36018 else
36019 return false;
36020
36021 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
36022 if (GET_CODE (addr) == PLUS)
36023 {
36024 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
36025 return false;
36026
36027 return satisfies_constraint_I (XEXP (addr, 1));
36028 }
36029
36030 else if (GET_CODE (addr) == LO_SUM)
36031 {
36032 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
36033 return false;
36034
36035 offset = XEXP (addr, 1);
36036 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
36037 return small_toc_ref (offset, GET_MODE (offset));
36038
36039 else if (TARGET_ELF && !TARGET_POWERPC64)
36040 return CONSTANT_P (offset);
36041 }
36042
36043 return false;
36044 }
36045
36046 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
36047 load sequence.
36048
36049 The operands are:
36050 operands[0] register set with addis
36051 operands[1] value set via addis
36052 operands[2] target register being loaded
36053 operands[3] D-form memory reference using operands[0].
36054
36055 This is similar to the fusion introduced with power8, except it scales to
36056 both loads/stores and does not require the result register to be the same as
36057 the base register. At the moment, we only do this if the register set
36058 with addis is dead. */
36059
36060 void
36061 expand_fusion_p9_load (rtx *operands)
36062 {
36063 rtx tmp_reg = operands[0];
36064 rtx addis_value = operands[1];
36065 rtx target = operands[2];
36066 rtx orig_mem = operands[3];
36067 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
36068 enum rtx_code plus_or_lo_sum;
36069 machine_mode target_mode = GET_MODE (target);
36070 machine_mode extend_mode = target_mode;
36071 machine_mode ptr_mode = Pmode;
36072 enum rtx_code extend = UNKNOWN;
36073
36074 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
36075 {
36076 extend = GET_CODE (orig_mem);
36077 orig_mem = XEXP (orig_mem, 0);
36078 target_mode = GET_MODE (orig_mem);
36079 }
36080
36081 gcc_assert (MEM_P (orig_mem));
36082
36083 orig_addr = XEXP (orig_mem, 0);
36084 plus_or_lo_sum = GET_CODE (orig_addr);
36085 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
36086
36087 offset = XEXP (orig_addr, 1);
36088 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
36089 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
36090
36091 if (extend != UNKNOWN)
36092 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
36093
36094 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
36095 UNSPEC_FUSION_P9);
36096
36097 set = gen_rtx_SET (target, new_mem);
36098 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
36099 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
36100 emit_insn (insn);
36101
36102 return;
36103 }
36104
36105 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
36106 store sequence.
36107
36108 The operands are:
36109 operands[0] register set with addis
36110 operands[1] value set via addis
36111 operands[2] target D-form memory being stored to
36112 operands[3] register being stored
36113
36114 This is similar to the fusion introduced with power8, except it scales to
36115 both loads/stores and does not require the result register to be the same as
36116 the base register. At the moment, we only do this if the register set
36117 with addis is dead. */
36118
36119 void
36120 expand_fusion_p9_store (rtx *operands)
36121 {
36122 rtx tmp_reg = operands[0];
36123 rtx addis_value = operands[1];
36124 rtx orig_mem = operands[2];
36125 rtx src = operands[3];
36126 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
36127 enum rtx_code plus_or_lo_sum;
36128 machine_mode target_mode = GET_MODE (orig_mem);
36129 machine_mode ptr_mode = Pmode;
36130
36131 gcc_assert (MEM_P (orig_mem));
36132
36133 orig_addr = XEXP (orig_mem, 0);
36134 plus_or_lo_sum = GET_CODE (orig_addr);
36135 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
36136
36137 offset = XEXP (orig_addr, 1);
36138 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
36139 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
36140
36141 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
36142 UNSPEC_FUSION_P9);
36143
36144 set = gen_rtx_SET (new_mem, new_src);
36145 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
36146 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
36147 emit_insn (insn);
36148
36149 return;
36150 }
36151
36152 /* Return a string to fuse an addis instruction with a load using extended
36153 fusion. The address that is used is the logical address that was formed
36154 during peephole2: (lo_sum (high) (low-part))
36155
36156 The code is complicated, so we call output_asm_insn directly, and just
36157 return "". */
36158
36159 const char *
36160 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
36161 {
36162 machine_mode mode = GET_MODE (reg);
36163 rtx hi;
36164 rtx lo;
36165 rtx addr;
36166 const char *load_string;
36167 int r;
36168
36169 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
36170 {
36171 mem = XEXP (mem, 0);
36172 mode = GET_MODE (mem);
36173 }
36174
36175 if (GET_CODE (reg) == SUBREG)
36176 {
36177 gcc_assert (SUBREG_BYTE (reg) == 0);
36178 reg = SUBREG_REG (reg);
36179 }
36180
36181 if (!REG_P (reg))
36182 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
36183
36184 r = REGNO (reg);
36185 if (FP_REGNO_P (r))
36186 {
36187 if (mode == SFmode)
36188 load_string = "lfs";
36189 else if (mode == DFmode || mode == DImode)
36190 load_string = "lfd";
36191 else
36192 gcc_unreachable ();
36193 }
36194 else if (INT_REGNO_P (r))
36195 {
36196 switch (mode)
36197 {
36198 case QImode:
36199 load_string = "lbz";
36200 break;
36201 case HImode:
36202 load_string = "lhz";
36203 break;
36204 case SImode:
36205 case SFmode:
36206 load_string = "lwz";
36207 break;
36208 case DImode:
36209 case DFmode:
36210 if (!TARGET_POWERPC64)
36211 gcc_unreachable ();
36212 load_string = "ld";
36213 break;
36214 default:
36215 gcc_unreachable ();
36216 }
36217 }
36218 else
36219 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
36220
36221 if (!MEM_P (mem))
36222 fatal_insn ("emit_fusion_p9_load not MEM", mem);
36223
36224 addr = XEXP (mem, 0);
36225 fusion_split_address (addr, &hi, &lo);
36226
36227 /* Emit the addis instruction. */
36228 emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));
36229
36230 /* Emit the D-form load instruction. */
36231 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
36232
36233 return "";
36234 }
36235
36236 /* Return a string to fuse an addis instruction with a store using extended
36237 fusion. The address that is used is the logical address that was formed
36238 during peephole2: (lo_sum (high) (low-part))
36239
36240 The code is complicated, so we call output_asm_insn directly, and just
36241 return "". */
36242
36243 const char *
36244 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
36245 {
36246 machine_mode mode = GET_MODE (reg);
36247 rtx hi;
36248 rtx lo;
36249 rtx addr;
36250 const char *store_string;
36251 int r;
36252
36253 if (GET_CODE (reg) == SUBREG)
36254 {
36255 gcc_assert (SUBREG_BYTE (reg) == 0);
36256 reg = SUBREG_REG (reg);
36257 }
36258
36259 if (!REG_P (reg))
36260 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
36261
36262 r = REGNO (reg);
36263 if (FP_REGNO_P (r))
36264 {
36265 if (mode == SFmode)
36266 store_string = "stfs";
36267 else if (mode == DFmode)
36268 store_string = "stfd";
36269 else
36270 gcc_unreachable ();
36271 }
36272 else if (INT_REGNO_P (r))
36273 {
36274 switch (mode)
36275 {
36276 case QImode:
36277 store_string = "stb";
36278 break;
36279 case HImode:
36280 store_string = "sth";
36281 break;
36282 case SImode:
36283 case SFmode:
36284 store_string = "stw";
36285 break;
36286 case DImode:
36287 case DFmode:
36288 if (!TARGET_POWERPC64)
36289 gcc_unreachable ();
36290 store_string = "std";
36291 break;
36292 default:
36293 gcc_unreachable ();
36294 }
36295 }
36296 else
36297 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
36298
36299 if (!MEM_P (mem))
36300 fatal_insn ("emit_fusion_p9_store not MEM", mem);
36301
36302 addr = XEXP (mem, 0);
36303 fusion_split_address (addr, &hi, &lo);
36304
36305 /* Emit the addis instruction. */
36306 emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));
36307
36308 /* Emit the D-form store instruction. */
36309 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
36310
36311 return "";
36312 }
36313
36314 \f
36315 /* Analyze vector computations and remove unnecessary doubleword
36316 swaps (xxswapdi instructions). This pass is performed only
36317 for little-endian VSX code generation.
36318
36319 For this specific case, loads and stores of 4x32 and 2x64 vectors
36320 are inefficient. These are implemented using the lxvd2x and
36321 stxvd2x instructions, which invert the order of doublewords in
36322 a vector register. Thus the code generation inserts an xxswapdi
36323 after each such load, and prior to each such store. (For spill
36324 code after register assignment, an additional xxswapdi is inserted
36325 following each store in order to return a hard register to its
36326 unpermuted value.)
36327
36328 The extra xxswapdi instructions reduce performance. This can be
36329 particularly bad for vectorized code. The purpose of this pass
36330 is to reduce the number of xxswapdi instructions required for
36331 correctness.
36332
36333 The primary insight is that much code that operates on vectors
36334 does not care about the relative order of elements in a register,
36335 so long as the correct memory order is preserved. If we have
36336 a computation where all input values are provided by lxvd2x/xxswapdi
36337 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
36338 and all intermediate computations are pure SIMD (independent of
36339 element order), then all the xxswapdi's associated with the loads
36340 and stores may be removed.
36341
36342 This pass uses some of the infrastructure and logical ideas from
36343 the "web" pass in web.c. We create maximal webs of computations
36344 fitting the description above using union-find. Each such web is
36345 then optimized by removing its unnecessary xxswapdi instructions.
36346
36347 The pass is placed prior to global optimization so that we can
36348 perform the optimization in the safest and simplest way possible;
36349 that is, by replacing each xxswapdi insn with a register copy insn.
36350 Subsequent forward propagation will remove copies where possible.
36351
36352 There are some operations sensitive to element order for which we
36353 can still allow the operation, provided we modify those operations.
36354 These include CONST_VECTORs, for which we must swap the first and
36355 second halves of the constant vector; and SUBREGs, for which we
36356 must adjust the byte offset to account for the swapped doublewords.
36357 A remaining opportunity would be non-immediate-form splats, for
36358 which we should adjust the selected lane of the input. We should
36359 also make code generation adjustments for sum-across operations,
36360 since this is a common vectorizer reduction.
36361
36362 Because we run prior to the first split, we can see loads and stores
36363 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
36364 vector loads and stores that have not yet been split into a permuting
36365 load/store and a swap. (One way this can happen is with a builtin
36366 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
36367 than deleting a swap, we convert the load/store into a permuting
36368 load/store (which effectively removes the swap). */
36369
36370 /* Notes on Permutes
36371
36372 We do not currently handle computations that contain permutes. There
36373 is a general transformation that can be performed correctly, but it
36374 may introduce more expensive code than it replaces. To handle these
36375 would require a cost model to determine when to perform the optimization.
36376 This commentary records how this could be done if desired.
36377
36378 The most general permute is something like this (example for V16QI):
36379
36380 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
36381 (parallel [(const_int a0) (const_int a1)
36382 ...
36383 (const_int a14) (const_int a15)]))
36384
36385 where a0,...,a15 are in [0,31] and select elements from op1 and op2
36386 to produce the result.
36387
36388 Regardless of mode, we can convert the PARALLEL to a mask of 16
36389 byte-element selectors. Let's call this M, with M[i] representing
36390 the ith byte-element selector value. Then if we swap doublewords
36391 throughout the computation, we can get correct behavior by replacing
36392 M with M' as follows:
36393
36394 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
36395 { ((M[i]+8)%16)+16 : M[i] in [16,31]
36396
36397 This seems promising at first, since we are just replacing one mask
36398 with another. But certain masks are preferable to others. If M
36399 is a mask that matches a vmrghh pattern, for example, M' certainly
36400 will not. Instead of a single vmrghh, we would generate a load of
36401 M' and a vperm. So we would need to know how many xxswapd's we can
36402 remove as a result of this transformation to determine if it's
36403 profitable; and preferably the logic would need to be aware of all
36404 the special preferable masks.
36405
36406 Another form of permute is an UNSPEC_VPERM, in which the mask is
36407 already in a register. In some cases, this mask may be a constant
36408 that we can discover with ud-chains, in which case the above
36409 transformation is ok. However, the common usage here is for the
36410 mask to be produced by an UNSPEC_LVSL, in which case the mask
36411 cannot be known at compile time. In such a case we would have to
36412 generate several instructions to compute M' as above at run time,
36413 and a cost model is needed again.
36414
36415 However, when the mask M for an UNSPEC_VPERM is loaded from the
36416 constant pool, we can replace M with M' as above at no cost
36417 beyond adding a constant pool entry. */
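
/* The M -> M' rewrite above is mechanical. As a standalone sketch
   (plain C, not part of the pass itself):

	void
	swap_permute_mask (const unsigned char m[16], unsigned char mp[16])
	{
	  for (int i = 0; i < 16; i++)
	    if (m[i] < 16)
	      mp[i] = (m[i] + 8) % 16;
	    else
	      mp[i] = ((m[i] + 8) % 16) + 16;
	}

   For m[i] in [16,31], ((m[i]+8)%16)+16 lands back in [16,31], so the
   rewritten selector still chooses from op2.  */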
36418
36419 /* This is based on the union-find logic in web.c. web_entry_base is
36420 defined in df.h. */
36421 class swap_web_entry : public web_entry_base
36422 {
36423 public:
36424 /* Pointer to the insn. */
36425 rtx_insn *insn;
36426 /* Set if insn contains a mention of a vector register. All other
36427 fields are undefined if this field is unset. */
36428 unsigned int is_relevant : 1;
36429 /* Set if insn is a load. */
36430 unsigned int is_load : 1;
36431 /* Set if insn is a store. */
36432 unsigned int is_store : 1;
36433 /* Set if insn is a doubleword swap. This can either be a register swap
36434 or a permuting load or store (test is_load and is_store for this). */
36435 unsigned int is_swap : 1;
36436 /* Set if the insn has a live-in use of a parameter register. */
36437 unsigned int is_live_in : 1;
36438 /* Set if the insn has a live-out def of a return register. */
36439 unsigned int is_live_out : 1;
36440 /* Set if the insn contains a subreg reference of a vector register. */
36441 unsigned int contains_subreg : 1;
36442 /* Set if the insn contains a 128-bit integer operand. */
36443 unsigned int is_128_int : 1;
36444 /* Set if this is a call-insn. */
36445 unsigned int is_call : 1;
36446 /* Set if this insn does not perform a vector operation for which
36447 element order matters, or if we know how to fix it up if it does.
36448 Undefined if is_swap is set. */
36449 unsigned int is_swappable : 1;
36450 /* A nonzero value indicates what kind of special handling for this
36451 insn is required if doublewords are swapped. Undefined if
36452 is_swappable is not set. */
36453 unsigned int special_handling : 4;
36454 /* Set if the web represented by this entry cannot be optimized. */
36455 unsigned int web_not_optimizable : 1;
36456 /* Set if this insn should be deleted. */
36457 unsigned int will_delete : 1;
36458 };
36459
36460 enum special_handling_values {
36461 SH_NONE = 0,
36462 SH_CONST_VECTOR,
36463 SH_SUBREG,
36464 SH_NOSWAP_LD,
36465 SH_NOSWAP_ST,
36466 SH_EXTRACT,
36467 SH_SPLAT,
36468 SH_XXPERMDI,
36469 SH_CONCAT,
36470 SH_VPERM
36471 };
36472
36473 /* Union INSN with all insns containing definitions that reach USE.
36474 Detect whether USE is live-in to the current function. */
36475 static void
36476 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
36477 {
36478 struct df_link *link = DF_REF_CHAIN (use);
36479
36480 if (!link)
36481 insn_entry[INSN_UID (insn)].is_live_in = 1;
36482
36483 while (link)
36484 {
36485 if (DF_REF_IS_ARTIFICIAL (link->ref))
36486 insn_entry[INSN_UID (insn)].is_live_in = 1;
36487
36488 if (DF_REF_INSN_INFO (link->ref))
36489 {
36490 rtx def_insn = DF_REF_INSN (link->ref);
36491 (void)unionfind_union (insn_entry + INSN_UID (insn),
36492 insn_entry + INSN_UID (def_insn));
36493 }
36494
36495 link = link->next;
36496 }
36497 }
36498
36499 /* Union INSN with all insns containing uses reached from DEF.
36500 Detect whether DEF is live-out from the current function. */
36501 static void
36502 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
36503 {
36504 struct df_link *link = DF_REF_CHAIN (def);
36505
36506 if (!link)
36507 insn_entry[INSN_UID (insn)].is_live_out = 1;
36508
36509 while (link)
36510 {
36511 /* This could be an eh use or some other artificial use;
36512 we treat these all the same (killing the optimization). */
36513 if (DF_REF_IS_ARTIFICIAL (link->ref))
36514 insn_entry[INSN_UID (insn)].is_live_out = 1;
36515
36516 if (DF_REF_INSN_INFO (link->ref))
36517 {
36518 rtx use_insn = DF_REF_INSN (link->ref);
36519 (void)unionfind_union (insn_entry + INSN_UID (insn),
36520 insn_entry + INSN_UID (use_insn));
36521 }
36522
36523 link = link->next;
36524 }
36525 }
36526
36527 /* Return 1 iff INSN is a load insn, including permuting loads that
36528 represent an lxvd2x instruction; else return 0. */
36529 static unsigned int
36530 insn_is_load_p (rtx insn)
36531 {
36532 rtx body = PATTERN (insn);
36533
36534 if (GET_CODE (body) == SET)
36535 {
36536 if (GET_CODE (SET_SRC (body)) == MEM)
36537 return 1;
36538
36539 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
36540 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
36541 return 1;
36542
36543 return 0;
36544 }
36545
36546 if (GET_CODE (body) != PARALLEL)
36547 return 0;
36548
36549 rtx set = XVECEXP (body, 0, 0);
36550
36551 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
36552 return 1;
36553
36554 return 0;
36555 }
36556
36557 /* Return 1 iff INSN is a store insn, including permuting stores that
36558 represent an stxvd2x instruction; else return 0. */
36559 static unsigned int
36560 insn_is_store_p (rtx insn)
36561 {
36562 rtx body = PATTERN (insn);
36563 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
36564 return 1;
36565 if (GET_CODE (body) != PARALLEL)
36566 return 0;
36567 rtx set = XVECEXP (body, 0, 0);
36568 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
36569 return 1;
36570 return 0;
36571 }
36572
36573 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
36574 a permuting load, or a permuting store. */
36575 static unsigned int
36576 insn_is_swap_p (rtx insn)
36577 {
36578 rtx body = PATTERN (insn);
36579 if (GET_CODE (body) != SET)
36580 return 0;
36581 rtx rhs = SET_SRC (body);
36582 if (GET_CODE (rhs) != VEC_SELECT)
36583 return 0;
36584 rtx parallel = XEXP (rhs, 1);
36585 if (GET_CODE (parallel) != PARALLEL)
36586 return 0;
36587 unsigned int len = XVECLEN (parallel, 0);
36588 if (len != 2 && len != 4 && len != 8 && len != 16)
36589 return 0;
36590 for (unsigned int i = 0; i < len / 2; ++i)
36591 {
36592 rtx op = XVECEXP (parallel, 0, i);
36593 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
36594 return 0;
36595 }
36596 for (unsigned int i = len / 2; i < len; ++i)
36597 {
36598 rtx op = XVECEXP (parallel, 0, i);
36599 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
36600 return 0;
36601 }
36602 return 1;
36603 }
36604
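/* For example (editor's illustration), a V4SI doubleword swap is
(set (reg:V4SI y) (vec_select:V4SI (reg:V4SI x) (parallel [2 3 0 1])))
i.e. the second half of the selection indices comes first, which is
exactly what the two loops above verify for len = 4. */
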
36605 /* Return TRUE if insn is a swap fed by a load from the constant pool. */
36606 static bool
36607 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
36608 {
36609 unsigned uid = INSN_UID (insn);
36610 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
36611 return false;
36612
36613 /* Find the unique use in the swap and locate its def. If the def
36614 isn't unique, punt. */
36615 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
36616 df_ref use;
36617 FOR_EACH_INSN_INFO_USE (use, insn_info)
36618 {
36619 struct df_link *def_link = DF_REF_CHAIN (use);
36620 if (!def_link || def_link->next)
36621 return false;
36622
36623 rtx def_insn = DF_REF_INSN (def_link->ref);
36624 unsigned uid2 = INSN_UID (def_insn);
36625 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
36626 return false;
36627
36628 rtx body = PATTERN (def_insn);
36629 if (GET_CODE (body) != SET
36630 || GET_CODE (SET_SRC (body)) != VEC_SELECT
36631 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
36632 return false;
36633
36634 rtx mem = XEXP (SET_SRC (body), 0);
36635 rtx base_reg = XEXP (mem, 0);
36636
36637 df_ref base_use;
36638 insn_info = DF_INSN_INFO_GET (def_insn);
36639 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
36640 {
36641 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
36642 continue;
36643
36644 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
36645 if (!base_def_link || base_def_link->next)
36646 return false;
36647
36648 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
36649 rtx tocrel_body = PATTERN (tocrel_insn);
36650 rtx base, offset;
36651 if (GET_CODE (tocrel_body) != SET)
36652 return false;
36653 /* There is an extra level of indirection for small/large
36654 code models. */
36655 rtx tocrel_expr = SET_SRC (tocrel_body);
36656 if (GET_CODE (tocrel_expr) == MEM)
36657 tocrel_expr = XEXP (tocrel_expr, 0);
36658 if (!toc_relative_expr_p (tocrel_expr, false))
36659 return false;
36660 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
36661 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
36662 return false;
36663 }
36664 }
36665 return true;
36666 }
36667
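/* Editor's sketch of the sequence recognized above:

I1: (set (reg:DI b) <toc-relative reference to .LCn>)
I2: (set (reg:V16QI v)
(vec_select:V16QI (mem:V16QI (reg:DI b)) (parallel [...])))
I3: (set (reg:V16QI w) (vec_select:V16QI (reg:V16QI v) (parallel [...])))

where INSN is the swap I3, I2 is the permuting load feeding it, and
.LCn is a constant-pool symbol, possibly reached through one more
level of indirection for the small/large code models. */
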
36668 /* Return 1 iff OP is an operand that will not be affected by having
36669 vector doublewords swapped in memory. */
36670 static unsigned int
36671 rtx_is_swappable_p (rtx op, unsigned int *special)
36672 {
36673 enum rtx_code code = GET_CODE (op);
36674 int i, j;
36675 rtx parallel;
36676
36677 switch (code)
36678 {
36679 case LABEL_REF:
36680 case SYMBOL_REF:
36681 case CLOBBER:
36682 case REG:
36683 return 1;
36684
36685 case VEC_CONCAT:
36686 case ASM_INPUT:
36687 case ASM_OPERANDS:
36688 return 0;
36689
36690 case CONST_VECTOR:
36691 {
36692 *special = SH_CONST_VECTOR;
36693 return 1;
36694 }
36695
36696 case VEC_DUPLICATE:
36697 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
36698 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
36699 it represents a vector splat for which we can do special
36700 handling. */
36701 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
36702 return 1;
36703 else if (GET_CODE (XEXP (op, 0)) == REG
36704 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
36705 /* This catches V2DF and V2DI splat, at a minimum. */
36706 return 1;
36707 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
36708 /* If the duplicated item is from a select, defer to the select
36709 processing to see if we can change the lane for the splat. */
36710 return rtx_is_swappable_p (XEXP (op, 0), special);
36711 else
36712 return 0;
36713
36714 case VEC_SELECT:
36715 /* A vec_extract operation is ok if we change the lane. */
36716 if (GET_CODE (XEXP (op, 0)) == REG
36717 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
36718 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
36719 && XVECLEN (parallel, 0) == 1
36720 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
36721 {
36722 *special = SH_EXTRACT;
36723 return 1;
36724 }
36725 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
36726 XXPERMDI is a swap operation, it will be identified by
36727 insn_is_swap_p and therefore we won't get here. */
36728 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
36729 && (GET_MODE (XEXP (op, 0)) == V4DFmode
36730 || GET_MODE (XEXP (op, 0)) == V4DImode)
36731 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
36732 && XVECLEN (parallel, 0) == 2
36733 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
36734 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
36735 {
36736 *special = SH_XXPERMDI;
36737 return 1;
36738 }
36739 else
36740 return 0;
36741
36742 case UNSPEC:
36743 {
36744 /* Various operations are unsafe for this optimization, at least
36745 without significant additional work. Permutes are obviously
36746 problematic, as both the permute control vector and the ordering
36747 of the target values are invalidated by doubleword swapping.
36748 Vector pack and unpack modify the number of vector lanes.
36749 Merge-high/low will not operate correctly on swapped operands.
36750 Vector shifts across element boundaries are clearly uncool,
36751 as are vector select and concatenate operations. Vector
36752 sum-across instructions define one operand with a specific
36753 order-dependent element, so additional fixup code would be
36754 needed to make those work. Vector set and non-immediate-form
36755 vector splat are element-order sensitive. A few of these
36756 cases might be workable with special handling if required.
36757 Adding cost modeling would be appropriate in some cases. */
36758 int val = XINT (op, 1);
36759 switch (val)
36760 {
36761 default:
36762 break;
36763 case UNSPEC_VMRGH_DIRECT:
36764 case UNSPEC_VMRGL_DIRECT:
36765 case UNSPEC_VPACK_SIGN_SIGN_SAT:
36766 case UNSPEC_VPACK_SIGN_UNS_SAT:
36767 case UNSPEC_VPACK_UNS_UNS_MOD:
36768 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
36769 case UNSPEC_VPACK_UNS_UNS_SAT:
36770 case UNSPEC_VPERM:
36771 case UNSPEC_VPERM_UNS:
36772 case UNSPEC_VPERMHI:
36773 case UNSPEC_VPERMSI:
36774 case UNSPEC_VPKPX:
36775 case UNSPEC_VSLDOI:
36776 case UNSPEC_VSLO:
36777 case UNSPEC_VSRO:
36778 case UNSPEC_VSUM2SWS:
36779 case UNSPEC_VSUM4S:
36780 case UNSPEC_VSUM4UBS:
36781 case UNSPEC_VSUMSWS:
36782 case UNSPEC_VSUMSWS_DIRECT:
36783 case UNSPEC_VSX_CONCAT:
36784 case UNSPEC_VSX_SET:
36785 case UNSPEC_VSX_SLDWI:
36786 case UNSPEC_VUNPACK_HI_SIGN:
36787 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
36788 case UNSPEC_VUNPACK_LO_SIGN:
36789 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
36790 case UNSPEC_VUPKHPX:
36791 case UNSPEC_VUPKHS_V4SF:
36792 case UNSPEC_VUPKHU_V4SF:
36793 case UNSPEC_VUPKLPX:
36794 case UNSPEC_VUPKLS_V4SF:
36795 case UNSPEC_VUPKLU_V4SF:
36796 case UNSPEC_VSX_CVDPSPN:
36797 case UNSPEC_VSX_CVSPDP:
36798 case UNSPEC_VSX_CVSPDPN:
36799 return 0;
36800 case UNSPEC_VSPLT_DIRECT:
36801 *special = SH_SPLAT;
36802 return 1;
36803 }
36804 }
36805
36806 default:
36807 break;
36808 }
36809
36810 const char *fmt = GET_RTX_FORMAT (code);
36811 int ok = 1;
36812
36813 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
36814 if (fmt[i] == 'e' || fmt[i] == 'u')
36815 {
36816 unsigned int special_op = SH_NONE;
36817 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
36818 if (special_op == SH_NONE)
36819 continue;
36820 /* Ensure we never have two kinds of special handling
36821 for the same insn. */
36822 if (*special != SH_NONE && *special != special_op)
36823 return 0;
36824 *special = special_op;
36825 }
36826 else if (fmt[i] == 'E')
36827 for (j = 0; j < XVECLEN (op, i); ++j)
36828 {
36829 unsigned int special_op = SH_NONE;
36830 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
36831 if (special_op == SH_NONE)
36832 continue;
36833 /* Ensure we never have two kinds of special handling
36834 for the same insn. */
36835 if (*special != SH_NONE && *special != special_op)
36836 return 0;
36837 *special = special_op;
36838 }
36839
36840 return ok;
36841 }
36842
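/* Editor's example of the default walk above: for a body like
(set (reg:V2DF 100) (plus:V2DF (reg:V2DF 101) (reg:V2DF 102)))
no switch case matches, so each subexpression is visited in turn;
the REGs return 1 and *special stays SH_NONE, because lane-
insensitive arithmetic needs no fixup when doublewords are swapped. */
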
36843 /* Return 1 iff INSN is an operand that will not be affected by
36844 having vector doublewords swapped in memory (in which case
36845 *SPECIAL is unchanged), or that can be modified to be correct
36846 if vector doublewords are swapped in memory (in which case
36847 *SPECIAL is changed to a value indicating how). */
36848 static unsigned int
36849 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
36850 unsigned int *special)
36851 {
36852 /* Calls are always bad. */
36853 if (GET_CODE (insn) == CALL_INSN)
36854 return 0;
36855
36856 /* Loads and stores seen here are not permuting, but we can still
36857 fix them up by converting them to permuting ones. Exceptions:
36858 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
36859 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
36860 for the SET source. */
36861 rtx body = PATTERN (insn);
36862 int i = INSN_UID (insn);
36863
36864 if (insn_entry[i].is_load)
36865 {
36866 if (GET_CODE (body) == SET)
36867 {
36868 *special = SH_NOSWAP_LD;
36869 return 1;
36870 }
36871 else
36872 return 0;
36873 }
36874
36875 if (insn_entry[i].is_store)
36876 {
36877 if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
36878 {
36879 *special = SH_NOSWAP_ST;
36880 return 1;
36881 }
36882 else
36883 return 0;
36884 }
36885
36886 /* A convert to single precision can be left as is provided that
36887 all of its uses are in xxspltw instructions that splat BE element
36888 zero. */
36889 if (GET_CODE (body) == SET
36890 && GET_CODE (SET_SRC (body)) == UNSPEC
36891 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
36892 {
36893 df_ref def;
36894 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
36895
36896 FOR_EACH_INSN_INFO_DEF (def, insn_info)
36897 {
36898 struct df_link *link = DF_REF_CHAIN (def);
36899 if (!link)
36900 return 0;
36901
36902 for (; link; link = link->next) {
36903 rtx use_insn = DF_REF_INSN (link->ref);
36904 rtx use_body = PATTERN (use_insn);
36905 if (GET_CODE (use_body) != SET
36906 || GET_CODE (SET_SRC (use_body)) != UNSPEC
36907 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
36908 || XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx)
36909 return 0;
36910 }
36911 }
36912
36913 return 1;
36914 }
36915
36916 /* A concatenation of two doublewords is ok if we reverse the
36917 order of the inputs. */
36918 if (GET_CODE (body) == SET
36919 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
36920 && (GET_MODE (SET_SRC (body)) == V2DFmode
36921 || GET_MODE (SET_SRC (body)) == V2DImode))
36922 {
36923 *special = SH_CONCAT;
36924 return 1;
36925 }
36926
36927 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
36928 constant pool. */
36929 if (GET_CODE (body) == SET
36930 && GET_CODE (SET_SRC (body)) == UNSPEC
36931 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
36932 && XVECLEN (SET_SRC (body), 0) == 3
36933 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
36934 {
36935 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
36936 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
36937 df_ref use;
36938 FOR_EACH_INSN_INFO_USE (use, insn_info)
36939 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
36940 {
36941 struct df_link *def_link = DF_REF_CHAIN (use);
36942 /* Punt if multiple definitions for this reg. */
36943 if (def_link && !def_link->next
36944 && const_load_sequence_p (insn_entry,
36945 DF_REF_INSN (def_link->ref)))
36946 {
36947 *special = SH_VPERM;
36948 return 1;
36949 }
36950 }
36951 }
36952
36953 /* Otherwise check the operands for vector lane violations. */
36954 return rtx_is_swappable_p (body, special);
36955 }
36956
36957 enum chain_purpose { FOR_LOADS, FOR_STORES };
36958
36959 /* Return true if the UD or DU chain headed by LINK is non-empty,
36960 and every entry on the chain references an insn that is a
36961 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
36962 register swap must have only permuting loads as reaching defs.
36963 If PURPOSE is FOR_STORES, each such register swap must have only
36964 register swaps or permuting stores as reached uses. */
36965 static bool
36966 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
36967 enum chain_purpose purpose)
36968 {
36969 if (!link)
36970 return false;
36971
36972 for (; link; link = link->next)
36973 {
36974 if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref))))
36975 continue;
36976
36977 if (DF_REF_IS_ARTIFICIAL (link->ref))
36978 return false;
36979
36980 rtx reached_insn = DF_REF_INSN (link->ref);
36981 unsigned uid = INSN_UID (reached_insn);
36982 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
36983
36984 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
36985 || insn_entry[uid].is_store)
36986 return false;
36987
36988 if (purpose == FOR_LOADS)
36989 {
36990 df_ref use;
36991 FOR_EACH_INSN_INFO_USE (use, insn_info)
36992 {
36993 struct df_link *swap_link = DF_REF_CHAIN (use);
36994
36995 while (swap_link)
36996 {
36997 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
36998 return false;
36999
37000 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
37001 unsigned uid2 = INSN_UID (swap_def_insn);
37002
37003 /* Only permuting loads are allowed. */
37004 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
37005 return false;
37006
37007 swap_link = swap_link->next;
37008 }
37009 }
37010 }
37011 else if (purpose == FOR_STORES)
37012 {
37013 df_ref def;
37014 FOR_EACH_INSN_INFO_DEF (def, insn_info)
37015 {
37016 struct df_link *swap_link = DF_REF_CHAIN (def);
37017
37018 while (swap_link)
37019 {
37020 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
37021 return false;
37022
37023 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
37024 unsigned uid2 = INSN_UID (swap_use_insn);
37025
37026 /* Permuting stores or register swaps are allowed. */
37027 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
37028 return false;
37029
37030 swap_link = swap_link->next;
37031 }
37032 }
37033 }
37034 }
37035
37036 return true;
37037 }
37038
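/* Editor's sketch of an optimizable load web as checked above:

I1: (set (reg 100) (vec_select (mem ...) ...))   ; permuting load
I2: (set (reg 101) (vec_select (reg 100) ...))   ; register swap
I3: ... ordinary swappable uses of reg 101 ...

For FOR_LOADS we are handed I1's def chain and verify that every
reached insn is a register swap like I2 fed only by permuting
loads; FOR_STORES checks the mirror image on the store side. */
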
37039 /* Mark the xxswapdi instructions associated with permuting loads and
37040 stores for removal. Note that we only flag them for deletion here,
37041 as there is a possibility of a swap being reached from multiple
37042 loads, etc. */
37043 static void
37044 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
37045 {
37046 rtx insn = insn_entry[i].insn;
37047 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37048
37049 if (insn_entry[i].is_load)
37050 {
37051 df_ref def;
37052 FOR_EACH_INSN_INFO_DEF (def, insn_info)
37053 {
37054 struct df_link *link = DF_REF_CHAIN (def);
37055
37056 /* We know by now that these are swaps, so we can delete
37057 them confidently. */
37058 while (link)
37059 {
37060 rtx use_insn = DF_REF_INSN (link->ref);
37061 insn_entry[INSN_UID (use_insn)].will_delete = 1;
37062 link = link->next;
37063 }
37064 }
37065 }
37066 else if (insn_entry[i].is_store)
37067 {
37068 df_ref use;
37069 FOR_EACH_INSN_INFO_USE (use, insn_info)
37070 {
37071 /* Ignore uses for addressability. */
37072 machine_mode mode = GET_MODE (DF_REF_REG (use));
37073 if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode))
37074 continue;
37075
37076 struct df_link *link = DF_REF_CHAIN (use);
37077
37078 /* We know by now that these are swaps, so we can delete
37079 them confidently. */
37080 while (link)
37081 {
37082 rtx def_insn = DF_REF_INSN (link->ref);
37083 insn_entry[INSN_UID (def_insn)].will_delete = 1;
37084 link = link->next;
37085 }
37086 }
37087 }
37088 }
37089
37090 /* OP is either a CONST_VECTOR or an expression containing one.
37091 Swap the first half of the vector with the second in the first
37092 case. Recurse to find it in the second. */
37093 static void
37094 swap_const_vector_halves (rtx op)
37095 {
37096 int i;
37097 enum rtx_code code = GET_CODE (op);
37098 if (GET_CODE (op) == CONST_VECTOR)
37099 {
37100 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
37101 for (i = 0; i < half_units; ++i)
37102 {
37103 rtx temp = CONST_VECTOR_ELT (op, i);
37104 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
37105 CONST_VECTOR_ELT (op, i + half_units) = temp;
37106 }
37107 }
37108 else
37109 {
37110 int j;
37111 const char *fmt = GET_RTX_FORMAT (code);
37112 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
37113 if (fmt[i] == 'e' || fmt[i] == 'u')
37114 swap_const_vector_halves (XEXP (op, i));
37115 else if (fmt[i] == 'E')
37116 for (j = 0; j < XVECLEN (op, i); ++j)
37117 swap_const_vector_halves (XVECEXP (op, i, j));
37118 }
37119 }
37120
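/* E.g. (editor's example) the V4SI constant { 0, 1, 2, 3 } becomes
{ 2, 3, 0, 1 }: half_units is 2, and elements i and i + 2 are
exchanged for i = 0, 1. */
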
37121 /* Find all subregs of a vector expression that perform a narrowing,
37122 and adjust the subreg index to account for doubleword swapping. */
37123 static void
37124 adjust_subreg_index (rtx op)
37125 {
37126 enum rtx_code code = GET_CODE (op);
37127 if (code == SUBREG
37128 && (GET_MODE_SIZE (GET_MODE (op))
37129 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
37130 {
37131 unsigned int index = SUBREG_BYTE (op);
37132 if (index < 8)
37133 index += 8;
37134 else
37135 index -= 8;
37136 SUBREG_BYTE (op) = index;
37137 }
37138
37139 const char *fmt = GET_RTX_FORMAT (code);
37140 int i,j;
37141 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
37142 if (fmt[i] == 'e' || fmt[i] == 'u')
37143 adjust_subreg_index (XEXP (op, i));
37144 else if (fmt[i] == 'E')
37145 for (j = 0; j < XVECLEN (op, i); ++j)
37146 adjust_subreg_index (XVECEXP (op, i, j));
37147 }
37148
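/* E.g. (editor's example) (subreg:DF (reg:V2DF x) 0) becomes
(subreg:DF (reg:V2DF x) 8) and vice versa: once the doublewords of
the full register are swapped, the other eight-byte half holds the
value the narrowing subreg was meant to name. */
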
37149 /* Convert the non-permuting load INSN to a permuting one. */
37150 static void
37151 permute_load (rtx_insn *insn)
37152 {
37153 rtx body = PATTERN (insn);
37154 rtx mem_op = SET_SRC (body);
37155 rtx tgt_reg = SET_DEST (body);
37156 machine_mode mode = GET_MODE (tgt_reg);
37157 int n_elts = GET_MODE_NUNITS (mode);
37158 int half_elts = n_elts / 2;
37159 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
37160 int i, j;
37161 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
37162 XVECEXP (par, 0, i) = GEN_INT (j);
37163 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
37164 XVECEXP (par, 0, i) = GEN_INT (j);
37165 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
37166 SET_SRC (body) = sel;
37167 INSN_CODE (insn) = -1; /* Force re-recognition. */
37168 df_insn_rescan (insn);
37169
37170 if (dump_file)
37171 fprintf (dump_file, "Replacing load %d with permuted load\n",
37172 INSN_UID (insn));
37173 }
37174
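/* After this transformation (editor's illustration), a V4SI load

(set (reg:V4SI x) (mem:V4SI addr))

becomes

(set (reg:V4SI x) (vec_select:V4SI (mem:V4SI addr) (parallel [2 3 0 1])))

which is the same doubleword-swapping form that insn_is_swap_p
recognizes, so the loaded value now matches the swapped-doubleword
convention used throughout the web. */
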
37175 /* Convert the non-permuting store INSN to a permuting one. */
37176 static void
37177 permute_store (rtx_insn *insn)
37178 {
37179 rtx body = PATTERN (insn);
37180 rtx src_reg = SET_SRC (body);
37181 machine_mode mode = GET_MODE (src_reg);
37182 int n_elts = GET_MODE_NUNITS (mode);
37183 int half_elts = n_elts / 2;
37184 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
37185 int i, j;
37186 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
37187 XVECEXP (par, 0, i) = GEN_INT (j);
37188 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
37189 XVECEXP (par, 0, i) = GEN_INT (j);
37190 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
37191 SET_SRC (body) = sel;
37192 INSN_CODE (insn) = -1; /* Force re-recognition. */
37193 df_insn_rescan (insn);
37194
37195 if (dump_file)
37196 fprintf (dump_file, "Replacing store %d with permuted store\n",
37197 INSN_UID (insn));
37198 }
37199
37200 /* Given INSN that contains a vector extract operation, adjust the index
37201 of the extracted lane to account for the doubleword swap. */
37202 static void
37203 adjust_extract (rtx_insn *insn)
37204 {
37205 rtx pattern = PATTERN (insn);
37206 if (GET_CODE (pattern) == PARALLEL)
37207 pattern = XVECEXP (pattern, 0, 0);
37208 rtx src = SET_SRC (pattern);
37209 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
37210 account for that. */
37211 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
37212 rtx par = XEXP (sel, 1);
37213 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
37214 int lane = INTVAL (XVECEXP (par, 0, 0));
37215 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
37216 XVECEXP (par, 0, 0) = GEN_INT (lane);
37217 INSN_CODE (insn) = -1; /* Force re-recognition. */
37218 df_insn_rescan (insn);
37219
37220 if (dump_file)
37221 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
37222 }
37223
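/* E.g. (editor's example) extracting lane 1 from a V4SI register has
half_elts = 2, so the lane becomes 1 + 2 = 3, while lane 3 becomes
3 - 2 = 1; the adjusted index names the same element once the
doublewords are left swapped in the register. */
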
37224 /* Given INSN that contains a vector direct-splat operation, adjust the index
37225 of the source lane to account for the doubleword swap. */
37226 static void
37227 adjust_splat (rtx_insn *insn)
37228 {
37229 rtx body = PATTERN (insn);
37230 rtx unspec = XEXP (body, 1);
37231 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
37232 int lane = INTVAL (XVECEXP (unspec, 0, 1));
37233 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
37234 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
37235 INSN_CODE (insn) = -1; /* Force re-recognition. */
37236 df_insn_rescan (insn);
37237
37238 if (dump_file)
37239 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
37240 }
37241
37242 /* Given INSN that contains an XXPERMDI operation (that is not a doubleword
37243 swap), reverse the order of the source operands and adjust the indices
37244 of the source lanes to account for doubleword reversal. */
37245 static void
37246 adjust_xxpermdi (rtx_insn *insn)
37247 {
37248 rtx set = PATTERN (insn);
37249 rtx select = XEXP (set, 1);
37250 rtx concat = XEXP (select, 0);
37251 rtx src0 = XEXP (concat, 0);
37252 XEXP (concat, 0) = XEXP (concat, 1);
37253 XEXP (concat, 1) = src0;
37254 rtx parallel = XEXP (select, 1);
37255 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
37256 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
37257 int new_lane0 = 3 - lane1;
37258 int new_lane1 = 3 - lane0;
37259 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
37260 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
37261 INSN_CODE (insn) = -1; /* Force re-recognition. */
37262 df_insn_rescan (insn);
37263
37264 if (dump_file)
37265 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
37266 }
37267
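/* Worked example (editor's): for
(vec_select (vec_concat A B) (parallel [0 2]))
the operands become (vec_concat B A) and the lanes become
[3 - 2, 3 - 0] = [1 3], so the same doublewords are selected from
the swapped inputs. */
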
37268 /* Given INSN that contains a VEC_CONCAT operation of two doublewords,
37269 reverse the order of those inputs. */
37270 static void
37271 adjust_concat (rtx_insn *insn)
37272 {
37273 rtx set = PATTERN (insn);
37274 rtx concat = XEXP (set, 1);
37275 rtx src0 = XEXP (concat, 0);
37276 XEXP (concat, 0) = XEXP (concat, 1);
37277 XEXP (concat, 1) = src0;
37278 INSN_CODE (insn) = -1; /* Force re-recognition. */
37279 df_insn_rescan (insn);
37280
37281 if (dump_file)
37282 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
37283 }
37284
37285 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
37286 constant pool to reflect swapped doublewords. */
37287 static void
37288 adjust_vperm (rtx_insn *insn)
37289 {
37290 /* We previously determined that the UNSPEC_VPERM was fed by a
37291 swap of a swapping load of a TOC-relative constant pool symbol.
37292 Find the MEM in the swapping load and replace it with a MEM for
37293 the adjusted mask constant. */
37294 rtx set = PATTERN (insn);
37295 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
37296
37297 /* Find the swap. */
37298 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37299 df_ref use;
37300 rtx_insn *swap_insn = 0;
37301 FOR_EACH_INSN_INFO_USE (use, insn_info)
37302 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
37303 {
37304 struct df_link *def_link = DF_REF_CHAIN (use);
37305 gcc_assert (def_link && !def_link->next);
37306 swap_insn = DF_REF_INSN (def_link->ref);
37307 break;
37308 }
37309 gcc_assert (swap_insn);
37310
37311 /* Find the load. */
37312 insn_info = DF_INSN_INFO_GET (swap_insn);
37313 rtx_insn *load_insn = 0;
37314 FOR_EACH_INSN_INFO_USE (use, insn_info)
37315 {
37316 struct df_link *def_link = DF_REF_CHAIN (use);
37317 gcc_assert (def_link && !def_link->next);
37318 load_insn = DF_REF_INSN (def_link->ref);
37319 break;
37320 }
37321 gcc_assert (load_insn);
37322
37323 /* Find the TOC-relative symbol access. */
37324 insn_info = DF_INSN_INFO_GET (load_insn);
37325 rtx_insn *tocrel_insn = 0;
37326 FOR_EACH_INSN_INFO_USE (use, insn_info)
37327 {
37328 struct df_link *def_link = DF_REF_CHAIN (use);
37329 gcc_assert (def_link && !def_link->next);
37330 tocrel_insn = DF_REF_INSN (def_link->ref);
37331 break;
37332 }
37333 gcc_assert (tocrel_insn);
37334
37335 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
37336 to set tocrel_base; otherwise it would be unnecessary as we've
37337 already established it will return true. */
37338 rtx base, offset;
37339 rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
37340 /* There is an extra level of indirection for small/large code models. */
37341 if (GET_CODE (tocrel_expr) == MEM)
37342 tocrel_expr = XEXP (tocrel_expr, 0);
37343 if (!toc_relative_expr_p (tocrel_expr, false))
37344 gcc_unreachable ();
37345 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
37346 rtx const_vector = get_pool_constant (base);
37347 /* With the extra indirection, get_pool_constant will produce the
37348 real constant from the reg_equal expression, so get the real
37349 constant. */
37350 if (GET_CODE (const_vector) == SYMBOL_REF)
37351 const_vector = get_pool_constant (const_vector);
37352 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
37353
37354 /* Create an adjusted mask from the initial mask. */
37355 unsigned int new_mask[16], i, val;
37356 for (i = 0; i < 16; ++i) {
37357 val = INTVAL (XVECEXP (const_vector, 0, i));
37358 if (val < 16)
37359 new_mask[i] = (val + 8) % 16;
37360 else
37361 new_mask[i] = ((val + 8) % 16) + 16;
37362 }
37363
37364 /* Create a new CONST_VECTOR and a MEM that references it. */
37365 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
37366 for (i = 0; i < 16; ++i)
37367 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
37368 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
37369 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
37370 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
37371 can't recognize. Force the SYMBOL_REF into a register. */
37372 if (!REG_P (XEXP (new_mem, 0))) {
37373 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
37374 XEXP (new_mem, 0) = base_reg;
37375 /* Move the newly created insn ahead of the load insn. */
37376 rtx_insn *force_insn = get_last_insn ();
37377 remove_insn (force_insn);
37378 rtx_insn *before_load_insn = PREV_INSN (load_insn);
37379 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
37380 df_insn_rescan (before_load_insn);
37381 df_insn_rescan (force_insn);
37382 }
37383
37384 /* Replace the MEM in the load instruction and rescan it. */
37385 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
37386 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
37387 df_insn_rescan (load_insn);
37388
37389 if (dump_file)
37390 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
37391 }
37392
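/* Editor's example of the mask adjustment: each selector byte is
rotated by eight within its source vector, so 0 -> 8, 7 -> 15,
8 -> 0, and 15 -> 7; bytes naming the second input stay in it,
e.g. 16 -> 24 and 24 -> 16, per the (val + 8) % 16 computation
above. */
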
37393 /* The insn described by INSN_ENTRY[I] can be swapped, but only
37394 with special handling. Take care of that here. */
37395 static void
37396 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
37397 {
37398 rtx_insn *insn = insn_entry[i].insn;
37399 rtx body = PATTERN (insn);
37400
37401 switch (insn_entry[i].special_handling)
37402 {
37403 default:
37404 gcc_unreachable ();
37405 case SH_CONST_VECTOR:
37406 {
37407 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
37408 gcc_assert (GET_CODE (body) == SET);
37409 rtx rhs = SET_SRC (body);
37410 swap_const_vector_halves (rhs);
37411 if (dump_file)
37412 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
37413 break;
37414 }
37415 case SH_SUBREG:
37416 /* A subreg of the same size is already safe. For subregs that
37417 select a smaller portion of a reg, adjust the index for
37418 swapped doublewords. */
37419 adjust_subreg_index (body);
37420 if (dump_file)
37421 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
37422 break;
37423 case SH_NOSWAP_LD:
37424 /* Convert a non-permuting load to a permuting one. */
37425 permute_load (insn);
37426 break;
37427 case SH_NOSWAP_ST:
37428 /* Convert a non-permuting store to a permuting one. */
37429 permute_store (insn);
37430 break;
37431 case SH_EXTRACT:
37432 /* Change the lane on an extract operation. */
37433 adjust_extract (insn);
37434 break;
37435 case SH_SPLAT:
37436 /* Change the lane on a direct-splat operation. */
37437 adjust_splat (insn);
37438 break;
37439 case SH_XXPERMDI:
37440 /* Change the lanes on an XXPERMDI operation. */
37441 adjust_xxpermdi (insn);
37442 break;
37443 case SH_CONCAT:
37444 /* Reverse the order of a concatenation operation. */
37445 adjust_concat (insn);
37446 break;
37447 case SH_VPERM:
37448 /* Change the mask loaded from the constant pool for a VPERM. */
37449 adjust_vperm (insn);
37450 break;
37451 }
37452 }
37453
37454 /* Find the insn from the Ith table entry, which is known to be a
37455 register swap Y = SWAP(X). Replace it with a copy Y = X. */
37456 static void
37457 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
37458 {
37459 rtx_insn *insn = insn_entry[i].insn;
37460 rtx body = PATTERN (insn);
37461 rtx src_reg = XEXP (SET_SRC (body), 0);
37462 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
37463 rtx_insn *new_insn = emit_insn_before (copy, insn);
37464 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
37465 df_insn_rescan (new_insn);
37466
37467 if (dump_file)
37468 {
37469 unsigned int new_uid = INSN_UID (new_insn);
37470 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
37471 }
37472
37473 df_insn_delete (insn);
37474 remove_insn (insn);
37475 insn->set_deleted ();
37476 }
37477
37478 /* Dump the swap table to DUMP_FILE. */
37479 static void
37480 dump_swap_insn_table (swap_web_entry *insn_entry)
37481 {
37482 int e = get_max_uid ();
37483 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
37484
37485 for (int i = 0; i < e; ++i)
37486 if (insn_entry[i].is_relevant)
37487 {
37488 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
37489 fprintf (dump_file, "%6d %6d ", i,
37490 pred_entry && pred_entry->insn
37491 ? INSN_UID (pred_entry->insn) : 0);
37492 if (insn_entry[i].is_load)
37493 fputs ("load ", dump_file);
37494 if (insn_entry[i].is_store)
37495 fputs ("store ", dump_file);
37496 if (insn_entry[i].is_swap)
37497 fputs ("swap ", dump_file);
37498 if (insn_entry[i].is_live_in)
37499 fputs ("live-in ", dump_file);
37500 if (insn_entry[i].is_live_out)
37501 fputs ("live-out ", dump_file);
37502 if (insn_entry[i].contains_subreg)
37503 fputs ("subreg ", dump_file);
37504 if (insn_entry[i].is_128_int)
37505 fputs ("int128 ", dump_file);
37506 if (insn_entry[i].is_call)
37507 fputs ("call ", dump_file);
37508 if (insn_entry[i].is_swappable)
37509 {
37510 fputs ("swappable ", dump_file);
37511 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
37512 fputs ("special:constvec ", dump_file);
37513 else if (insn_entry[i].special_handling == SH_SUBREG)
37514 fputs ("special:subreg ", dump_file);
37515 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
37516 fputs ("special:load ", dump_file);
37517 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
37518 fputs ("special:store ", dump_file);
37519 else if (insn_entry[i].special_handling == SH_EXTRACT)
37520 fputs ("special:extract ", dump_file);
37521 else if (insn_entry[i].special_handling == SH_SPLAT)
37522 fputs ("special:splat ", dump_file);
37523 else if (insn_entry[i].special_handling == SH_XXPERMDI)
37524 fputs ("special:xxpermdi ", dump_file);
37525 else if (insn_entry[i].special_handling == SH_CONCAT)
37526 fputs ("special:concat ", dump_file);
37527 else if (insn_entry[i].special_handling == SH_VPERM)
37528 fputs ("special:vperm ", dump_file);
37529 }
37530 if (insn_entry[i].web_not_optimizable)
37531 fputs ("unoptimizable ", dump_file);
37532 if (insn_entry[i].will_delete)
37533 fputs ("delete ", dump_file);
37534 fputs ("\n", dump_file);
37535 }
37536 fputs ("\n", dump_file);
37537 }
37538
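/* Editor's sketch of the overall transformation: on little-endian
VSX, a vector computation routed through memory expands as

lxvd2x 0,0,8      ; permuting load
xxpermdi 0,0,0,2  ; doubleword swap
... compute ...
xxpermdi 0,0,0,2  ; doubleword swap
stxvd2x 0,0,9     ; permuting store

When every insn in the web tolerates (or is fixed up for) swapped
doublewords, the xxpermdi swaps are replaced by plain copies and
later cleaned up, leaving just the load, compute, and store. */
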
37539 /* Main entry point for this pass. */
37540 unsigned int
37541 rs6000_analyze_swaps (function *fun)
37542 {
37543 swap_web_entry *insn_entry;
37544 basic_block bb;
37545 rtx_insn *insn;
37546
37547 /* Dataflow analysis for use-def chains. */
37548 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
37549 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
37550 df_analyze ();
37551 df_set_flags (DF_DEFER_INSN_RESCAN);
37552
37553 /* Allocate structure to represent webs of insns. */
37554 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
37555
37556 /* Walk the insns to gather basic data. */
37557 FOR_ALL_BB_FN (bb, fun)
37558 FOR_BB_INSNS (bb, insn)
37559 {
37560 unsigned int uid = INSN_UID (insn);
37561 if (NONDEBUG_INSN_P (insn))
37562 {
37563 insn_entry[uid].insn = insn;
37564
37565 if (GET_CODE (insn) == CALL_INSN)
37566 insn_entry[uid].is_call = 1;
37567
37568 /* Walk the uses and defs to see if we mention vector regs.
37569 Record any constraints on optimization of such mentions. */
37570 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37571 df_ref mention;
37572 FOR_EACH_INSN_INFO_USE (mention, insn_info)
37573 {
37574 /* We use DF_REF_REAL_REG here to get inside any subregs. */
37575 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
37576
37577 /* If a use gets its value from a call insn, it will be
37578 a hard register and will look like (reg:V4SI 3 3).
37579 The df analysis creates two mentions for GPR3 and GPR4,
37580 both DImode. We must recognize this and treat it as a
37581 vector mention to ensure the call is unioned with this
37582 use. */
37583 if (mode == DImode && DF_REF_INSN_INFO (mention))
37584 {
37585 rtx feeder = DF_REF_INSN (mention);
37586 /* FIXME: It is pretty hard to get from the df mention
37587 to the mode of the use in the insn. We arbitrarily
37588 pick a vector mode here, even though the use might
37589 be a real DImode. We can be too conservative
37590 (create a web larger than necessary) because of
37591 this, so consider eventually fixing this. */
37592 if (GET_CODE (feeder) == CALL_INSN)
37593 mode = V4SImode;
37594 }
37595
37596 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
37597 {
37598 insn_entry[uid].is_relevant = 1;
37599 if (mode == TImode || mode == V1TImode
37600 || FLOAT128_VECTOR_P (mode))
37601 insn_entry[uid].is_128_int = 1;
37602 if (DF_REF_INSN_INFO (mention))
37603 insn_entry[uid].contains_subreg
37604 = !rtx_equal_p (DF_REF_REG (mention),
37605 DF_REF_REAL_REG (mention));
37606 union_defs (insn_entry, insn, mention);
37607 }
37608 }
37609 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
37610 {
37611 /* We use DF_REF_REAL_REG here to get inside any subregs. */
37612 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
37613
37614 /* If we're loading up a hard vector register for a call,
37615 it looks like (set (reg:V4SI 9 9) (...)). The df
37616 analysis creates two mentions for GPR9 and GPR10, both
37617 DImode. So relying on the mode from the mentions
37618 isn't sufficient to ensure we union the call into the
37619 web with the parameter setup code. */
37620 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
37621 && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (PATTERN (insn)))))
37622 mode = GET_MODE (SET_DEST (PATTERN (insn)));
37623
37624 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode)
37625 {
37626 insn_entry[uid].is_relevant = 1;
37627 if (mode == TImode || mode == V1TImode
37628 || FLOAT128_VECTOR_P (mode))
37629 insn_entry[uid].is_128_int = 1;
37630 if (DF_REF_INSN_INFO (mention))
37631 insn_entry[uid].contains_subreg
37632 = !rtx_equal_p (DF_REF_REG (mention),
37633 DF_REF_REAL_REG (mention));
37634 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
37635 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
37636 insn_entry[uid].is_live_out = 1;
37637 union_uses (insn_entry, insn, mention);
37638 }
37639 }
37640
37641 if (insn_entry[uid].is_relevant)
37642 {
37643 /* Determine if this is a load or store. */
37644 insn_entry[uid].is_load = insn_is_load_p (insn);
37645 insn_entry[uid].is_store = insn_is_store_p (insn);
37646
37647 /* Determine if this is a doubleword swap. If not,
37648 determine whether it can legally be swapped. */
37649 if (insn_is_swap_p (insn))
37650 insn_entry[uid].is_swap = 1;
37651 else
37652 {
37653 unsigned int special = SH_NONE;
37654 insn_entry[uid].is_swappable
37655 = insn_is_swappable_p (insn_entry, insn, &special);
37656 if (special != SH_NONE && insn_entry[uid].contains_subreg)
37657 insn_entry[uid].is_swappable = 0;
37658 else if (special != SH_NONE)
37659 insn_entry[uid].special_handling = special;
37660 else if (insn_entry[uid].contains_subreg)
37661 insn_entry[uid].special_handling = SH_SUBREG;
37662 }
37663 }
37664 }
37665 }
37666
37667 if (dump_file)
37668 {
37669 fprintf (dump_file, "\nSwap insn entry table when first built\n");
37670 dump_swap_insn_table (insn_entry);
37671 }
37672
37673 /* Record unoptimizable webs. */
37674 unsigned e = get_max_uid (), i;
37675 for (i = 0; i < e; ++i)
37676 {
37677 if (!insn_entry[i].is_relevant)
37678 continue;
37679
37680 swap_web_entry *root
37681 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
37682
37683 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
37684 || (insn_entry[i].contains_subreg
37685 && insn_entry[i].special_handling != SH_SUBREG)
37686 || insn_entry[i].is_128_int || insn_entry[i].is_call
37687 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
37688 root->web_not_optimizable = 1;
37689
37690 /* If we have loads or stores that aren't permuting then the
37691 optimization isn't appropriate. */
37692 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
37693 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
37694 root->web_not_optimizable = 1;
37695
37696 /* If we have permuting loads or stores that are not accompanied
37697 by a register swap, the optimization isn't appropriate. */
37698 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
37699 {
37700 rtx insn = insn_entry[i].insn;
37701 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37702 df_ref def;
37703
37704 FOR_EACH_INSN_INFO_DEF (def, insn_info)
37705 {
37706 struct df_link *link = DF_REF_CHAIN (def);
37707
37708 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
37709 {
37710 root->web_not_optimizable = 1;
37711 break;
37712 }
37713 }
37714 }
37715 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
37716 {
37717 rtx insn = insn_entry[i].insn;
37718 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
37719 df_ref use;
37720
37721 FOR_EACH_INSN_INFO_USE (use, insn_info)
37722 {
37723 struct df_link *link = DF_REF_CHAIN (use);
37724
37725 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
37726 {
37727 root->web_not_optimizable = 1;
37728 break;
37729 }
37730 }
37731 }
37732 }
37733
37734 if (dump_file)
37735 {
37736 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
37737 dump_swap_insn_table (insn_entry);
37738 }
37739
37740 /* For each load and store in an optimizable web (which implies
37741 the loads and stores are permuting), find the associated
37742 register swaps and mark them for removal. Due to various
37743 optimizations we may mark the same swap more than once. Also
37744 perform special handling for swappable insns that require it. */
37745 for (i = 0; i < e; ++i)
37746 if ((insn_entry[i].is_load || insn_entry[i].is_store)
37747 && insn_entry[i].is_swap)
37748 {
37749 swap_web_entry* root_entry
37750 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
37751 if (!root_entry->web_not_optimizable)
37752 mark_swaps_for_removal (insn_entry, i);
37753 }
37754 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
37755 {
37756 swap_web_entry* root_entry
37757 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
37758 if (!root_entry->web_not_optimizable)
37759 handle_special_swappables (insn_entry, i);
37760 }
37761
37762 /* Now delete the swaps marked for removal. */
37763 for (i = 0; i < e; ++i)
37764 if (insn_entry[i].will_delete)
37765 replace_swap_with_copy (insn_entry, i);
37766
37767 /* Clean up. */
37768 free (insn_entry);
37769 return 0;
37770 }
37771
37772 const pass_data pass_data_analyze_swaps =
37773 {
37774 RTL_PASS, /* type */
37775 "swaps", /* name */
37776 OPTGROUP_NONE, /* optinfo_flags */
37777 TV_NONE, /* tv_id */
37778 0, /* properties_required */
37779 0, /* properties_provided */
37780 0, /* properties_destroyed */
37781 0, /* todo_flags_start */
37782 TODO_df_finish, /* todo_flags_finish */
37783 };
37784
37785 class pass_analyze_swaps : public rtl_opt_pass
37786 {
37787 public:
37788 pass_analyze_swaps(gcc::context *ctxt)
37789 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
37790 {}
37791
37792 /* opt_pass methods: */
37793 virtual bool gate (function *)
37794 {
37795 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
37796 && rs6000_optimize_swaps);
37797 }
37798
37799 virtual unsigned int execute (function *fun)
37800 {
37801 return rs6000_analyze_swaps (fun);
37802 }
37803
37804 }; // class pass_analyze_swaps
37805
37806 rtl_opt_pass *
37807 make_pass_analyze_swaps (gcc::context *ctxt)
37808 {
37809 return new pass_analyze_swaps (ctxt);
37810 }
37811
37812 #ifdef RS6000_GLIBC_ATOMIC_FENV
37813 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
37814 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
37815 #endif
37816
37817 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
37818
37819 static void
37820 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
37821 {
37822 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
37823 {
37824 #ifdef RS6000_GLIBC_ATOMIC_FENV
37825 if (atomic_hold_decl == NULL_TREE)
37826 {
37827 atomic_hold_decl
37828 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
37829 get_identifier ("__atomic_feholdexcept"),
37830 build_function_type_list (void_type_node,
37831 double_ptr_type_node,
37832 NULL_TREE));
37833 TREE_PUBLIC (atomic_hold_decl) = 1;
37834 DECL_EXTERNAL (atomic_hold_decl) = 1;
37835 }
37836
37837 if (atomic_clear_decl == NULL_TREE)
37838 {
37839 atomic_clear_decl
37840 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
37841 get_identifier ("__atomic_feclearexcept"),
37842 build_function_type_list (void_type_node,
37843 NULL_TREE));
37844 TREE_PUBLIC (atomic_clear_decl) = 1;
37845 DECL_EXTERNAL (atomic_clear_decl) = 1;
37846 }
37847
37848 tree const_double = build_qualified_type (double_type_node,
37849 TYPE_QUAL_CONST);
37850 tree const_double_ptr = build_pointer_type (const_double);
37851 if (atomic_update_decl == NULL_TREE)
37852 {
37853 atomic_update_decl
37854 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
37855 get_identifier ("__atomic_feupdateenv"),
37856 build_function_type_list (void_type_node,
37857 const_double_ptr,
37858 NULL_TREE));
37859 TREE_PUBLIC (atomic_update_decl) = 1;
37860 DECL_EXTERNAL (atomic_update_decl) = 1;
37861 }
37862
37863 tree fenv_var = create_tmp_var_raw (double_type_node);
37864 TREE_ADDRESSABLE (fenv_var) = 1;
37865 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
37866
37867 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
37868 *clear = build_call_expr (atomic_clear_decl, 0);
37869 *update = build_call_expr (atomic_update_decl, 1,
37870 fold_convert (const_double_ptr, fenv_addr));
37871 #endif
37872 return;
37873 }
37874
37875 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
37876 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
37877 tree call_mffs = build_call_expr (mffs, 0);
37878
37879 /* Generates the equivalent of feholdexcept (&fenv_var)
37880
37881 *fenv_var = __builtin_mffs ();
37882 double fenv_hold;
37883 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
37884 __builtin_mtfsf (0xff, fenv_hold); */
37885
37886 /* Mask to clear everything except for the rounding modes and non-IEEE
37887 arithmetic flag. */
37888 const unsigned HOST_WIDE_INT hold_exception_mask =
37889 HOST_WIDE_INT_C (0xffffffff00000007);
37890
37891 tree fenv_var = create_tmp_var_raw (double_type_node);
37892
37893 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
37894
37895 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
37896 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
37897 build_int_cst (uint64_type_node,
37898 hold_exception_mask));
37899
37900 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
37901 fenv_llu_and);
37902
37903 tree hold_mtfsf = build_call_expr (mtfsf, 2,
37904 build_int_cst (unsigned_type_node, 0xff),
37905 fenv_hold_mtfsf);
37906
37907 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
37908
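/* Editor's note on the layout assumed above: __builtin_mffs returns
the FPSCR image in a double, with the architected 32-bit FPSCR in
the low-order word. Bits 0-1 of that word are the rounding-mode
(RN) field and bit 2 is the non-IEEE mode (NI) bit, which is why
the 0x7 in the hold mask preserves exactly those fields. */
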
37909 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
37910
37911 double fenv_clear = __builtin_mffs ();
37912 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
37913 __builtin_mtfsf (0xff, fenv_clear); */
37914
37915 /* Mask to clear the entire low-order word of the FPSCR, including
37916 the exception flags, enable bits, and rounding modes. */
37917 const unsigned HOST_WIDE_INT clear_exception_mask =
37918 HOST_WIDE_INT_C (0xffffffff00000000);
37919
37920 tree fenv_clear = create_tmp_var_raw (double_type_node);
37921
37922 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
37923
37924 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
37925 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
37926 fenv_clean_llu,
37927 build_int_cst (uint64_type_node,
37928 clear_exception_mask));
37929
37930 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
37931 fenv_clear_llu_and);
37932
37933 tree clear_mtfsf = build_call_expr (mtfsf, 2,
37934 build_int_cst (unsigned_type_node, 0xff),
37935 fenv_clear_mtfsf);
37936
37937 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
37938
37939 /* Generates the equivalent of feupdateenv (&fenv_var)
37940
37941 double old_fenv = __builtin_mffs ();
37942 double fenv_update;
37943 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
37944 (*(uint64_t*)fenv_var & 0x1ff80fff);
37945 __builtin_mtfsf (0xff, fenv_update); */
37946
37947 const unsigned HOST_WIDE_INT update_exception_mask =
37948 HOST_WIDE_INT_C (0xffffffff1fffff00);
37949 const unsigned HOST_WIDE_INT new_exception_mask =
37950 HOST_WIDE_INT_C (0x1ff80fff);
37951
37952 tree old_fenv = create_tmp_var_raw (double_type_node);
37953 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
37954
37955 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
37956 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
37957 build_int_cst (uint64_type_node,
37958 update_exception_mask));
37959
37960 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
37961 build_int_cst (uint64_type_node,
37962 new_exception_mask));
37963
37964 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
37965 old_llu_and, new_llu_and);
37966
37967 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
37968 new_llu_mask);
37969
37970 tree update_mtfsf = build_call_expr (mtfsf, 2,
37971 build_int_cst (unsigned_type_node, 0xff),
37972 fenv_update_mtfsf);
37973
37974 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
37975 }
37976
37977 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
37978
37979 static bool
37980 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
37981 optimization_type opt_type)
37982 {
37983 switch (op)
37984 {
37985 case rsqrt_optab:
37986 return (opt_type == OPTIMIZE_FOR_SPEED
37987 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
37988
37989 default:
37990 return true;
37991 }
37992 }
37993 \f
37994 struct gcc_target targetm = TARGET_INITIALIZER;
37995
37996 #include "gt-rs6000.h"