/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2014 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "except.h"
#include "hashtab.h"
#include "hash-set.h"
#include "vec.h"
#include "machmode.h"
#include "input.h"
#include "function.h"
#include "output.h"
#include "dbxout.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cfgloop.h"
#include "sched-int.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "ira.h"
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "hash-map.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "real.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)	((A) < (B) ? (A) : (B))
#define max(A,B)	((A) > (B) ? (A) : (B))

/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in save_size */
  int vrsave_size;		/* size to hold VRSAVE if not in save_size */
  int altivec_padding_size;	/* size of altivec alignment padding if
				   not in save_size */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, to call to so we can
   get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
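
/* A minimal sketch, with a hypothetical helper name, of how an -mrecip=<x>
   argument maps through recip_options to a set of RECIP_* mask bits (the
   real parsing happens during option override later in this file).  */
static unsigned int
recip_mask_for_option (const char *name)
{
  for (size_t i = 0; i < ARRAY_SIZE (recip_options); i++)
    if (strcmp (name, recip_options[i].string) == 0)
      return recip_options[i].mask;
  return RECIP_NONE;
}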

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
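
/* Illustrative only, not part of the original file: a debug helper showing
   the loop shape described above, walking just the reload register classes
   that map to real registers and skipping RELOAD_REG_ANY.  The function
   name is hypothetical.  */
static void
rs6000_debug_print_reload_reg_map (FILE *file)
{
  for (int rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
    fprintf (file, "%s: first regno = %d\n",
	     reload_reg_map[rc].name, reload_reg_map[rc].reg);
}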

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */

/* Masks of valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}
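
/* A hypothetical companion to the two helpers above, not in the original
   file, shown to illustrate indexing addr_mask by one specific reload
   register class instead of RELOAD_REG_ANY.  */
static inline bool
mode_supports_gpr_offset_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_GPR] & RELOAD_REG_OFFSET)
	  != 0);
}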

\f
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
};

const struct processor_costs *rs6000_cost;

/* Processor costs (relative to an add) */

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  32,				/* l1 cache */
  512,				/* l2 cache */
  6,				/* streams */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
};
795
796 /* Instruction costs on E300C2 and E300C3 cores. */
797 static const
798 struct processor_costs ppce300c2c3_cost = {
799 COSTS_N_INSNS (4), /* mulsi */
800 COSTS_N_INSNS (4), /* mulsi_const */
801 COSTS_N_INSNS (4), /* mulsi_const9 */
802 COSTS_N_INSNS (4), /* muldi */
803 COSTS_N_INSNS (19), /* divsi */
804 COSTS_N_INSNS (19), /* divdi */
805 COSTS_N_INSNS (3), /* fp */
806 COSTS_N_INSNS (4), /* dmul */
807 COSTS_N_INSNS (18), /* sdiv */
808 COSTS_N_INSNS (33), /* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
};

\f
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
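
/* The #define/#include/#undef dance above is the classic X-macro idiom:
   each RS6000_BUILTIN_* macro is redefined before rs6000-builtin.def is
   included, so the same .def entries expand into a different table at each
   inclusion site.  A self-contained toy version, with hypothetical names
   that are not part of the rs6000 port:  */

#define TOY_BUILTINS \
  TOY_BUILTIN (ADD, "add") \
  TOY_BUILTIN (SUB, "sub")

#define TOY_BUILTIN(ENUM, NAME) TOY_##ENUM,
enum toy_builtin_code { TOY_BUILTINS TOY_BUILTIN_MAX };
#undef TOY_BUILTIN

#define TOY_BUILTIN(ENUM, NAME) NAME,
static const char *const toy_builtin_names[] = { TOY_BUILTINS };
#undef TOY_BUILTIN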

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (tree, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_hasher<toc_hash_struct *>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
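
/* A minimal sketch, using a hypothetical helper name, of how this table is
   typically probed (the real TOC bookkeeping is done in output_toc further
   down in this file): build a temporary key, find or insert the slot, and
   reuse the label number of an existing entry.  */
static int
toc_entry_labelno (rtx key, machine_mode key_mode, int new_labelno)
{
  toc_hash_struct temp = { key, key_mode, new_labelno };
  toc_hash_struct **slot = toc_hash_table->find_slot (&temp, INSERT);
  if (*slot == NULL)
    {
      *slot = ggc_alloc<toc_hash_struct> ();
      **slot = temp;
    }
  return (*slot)->labelno;
}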

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_hasher<builtin_hash_struct *>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;

\f
/* Default register names.  */
char rs6000_reg_names[][8] =
{
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
   "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
   "%rh8",  "%rh9", "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
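/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 2) is
   0x80000000 >> 2 == 0x20000000, i.e. the bit for %v2 in the VRSAVE
   mask (an illustrative note, not in the original file).  */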
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

/* On rs6000, function arguments are promoted, as are function return
   values.  */
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

/* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
   The PowerPC architecture requires only weak consistency among
   processors--that is, memory accesses between processors need not be
   sequentially consistent and memory accesses among processors can occur
   in any order.  The ability to order memory accesses weakly provides
   opportunities for more efficient use of the system bus.  Unless a
   dependency exists, the 604e allows read operations to precede store
   operations.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P rs6000_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1653
1654 #undef TARGET_LEGITIMATE_CONSTANT_P
1655 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1656
1657 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1658 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1659
1660 #undef TARGET_CAN_USE_DOLOOP_P
1661 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1662
1663 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1664 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1665 \f
1666
1667 /* Processor table. */
1668 struct rs6000_ptt
1669 {
1670 const char *const name; /* Canonical processor name. */
1671 const enum processor_type processor; /* Processor type enum value. */
1672 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1673 };
1674
1675 static struct rs6000_ptt const processor_target_table[] =
1676 {
1677 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1678 #include "rs6000-cpus.def"
1679 #undef RS6000_CPU
1680 };
1681
1682 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1683 name is invalid. */
1684
1685 static int
1686 rs6000_cpu_name_lookup (const char *name)
1687 {
1688 size_t i;
1689
1690 if (name != NULL)
1691 {
1692 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1693 if (! strcmp (name, processor_target_table[i].name))
1694 return (int)i;
1695 }
1696
1697 return -1;
1698 }
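/* Illustrative usage sketch (hypothetical caller, not part of this file):

     int idx = rs6000_cpu_name_lookup ("power8");
     if (idx >= 0)
       flags = processor_target_table[idx].target_enable;

   A negative return means the -mcpu=/-mtune= name was not found in
   processor_target_table; callers are expected to diagnose that case.  */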
1699
1700 \f
1701 /* Return number of consecutive hard regs needed starting at reg REGNO
1702 to hold something of mode MODE.
1703 This is ordinarily the length in words of a value of mode MODE
1704 but can be less for certain modes in special long registers.
1705
1706 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1707 scalar instructions. The upper 32 bits are only available to the
1708 SIMD instructions.
1709
1710 POWER and PowerPC GPRs hold 32 bits worth;
1711 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1712
1713 static int
1714 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1715 {
1716 unsigned HOST_WIDE_INT reg_size;
1717
1718 /* TF/TD modes are special in that they always take 2 registers. */
1719 if (FP_REGNO_P (regno))
1720 reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
1721 ? UNITS_PER_VSX_WORD
1722 : UNITS_PER_FP_WORD);
1723
1724 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1725 reg_size = UNITS_PER_SPE_WORD;
1726
1727 else if (ALTIVEC_REGNO_P (regno))
1728 reg_size = UNITS_PER_ALTIVEC_WORD;
1729
1730 /* The value returned for SCmode in the E500 double case is 2 for
1731 ABI compatibility; storing an SCmode value in a single register
1732 would require function_arg and rs6000_spe_function_arg to handle
1733 SCmode so as to pass the value correctly in a pair of
1734 registers. */
1735 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1736 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1737 reg_size = UNITS_PER_FP_WORD;
1738
1739 else
1740 reg_size = UNITS_PER_WORD;
1741
1742 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1743 }
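/* Worked example (illustrative): a TFmode value is 16 bytes; in the FP
   registers reg_size is UNITS_PER_FP_WORD (8), so the expression above
   yields (16 + 8 - 1) / 8 == 2 consecutive registers.  The "+ reg_size - 1"
   term makes the division round up, so a 4-byte SFmode still takes one
   whole 8-byte FPR rather than zero.  */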
1744
1745 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1746 MODE. */
1747 static int
1748 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1749 {
1750 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1751
1752 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1753 register pairs, and PTImode is used where we need to deal with quad
1754 word memory operations. Don't allow quad words in the argument or frame
1755 pointer registers, just registers 0..31. */
1756 if (mode == PTImode)
1757 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1758 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1759 && ((regno & 1) == 0));
1760
1761 /* VSX registers that overlap the FPR registers are larger than on non-VSX
1762 implementations. Don't allow an item to be split between an FP register
1763 and an Altivec register. Allow TImode in all VSX registers if the user
1764 asked for it. */
1765 if (TARGET_VSX && VSX_REGNO_P (regno)
1766 && (VECTOR_MEM_VSX_P (mode)
1767 || reg_addr[mode].scalar_in_vmx_p
1768 || (TARGET_VSX_TIMODE && mode == TImode)
1769 || (TARGET_VADDUQM && mode == V1TImode)))
1770 {
1771 if (FP_REGNO_P (regno))
1772 return FP_REGNO_P (last_regno);
1773
1774 if (ALTIVEC_REGNO_P (regno))
1775 {
1776 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1777 return 0;
1778
1779 return ALTIVEC_REGNO_P (last_regno);
1780 }
1781 }
1782
1783 /* The GPRs can hold any mode, but values bigger than one register
1784 cannot go past R31. */
1785 if (INT_REGNO_P (regno))
1786 return INT_REGNO_P (last_regno);
1787
1788 /* The float registers (except for VSX vector modes) can only hold floating
1789 modes and DImode. */
1790 if (FP_REGNO_P (regno))
1791 {
1792 if (SCALAR_FLOAT_MODE_P (mode)
1793 && (mode != TDmode || (regno % 2) == 0)
1794 && FP_REGNO_P (last_regno))
1795 return 1;
1796
1797 if (GET_MODE_CLASS (mode) == MODE_INT
1798 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1799 return 1;
1800
1801 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1802 && PAIRED_VECTOR_MODE (mode))
1803 return 1;
1804
1805 return 0;
1806 }
1807
1808 /* The CR register can only hold CC modes. */
1809 if (CR_REGNO_P (regno))
1810 return GET_MODE_CLASS (mode) == MODE_CC;
1811
1812 if (CA_REGNO_P (regno))
1813 return mode == Pmode || mode == SImode;
1814
1815 /* AltiVec modes can go only in AltiVec registers. */
1816 if (ALTIVEC_REGNO_P (regno))
1817 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1818 || mode == V1TImode);
1819
1820 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1821 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1822 return 1;
1823
1824 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1825 registers, and the value must fit within the register set. */
1826
1827 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1828 }
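/* Worked example (illustrative, assuming a 64-bit target where PTImode
   needs two GPRs): starting at GPR 10 the value occupies r10..r11; regno
   10 is even and last_regno 11 is still a GPR, so the PTImode check above
   succeeds, while a start at r11 fails the (regno & 1) == 0 test and a
   start at r31 would spill past LAST_GPR_REGNO.  */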
1829
1830 /* Print interesting facts about registers. */
1831 static void
1832 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1833 {
1834 int r, m;
1835
1836 for (r = first_regno; r <= last_regno; ++r)
1837 {
1838 const char *comma = "";
1839 int len;
1840
1841 if (first_regno == last_regno)
1842 fprintf (stderr, "%s:\t", reg_name);
1843 else
1844 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1845
1846 len = 8;
1847 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1848 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1849 {
1850 if (len > 70)
1851 {
1852 fprintf (stderr, ",\n\t");
1853 len = 8;
1854 comma = "";
1855 }
1856
1857 if (rs6000_hard_regno_nregs[m][r] > 1)
1858 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1859 rs6000_hard_regno_nregs[m][r]);
1860 else
1861 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1862
1863 comma = ", ";
1864 }
1865
1866 if (call_used_regs[r])
1867 {
1868 if (len > 70)
1869 {
1870 fprintf (stderr, ",\n\t");
1871 len = 8;
1872 comma = "";
1873 }
1874
1875 len += fprintf (stderr, "%s%s", comma, "call-used");
1876 comma = ", ";
1877 }
1878
1879 if (fixed_regs[r])
1880 {
1881 if (len > 70)
1882 {
1883 fprintf (stderr, ",\n\t");
1884 len = 8;
1885 comma = "";
1886 }
1887
1888 len += fprintf (stderr, "%s%s", comma, "fixed");
1889 comma = ", ";
1890 }
1891
1892 if (len > 70)
1893 {
1894 fprintf (stderr, ",\n\t");
1895 comma = "";
1896 }
1897
1898 len += fprintf (stderr, "%sreg-class = %s", comma,
1899 reg_class_names[(int)rs6000_regno_regclass[r]]);
1900 comma = ", ";
1901
1902 if (len > 70)
1903 {
1904 fprintf (stderr, ",\n\t");
1905 comma = "";
1906 }
1907
1908 fprintf (stderr, "%sregno = %d\n", comma, r);
1909 }
1910 }
1911
1912 static const char *
1913 rs6000_debug_vector_unit (enum rs6000_vector v)
1914 {
1915 const char *ret;
1916
1917 switch (v)
1918 {
1919 case VECTOR_NONE: ret = "none"; break;
1920 case VECTOR_ALTIVEC: ret = "altivec"; break;
1921 case VECTOR_VSX: ret = "vsx"; break;
1922 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1923 case VECTOR_PAIRED: ret = "paired"; break;
1924 case VECTOR_SPE: ret = "spe"; break;
1925 case VECTOR_OTHER: ret = "other"; break;
1926 default: ret = "unknown"; break;
1927 }
1928
1929 return ret;
1930 }
1931
1932 /* Inner function printing just the address mask for a particular reload
1933 register class. */
1934 DEBUG_FUNCTION char *
1935 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
1936 {
1937 static char ret[8];
1938 char *p = ret;
1939
1940 if ((mask & RELOAD_REG_VALID) != 0)
1941 *p++ = 'v';
1942 else if (keep_spaces)
1943 *p++ = ' ';
1944
1945 if ((mask & RELOAD_REG_MULTIPLE) != 0)
1946 *p++ = 'm';
1947 else if (keep_spaces)
1948 *p++ = ' ';
1949
1950 if ((mask & RELOAD_REG_INDEXED) != 0)
1951 *p++ = 'i';
1952 else if (keep_spaces)
1953 *p++ = ' ';
1954
1955 if ((mask & RELOAD_REG_OFFSET) != 0)
1956 *p++ = 'o';
1957 else if (keep_spaces)
1958 *p++ = ' ';
1959
1960 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
1961 *p++ = '+';
1962 else if (keep_spaces)
1963 *p++ = ' ';
1964
1965 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
1966 *p++ = '+';
1967 else if (keep_spaces)
1968 *p++ = ' ';
1969
1970 if ((mask & RELOAD_REG_AND_M16) != 0)
1971 *p++ = '&';
1972 else if (keep_spaces)
1973 *p++ = ' ';
1974
1975 *p = '\0';
1976
1977 return ret;
1978 }
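/* Example output (illustrative): with keep_spaces true, a mask that has
   RELOAD_REG_VALID, RELOAD_REG_INDEXED and RELOAD_REG_OFFSET set prints
   as "v io   ", i.e. one column per flag with a blank for each flag that
   is clear, so the per-mode rows printed below line up.  */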
1979
1980 /* Print the address masks in a human readable fashion. */
1981 DEBUG_FUNCTION void
1982 rs6000_debug_print_mode (ssize_t m)
1983 {
1984 ssize_t rc;
1985
1986 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
1987 for (rc = 0; rc < N_RELOAD_REG; rc++)
1988 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
1989 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
1990
1991 if (rs6000_vector_unit[m] != VECTOR_NONE
1992 || rs6000_vector_mem[m] != VECTOR_NONE
1993 || (reg_addr[m].reload_store != CODE_FOR_nothing)
1994 || (reg_addr[m].reload_load != CODE_FOR_nothing)
1995 || reg_addr[m].scalar_in_vmx_p)
1996 {
1997 fprintf (stderr,
1998 " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c Upper=%c",
1999 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2000 rs6000_debug_vector_unit (rs6000_vector_mem[m]),
2001 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2002 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*',
2003 (reg_addr[m].scalar_in_vmx_p) ? 'y' : 'n');
2004 }
2005
2006 fputs ("\n", stderr);
2007 }
2008
2009 #define DEBUG_FMT_ID "%-32s= "
2010 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2011 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2012 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
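/* These expand to fprintf formats with the key left-justified in a
   32-column field, e.g. (illustrative):

     fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);

   prints something like "tls_size                        = 32", keeping
   the '=' signs of successive -mdebug=reg lines aligned.  */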
2013
2014 /* Print various interesting information with -mdebug=reg. */
2015 static void
2016 rs6000_debug_reg_global (void)
2017 {
2018 static const char *const tf[2] = { "false", "true" };
2019 const char *nl = (const char *)0;
2020 int m;
2021 size_t m1, m2, v;
2022 char costly_num[20];
2023 char nop_num[20];
2024 char flags_buffer[40];
2025 const char *costly_str;
2026 const char *nop_str;
2027 const char *trace_str;
2028 const char *abi_str;
2029 const char *cmodel_str;
2030 struct cl_target_option cl_opts;
2031
2032 /* Modes we want tieable information on. */
2033 static const machine_mode print_tieable_modes[] = {
2034 QImode,
2035 HImode,
2036 SImode,
2037 DImode,
2038 TImode,
2039 PTImode,
2040 SFmode,
2041 DFmode,
2042 TFmode,
2043 SDmode,
2044 DDmode,
2045 TDmode,
2046 V8QImode,
2047 V4HImode,
2048 V2SImode,
2049 V16QImode,
2050 V8HImode,
2051 V4SImode,
2052 V2DImode,
2053 V1TImode,
2054 V32QImode,
2055 V16HImode,
2056 V8SImode,
2057 V4DImode,
2058 V2TImode,
2059 V2SFmode,
2060 V4SFmode,
2061 V2DFmode,
2062 V8SFmode,
2063 V4DFmode,
2064 CCmode,
2065 CCUNSmode,
2066 CCEQmode,
2067 };
2068
2069 /* Virtual regs we are interested in. */
2070 static const struct {
2071 int regno; /* register number. */
2072 const char *name; /* register name. */
2073 } virtual_regs[] = {
2074 { STACK_POINTER_REGNUM, "stack pointer:" },
2075 { TOC_REGNUM, "toc: " },
2076 { STATIC_CHAIN_REGNUM, "static chain: " },
2077 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2078 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2079 { ARG_POINTER_REGNUM, "arg pointer: " },
2080 { FRAME_POINTER_REGNUM, "frame pointer:" },
2081 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2082 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2083 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2084 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2085 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2086 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2087 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2088 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2089 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2090 };
2091
2092 fputs ("\nHard register information:\n", stderr);
2093 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2094 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2095 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2096 LAST_ALTIVEC_REGNO,
2097 "vs");
2098 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2099 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2100 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2101 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2102 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2103 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2104 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2105 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2106
2107 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2108 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2109 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2110
2111 fprintf (stderr,
2112 "\n"
2113 "d reg_class = %s\n"
2114 "f reg_class = %s\n"
2115 "v reg_class = %s\n"
2116 "wa reg_class = %s\n"
2117 "wd reg_class = %s\n"
2118 "wf reg_class = %s\n"
2119 "wg reg_class = %s\n"
2120 "wh reg_class = %s\n"
2121 "wi reg_class = %s\n"
2122 "wj reg_class = %s\n"
2123 "wk reg_class = %s\n"
2124 "wl reg_class = %s\n"
2125 "wm reg_class = %s\n"
2126 "wr reg_class = %s\n"
2127 "ws reg_class = %s\n"
2128 "wt reg_class = %s\n"
2129 "wu reg_class = %s\n"
2130 "wv reg_class = %s\n"
2131 "ww reg_class = %s\n"
2132 "wx reg_class = %s\n"
2133 "wy reg_class = %s\n"
2134 "wz reg_class = %s\n"
2135 "\n",
2136 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2137 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2138 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2139 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2140 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2141 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2142 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2143 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2144 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2145 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2146 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2147 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2148 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2149 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2150 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2151 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2152 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2153 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2154 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2155 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2156 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2157 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2158
2159 nl = "\n";
2160 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2161 rs6000_debug_print_mode (m);
2162
2163 fputs ("\n", stderr);
2164
2165 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2166 {
2167 machine_mode mode1 = print_tieable_modes[m1];
2168 bool first_time = true;
2169
2170 nl = (const char *)0;
2171 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2172 {
2173 machine_mode mode2 = print_tieable_modes[m2];
2174 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2175 {
2176 if (first_time)
2177 {
2178 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2179 nl = "\n";
2180 first_time = false;
2181 }
2182
2183 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2184 }
2185 }
2186
2187 if (!first_time)
2188 fputs ("\n", stderr);
2189 }
2190
2191 if (nl)
2192 fputs (nl, stderr);
2193
2194 if (rs6000_recip_control)
2195 {
2196 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2197
2198 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2199 if (rs6000_recip_bits[m])
2200 {
2201 fprintf (stderr,
2202 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2203 GET_MODE_NAME (m),
2204 (RS6000_RECIP_AUTO_RE_P (m)
2205 ? "auto"
2206 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2207 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2208 ? "auto"
2209 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2210 }
2211
2212 fputs ("\n", stderr);
2213 }
2214
2215 if (rs6000_cpu_index >= 0)
2216 {
2217 const char *name = processor_target_table[rs6000_cpu_index].name;
2218 HOST_WIDE_INT flags
2219 = processor_target_table[rs6000_cpu_index].target_enable;
2220
2221 sprintf (flags_buffer, "-mcpu=%s flags", name);
2222 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2223 }
2224 else
2225 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2226
2227 if (rs6000_tune_index >= 0)
2228 {
2229 const char *name = processor_target_table[rs6000_tune_index].name;
2230 HOST_WIDE_INT flags
2231 = processor_target_table[rs6000_tune_index].target_enable;
2232
2233 sprintf (flags_buffer, "-mtune=%s flags", name);
2234 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2235 }
2236 else
2237 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2238
2239 cl_target_option_save (&cl_opts, &global_options);
2240 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2241 rs6000_isa_flags);
2242
2243 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2244 rs6000_isa_flags_explicit);
2245
2246 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2247 rs6000_builtin_mask);
2248
2249 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2250
2251 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2252 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2253
2254 switch (rs6000_sched_costly_dep)
2255 {
2256 case max_dep_latency:
2257 costly_str = "max_dep_latency";
2258 break;
2259
2260 case no_dep_costly:
2261 costly_str = "no_dep_costly";
2262 break;
2263
2264 case all_deps_costly:
2265 costly_str = "all_deps_costly";
2266 break;
2267
2268 case true_store_to_load_dep_costly:
2269 costly_str = "true_store_to_load_dep_costly";
2270 break;
2271
2272 case store_to_load_dep_costly:
2273 costly_str = "store_to_load_dep_costly";
2274 break;
2275
2276 default:
2277 costly_str = costly_num;
2278 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2279 break;
2280 }
2281
2282 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2283
2284 switch (rs6000_sched_insert_nops)
2285 {
2286 case sched_finish_regroup_exact:
2287 nop_str = "sched_finish_regroup_exact";
2288 break;
2289
2290 case sched_finish_pad_groups:
2291 nop_str = "sched_finish_pad_groups";
2292 break;
2293
2294 case sched_finish_none:
2295 nop_str = "sched_finish_none";
2296 break;
2297
2298 default:
2299 nop_str = nop_num;
2300 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2301 break;
2302 }
2303
2304 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2305
2306 switch (rs6000_sdata)
2307 {
2308 default:
2309 case SDATA_NONE:
2310 break;
2311
2312 case SDATA_DATA:
2313 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2314 break;
2315
2316 case SDATA_SYSV:
2317 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2318 break;
2319
2320 case SDATA_EABI:
2321 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2322 break;
2323
2324 }
2325
2326 switch (rs6000_traceback)
2327 {
2328 case traceback_default: trace_str = "default"; break;
2329 case traceback_none: trace_str = "none"; break;
2330 case traceback_part: trace_str = "part"; break;
2331 case traceback_full: trace_str = "full"; break;
2332 default: trace_str = "unknown"; break;
2333 }
2334
2335 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2336
2337 switch (rs6000_current_cmodel)
2338 {
2339 case CMODEL_SMALL: cmodel_str = "small"; break;
2340 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2341 case CMODEL_LARGE: cmodel_str = "large"; break;
2342 default: cmodel_str = "unknown"; break;
2343 }
2344
2345 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2346
2347 switch (rs6000_current_abi)
2348 {
2349 case ABI_NONE: abi_str = "none"; break;
2350 case ABI_AIX: abi_str = "aix"; break;
2351 case ABI_ELFv2: abi_str = "ELFv2"; break;
2352 case ABI_V4: abi_str = "V4"; break;
2353 case ABI_DARWIN: abi_str = "darwin"; break;
2354 default: abi_str = "unknown"; break;
2355 }
2356
2357 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2358
2359 if (rs6000_altivec_abi)
2360 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2361
2362 if (rs6000_spe_abi)
2363 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2364
2365 if (rs6000_darwin64_abi)
2366 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2367
2368 if (rs6000_float_gprs)
2369 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2370
2371 fprintf (stderr, DEBUG_FMT_S, "fprs",
2372 (TARGET_FPRS ? "true" : "false"));
2373
2374 fprintf (stderr, DEBUG_FMT_S, "single_float",
2375 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2376
2377 fprintf (stderr, DEBUG_FMT_S, "double_float",
2378 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2379
2380 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2381 (TARGET_SOFT_FLOAT ? "true" : "false"));
2382
2383 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2384 (TARGET_E500_SINGLE ? "true" : "false"));
2385
2386 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2387 (TARGET_E500_DOUBLE ? "true" : "false"));
2388
2389 if (TARGET_LINK_STACK)
2390 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2391
2392 if (targetm.lra_p ())
2393 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2394
2395 if (TARGET_P8_FUSION)
2396 fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
2397 (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
2398
2399 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2400 TARGET_SECURE_PLT ? "secure" : "bss");
2401 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2402 aix_struct_return ? "aix" : "sysv");
2403 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2404 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2405 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2406 tf[!!rs6000_align_branch_targets]);
2407 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2408 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2409 rs6000_long_double_type_size);
2410 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2411 (int)rs6000_sched_restricted_insns_priority);
2412 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2413 (int)END_BUILTINS);
2414 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2415 (int)RS6000_BUILTIN_COUNT);
2416
2417 if (TARGET_VSX)
2418 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2419 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2420 }
2421
2422 \f
2423 /* Update the addr mask bits in reg_addr to help secondary reload and the
2424 legitimate address (GO_IF_LEGITIMATE_ADDRESS) support figure out the
2425 appropriate addressing to use. */
2426
2427 static void
2428 rs6000_setup_reg_addr_masks (void)
2429 {
2430 ssize_t rc, reg, m, nregs;
2431 addr_mask_type any_addr_mask, addr_mask;
2432
2433 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2434 {
2435 machine_mode m2 = (machine_mode)m;
2436
2437 /* SDmode is special in that we want to access it only via REG+REG
2438 addressing on power7 and above, since we want to use the LFIWZX and
2439 STFIWZX instructions to load it. */
2440 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2441
2442 any_addr_mask = 0;
2443 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2444 {
2445 addr_mask = 0;
2446 reg = reload_reg_map[rc].reg;
2447
2448 /* Can mode values go in the GPR/FPR/Altivec registers? */
2449 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2450 {
2451 nregs = rs6000_hard_regno_nregs[m][reg];
2452 addr_mask |= RELOAD_REG_VALID;
2453
2454 /* Indicate if the mode takes more than 1 physical register. If
2455 it takes a single register, indicate it can do REG+REG
2456 addressing. */
2457 if (nregs > 1 || m == BLKmode)
2458 addr_mask |= RELOAD_REG_MULTIPLE;
2459 else
2460 addr_mask |= RELOAD_REG_INDEXED;
2461
2462 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2463 addressing. Restrict addressing on SPE for 64-bit types
2464 because of the SUBREG hackery used to address 64-bit floats in
2465 '32-bit' GPRs. */
2466
2467 if (TARGET_UPDATE
2468 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2469 && GET_MODE_SIZE (m2) <= 8
2470 && !VECTOR_MODE_P (m2)
2471 && !COMPLEX_MODE_P (m2)
2472 && !indexed_only_p
2473 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8))
2474 {
2475 addr_mask |= RELOAD_REG_PRE_INCDEC;
2476
2477 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2478 we don't allow PRE_MODIFY for some multi-register
2479 operations. */
2480 switch (m)
2481 {
2482 default:
2483 addr_mask |= RELOAD_REG_PRE_MODIFY;
2484 break;
2485
2486 case DImode:
2487 if (TARGET_POWERPC64)
2488 addr_mask |= RELOAD_REG_PRE_MODIFY;
2489 break;
2490
2491 case DFmode:
2492 case DDmode:
2493 if (TARGET_DF_INSN)
2494 addr_mask |= RELOAD_REG_PRE_MODIFY;
2495 break;
2496 }
2497 }
2498 }
2499
2500 /* GPR and FPR registers can do REG+OFFSET addressing, except
2501 possibly for SDmode. */
2502 if ((addr_mask != 0) && !indexed_only_p
2503 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
2504 addr_mask |= RELOAD_REG_OFFSET;
2505
2506 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2507 addressing on 128-bit types. */
2508 if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16)
2509 addr_mask |= RELOAD_REG_AND_M16;
2510
2511 reg_addr[m].addr_mask[rc] = addr_mask;
2512 any_addr_mask |= addr_mask;
2513 }
2514
2515 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2516 }
2517 }
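/* Illustrative composition (an assumption for a typical 64-bit
   configuration; the exact bits depend on the ISA flags in force):
   DFmode in the FPR class would collect RELOAD_REG_VALID,
   RELOAD_REG_INDEXED and RELOAD_REG_OFFSET, plus RELOAD_REG_PRE_INCDEC
   and RELOAD_REG_PRE_MODIFY when TARGET_UPDATE and TARGET_DF_INSN hold;
   rs6000_debug_addr_mask would render that mask as "v io++ ".  */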
2518
2519 \f
2520 /* Initialize the various global tables that are based on register size. */
2521 static void
2522 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2523 {
2524 ssize_t r, m, c;
2525 int align64;
2526 int align32;
2527
2528 /* Precalculate REGNO_REG_CLASS. */
2529 rs6000_regno_regclass[0] = GENERAL_REGS;
2530 for (r = 1; r < 32; ++r)
2531 rs6000_regno_regclass[r] = BASE_REGS;
2532
2533 for (r = 32; r < 64; ++r)
2534 rs6000_regno_regclass[r] = FLOAT_REGS;
2535
2536 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2537 rs6000_regno_regclass[r] = NO_REGS;
2538
2539 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2540 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2541
2542 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2543 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2544 rs6000_regno_regclass[r] = CR_REGS;
2545
2546 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2547 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2548 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2549 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2550 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2551 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2552 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2553 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2554 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2555 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2556 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2557 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2558
2559 /* Precalculate the mapping from register class to simpler reload register class. We don't
2560 need all of the register classes that are combinations of different
2561 classes, just the simple ones that have constraint letters. */
2562 for (c = 0; c < N_REG_CLASSES; c++)
2563 reg_class_to_reg_type[c] = NO_REG_TYPE;
2564
2565 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2566 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2567 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2568 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2569 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2570 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2571 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2572 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2573 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2574 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2575 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2576 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2577
2578 if (TARGET_VSX)
2579 {
2580 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2581 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2582 }
2583 else
2584 {
2585 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2586 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2587 }
2588
2589 /* Precalculate the valid memory formats as well as the vector information;
2590 this must be set up before the rs6000_hard_regno_nregs_internal calls
2591 below. */
2592 gcc_assert ((int)VECTOR_NONE == 0);
2593 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2594 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
2595
2596 gcc_assert ((int)CODE_FOR_nothing == 0);
2597 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2598
2599 gcc_assert ((int)NO_REGS == 0);
2600 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2601
2602 /* The VSX hardware allows native alignment for vectors; these settings control
2603 whether the compiler believes it can use native alignment or must still use 128-bit alignment. */
2604 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2605 {
2606 align64 = 64;
2607 align32 = 32;
2608 }
2609 else
2610 {
2611 align64 = 128;
2612 align32 = 128;
2613 }
2614
2615 /* V2DF mode, VSX only. */
2616 if (TARGET_VSX)
2617 {
2618 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2619 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2620 rs6000_vector_align[V2DFmode] = align64;
2621 }
2622
2623 /* V4SF mode, either VSX or Altivec. */
2624 if (TARGET_VSX)
2625 {
2626 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2627 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2628 rs6000_vector_align[V4SFmode] = align32;
2629 }
2630 else if (TARGET_ALTIVEC)
2631 {
2632 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2633 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2634 rs6000_vector_align[V4SFmode] = align32;
2635 }
2636
2637 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2638 and stores. */
2639 if (TARGET_ALTIVEC)
2640 {
2641 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2642 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2643 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2644 rs6000_vector_align[V4SImode] = align32;
2645 rs6000_vector_align[V8HImode] = align32;
2646 rs6000_vector_align[V16QImode] = align32;
2647
2648 if (TARGET_VSX)
2649 {
2650 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2651 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2652 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2653 }
2654 else
2655 {
2656 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2657 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2658 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2659 }
2660 }
2661
2662 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector unit. Allow
2663 insert/splat/extract under VSX. Altivec doesn't have 64-bit integer support. */
2664 if (TARGET_VSX)
2665 {
2666 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2667 rs6000_vector_unit[V2DImode]
2668 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2669 rs6000_vector_align[V2DImode] = align64;
2670
2671 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2672 rs6000_vector_unit[V1TImode]
2673 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2674 rs6000_vector_align[V1TImode] = 128;
2675 }
2676
2677 /* DFmode, see if we want to use the VSX unit. Memory is handled
2678 differently, so don't set rs6000_vector_mem. */
2679 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2680 {
2681 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2682 rs6000_vector_align[DFmode] = 64;
2683 }
2684
2685 /* SFmode, see if we want to use the VSX unit. */
2686 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
2687 {
2688 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2689 rs6000_vector_align[SFmode] = 32;
2690 }
2691
2692 /* Allow TImode in VSX register and set the VSX memory macros. */
2693 if (TARGET_VSX && TARGET_VSX_TIMODE)
2694 {
2695 rs6000_vector_mem[TImode] = VECTOR_VSX;
2696 rs6000_vector_align[TImode] = align64;
2697 }
2698
2699 /* TODO add SPE and paired floating point vector support. */
2700
2701 /* Register class constraints for the constraints that depend on compile
2702 switches. When the VSX code was added, different constraints were added
2703 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2704 of the VSX registers are used. The register classes for scalar floating
2705 point types are set based on whether we allow that type into the upper
2706 (Altivec) registers. GCC has register classes to target the Altivec
2707 registers for load/store operations, so that it can select a VSX memory
2708 operation instead of the traditional floating point operation. The
2709 constraints are:
2710
2711 d - Register class to use with traditional DFmode instructions.
2712 f - Register class to use with traditional SFmode instructions.
2713 v - Altivec register.
2714 wa - Any VSX register.
2715 wc - Reserved to represent individual CR bits (used in LLVM).
2716 wd - Preferred register class for V2DFmode.
2717 wf - Preferred register class for V4SFmode.
2718 wg - Float register for power6x move insns.
2719 wh - FP register for direct move instructions.
2720 wi - FP or VSX register to hold 64-bit integers for VSX insns.
2721 wj - FP or VSX register to hold 64-bit integers for direct moves.
2722 wk - FP or VSX register to hold 64-bit doubles for direct moves.
2723 wl - Float register if we can do 32-bit signed int loads.
2724 wm - VSX register for ISA 2.07 direct move operations.
2725 wn - always NO_REGS.
2726 wr - GPR if 64-bit mode is permitted.
2727 ws - Register class to do ISA 2.06 DF operations.
2728 wt - VSX register for TImode in VSX registers.
2729 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2730 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2731 ww - Register class to do SF conversions in with VSX operations.
2732 wx - Float register if we can do 32-bit int stores.
2733 wy - Register class to do ISA 2.07 SF operations.
2734 wz - Float register if we can do 32-bit unsigned int loads. */
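/* Illustrative effect (assuming the option names used below): with -mvsx
   but without -mupper-regs-df, RS6000_CONSTRAINT_ws resolves to FLOAT_REGS
   rather than VSX_REGS, so an insn pattern written against "ws" silently
   degrades to the traditional FPRs instead of needing a second, non-VSX
   pattern.  */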
2735
2736 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2737 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2738
2739 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2740 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2741
2742 if (TARGET_VSX)
2743 {
2744 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2745 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
2746 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
2747 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
2748
2749 if (TARGET_VSX_TIMODE)
2750 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
2751
2752 if (TARGET_UPPER_REGS_DF) /* DFmode */
2753 {
2754 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2755 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2756 }
2757 else
2758 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2759 }
2760
2761 /* Add conditional constraints based on various options, to allow us to
2762 collapse multiple insn patterns. */
2763 if (TARGET_ALTIVEC)
2764 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2765
2766 if (TARGET_MFPGPR) /* DFmode */
2767 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2768
2769 if (TARGET_LFIWAX)
2770 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
2771
2772 if (TARGET_DIRECT_MOVE)
2773 {
2774 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
2775 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
2776 = rs6000_constraints[RS6000_CONSTRAINT_wi];
2777 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
2778 = rs6000_constraints[RS6000_CONSTRAINT_ws];
2779 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2780 }
2781
2782 if (TARGET_POWERPC64)
2783 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2784
2785 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
2786 {
2787 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2788 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2789 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2790 }
2791 else if (TARGET_P8_VECTOR)
2792 {
2793 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
2794 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2795 }
2796 else if (TARGET_VSX)
2797 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2798
2799 if (TARGET_STFIWX)
2800 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2801
2802 if (TARGET_LFIWZX)
2803 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
2804
2805 /* Set up the reload helper and direct move functions. */
2806 if (TARGET_VSX || TARGET_ALTIVEC)
2807 {
2808 if (TARGET_64BIT)
2809 {
2810 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2811 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2812 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2813 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2814 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2815 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2816 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2817 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2818 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2819 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2820 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2821 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2822 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2823 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2824 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2825 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2826 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2827 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2828 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2829 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2830 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2831 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2832
2833 if (TARGET_VSX_TIMODE)
2834 {
2835 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2836 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2837 }
2838
2839 if (TARGET_DIRECT_MOVE)
2840 {
2841 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2842 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2843 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2844 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2845 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2846 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2847 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2848 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2849 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2850
2851 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2852 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2853 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2854 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2855 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2856 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2857 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2858 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2859 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2860 }
2861 }
2862 else
2863 {
2864 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
2865 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
2866 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
2867 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
2868 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
2869 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
2870 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
2871 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
2872 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
2873 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
2874 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
2875 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
2876 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
2877 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
2878 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
2879 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
2880 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
2881 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
2882 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
2883 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
2884 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
2885 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
2886
2887 if (TARGET_VSX_TIMODE)
2888 {
2889 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
2890 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
2891 }
2892
2893 if (TARGET_DIRECT_MOVE)
2894 {
2895 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
2896 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
2897 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
2898 }
2899 }
2900
2901 if (TARGET_UPPER_REGS_DF)
2902 reg_addr[DFmode].scalar_in_vmx_p = true;
2903
2904 if (TARGET_UPPER_REGS_SF)
2905 reg_addr[SFmode].scalar_in_vmx_p = true;
2906 }
2907
2908 /* Precalculate HARD_REGNO_NREGS. */
2909 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2910 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2911 rs6000_hard_regno_nregs[m][r]
2912 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
2913
2914 /* Precalculate HARD_REGNO_MODE_OK. */
2915 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2916 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2917 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
2918 rs6000_hard_regno_mode_ok_p[m][r] = true;
2919
2920 /* Precalculate CLASS_MAX_NREGS sizes. */
2921 for (c = 0; c < LIM_REG_CLASSES; ++c)
2922 {
2923 int reg_size;
2924
2925 if (TARGET_VSX && VSX_REG_CLASS_P (c))
2926 reg_size = UNITS_PER_VSX_WORD;
2927
2928 else if (c == ALTIVEC_REGS)
2929 reg_size = UNITS_PER_ALTIVEC_WORD;
2930
2931 else if (c == FLOAT_REGS)
2932 reg_size = UNITS_PER_FP_WORD;
2933
2934 else
2935 reg_size = UNITS_PER_WORD;
2936
2937 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2938 {
2939 machine_mode m2 = (machine_mode)m;
2940 int reg_size2 = reg_size;
2941
2942 /* TFmode/TDmode always takes 2 registers, even in VSX. */
2943 if (TARGET_VSX && VSX_REG_CLASS_P (c)
2944 && (m == TDmode || m == TFmode))
2945 reg_size2 = UNITS_PER_FP_WORD;
2946
2947 rs6000_class_max_nregs[m][c]
2948 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
2949 }
2950 }
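/* Worked example (illustrative): V4SFmode is 16 bytes, so in FLOAT_REGS
   (reg_size 8) it needs 2 registers, while in a VSX class (reg_size 16)
   it needs only 1.  TFmode/TDmode are the exception handled above: even
   in a VSX class, reg_size2 is forced to UNITS_PER_FP_WORD, giving 2
   registers.  */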
2951
2952 if (TARGET_E500_DOUBLE)
2953 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
2954
2955 /* Calculate the modes for which to automatically generate code using the
2956 reciprocal divide and square root instructions. In the future, we might
2957 generate these instructions automatically even if the user did not specify
2958 -mrecip; the older machines' double precision reciprocal sqrt estimate is
2959 not accurate enough. */
2960 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
2961 if (TARGET_FRES)
2962 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2963 if (TARGET_FRE)
2964 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2965 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2966 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2967 if (VECTOR_UNIT_VSX_P (V2DFmode))
2968 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2969
2970 if (TARGET_FRSQRTES)
2971 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2972 if (TARGET_FRSQRTE)
2973 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2974 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2975 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2976 if (VECTOR_UNIT_VSX_P (V2DFmode))
2977 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2978
2979 if (rs6000_recip_control)
2980 {
2981 if (!flag_finite_math_only)
2982 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
2983 if (flag_trapping_math)
2984 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
2985 if (!flag_reciprocal_math)
2986 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
2987 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
2988 {
2989 if (RS6000_RECIP_HAVE_RE_P (SFmode)
2990 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
2991 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2992
2993 if (RS6000_RECIP_HAVE_RE_P (DFmode)
2994 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
2995 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2996
2997 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
2998 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
2999 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3000
3001 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3002 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3003 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3004
3005 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3006 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3007 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3008
3009 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3010 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3011 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3012
3013 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3014 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3015 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3016
3017 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3018 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3019 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3020 }
3021 }
3022
3023 /* Update the addr mask bits in reg_addr to help secondary reload and the
3024 legitimate address (GO_IF_LEGITIMATE_ADDRESS) support figure out the
3025 appropriate addressing to use. */
3026 rs6000_setup_reg_addr_masks ();
3027
3028 if (global_init_p || TARGET_DEBUG_TARGET)
3029 {
3030 if (TARGET_DEBUG_REG)
3031 rs6000_debug_reg_global ();
3032
3033 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3034 fprintf (stderr,
3035 "SImode variable mult cost = %d\n"
3036 "SImode constant mult cost = %d\n"
3037 "SImode short constant mult cost = %d\n"
3038 "DImode multipliciation cost = %d\n"
3039 "SImode division cost = %d\n"
3040 "DImode division cost = %d\n"
3041 "Simple fp operation cost = %d\n"
3042 "DFmode multiplication cost = %d\n"
3043 "SFmode division cost = %d\n"
3044 "DFmode division cost = %d\n"
3045 "cache line size = %d\n"
3046 "l1 cache size = %d\n"
3047 "l2 cache size = %d\n"
3048 "simultaneous prefetches = %d\n"
3049 "\n",
3050 rs6000_cost->mulsi,
3051 rs6000_cost->mulsi_const,
3052 rs6000_cost->mulsi_const9,
3053 rs6000_cost->muldi,
3054 rs6000_cost->divsi,
3055 rs6000_cost->divdi,
3056 rs6000_cost->fp,
3057 rs6000_cost->dmul,
3058 rs6000_cost->sdiv,
3059 rs6000_cost->ddiv,
3060 rs6000_cost->cache_line_size,
3061 rs6000_cost->l1_cache_size,
3062 rs6000_cost->l2_cache_size,
3063 rs6000_cost->simultaneous_prefetches);
3064 }
3065 }
3066
3067 #if TARGET_MACHO
3068 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3069
3070 static void
3071 darwin_rs6000_override_options (void)
3072 {
3073 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3074 off. */
3075 rs6000_altivec_abi = 1;
3076 TARGET_ALTIVEC_VRSAVE = 1;
3077 rs6000_current_abi = ABI_DARWIN;
3078
3079 if (DEFAULT_ABI == ABI_DARWIN
3080 && TARGET_64BIT)
3081 darwin_one_byte_bool = 1;
3082
3083 if (TARGET_64BIT && ! TARGET_POWERPC64)
3084 {
3085 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3086 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3087 }
3088 if (flag_mkernel)
3089 {
3090 rs6000_default_long_calls = 1;
3091 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3092 }
3093
3094 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3095 Altivec. */
3096 if (!flag_mkernel && !flag_apple_kext
3097 && TARGET_64BIT
3098 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3099 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3100
3101 /* Unless the user (not the configurer) has explicitly overridden
3102 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3103 G4 unless targeting the kernel. */
3104 if (!flag_mkernel
3105 && !flag_apple_kext
3106 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3107 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3108 && ! global_options_set.x_rs6000_cpu_index)
3109 {
3110 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3111 }
3112 }
3113 #endif
3114
3115 /* If not otherwise specified by a target, make 'long double' equivalent to
3116 'double'. */
3117
3118 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3119 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3120 #endif
3121
3122 /* Return the builtin mask of the various options used that could affect which
3123 builtins were used. In the past we used target_flags, but we've run out of
3124 bits, and some options like SPE and PAIRED are no longer in
3125 target_flags. */
3126
3127 HOST_WIDE_INT
3128 rs6000_builtin_mask_calculate (void)
3129 {
3130 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3131 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3132 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3133 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3134 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3135 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3136 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3137 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3138 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3139 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3140 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3141 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3142 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3143 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3144 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3145 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
3146 }
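/* Illustrative use (hypothetical gating mask): a builtin that needs both
   Altivec and VSX would be enabled only when

     (rs6000_builtin_mask_calculate ()
      & (RS6000_BTM_ALTIVEC | RS6000_BTM_VSX))
     == (RS6000_BTM_ALTIVEC | RS6000_BTM_VSX)

   i.e. each RS6000_BTM_* bit records one option that gates builtins.  */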
3147
3148 /* Override command line options. Mostly we process the processor type and
3149 sometimes adjust other TARGET_ options. */
3150
3151 static bool
3152 rs6000_option_override_internal (bool global_init_p)
3153 {
3154 bool ret = true;
3155 bool have_cpu = false;
3156
3157 /* The default cpu requested at configure time, if any. */
3158 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3159
3160 HOST_WIDE_INT set_masks;
3161 int cpu_index;
3162 int tune_index;
3163 struct cl_target_option *main_target_opt
3164 = ((global_init_p || target_option_default_node == NULL)
3165 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3166
3167 /* Remember the explicit arguments. */
3168 if (global_init_p)
3169 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3170
3171 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3172 library functions, so warn about it. The flag may be useful for
3173 performance studies from time to time though, so don't disable it
3174 entirely. */
3175 if (global_options_set.x_rs6000_alignment_flags
3176 && rs6000_alignment_flags == MASK_ALIGN_POWER
3177 && DEFAULT_ABI == ABI_DARWIN
3178 && TARGET_64BIT)
3179 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3180 " it is incompatible with the installed C and C++ libraries");
3181
3182 /* Numerous experiments show that IRA-based loop pressure
3183 calculation works better for RTL loop invariant motion on targets
3184 with enough (>= 32) registers. It is an expensive optimization,
3185 so it is enabled only when optimizing for peak performance. */
3186 if (optimize >= 3 && global_init_p
3187 && !global_options_set.x_flag_ira_loop_pressure)
3188 flag_ira_loop_pressure = 1;
3189
3190 /* Set the pointer size. */
3191 if (TARGET_64BIT)
3192 {
3193 rs6000_pmode = (int)DImode;
3194 rs6000_pointer_size = 64;
3195 }
3196 else
3197 {
3198 rs6000_pmode = (int)SImode;
3199 rs6000_pointer_size = 32;
3200 }
3201
3202 /* Some OSs don't support saving the high part of 64-bit registers on context
3203 switch. Other OSs don't support saving Altivec registers. On those OSs,
3204 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3205 if the user wants either, the user must explicitly specify them and we
3206 won't interfere with the user's specification. */
3207
3208 set_masks = POWERPC_MASKS;
3209 #ifdef OS_MISSING_POWERPC64
3210 if (OS_MISSING_POWERPC64)
3211 set_masks &= ~OPTION_MASK_POWERPC64;
3212 #endif
3213 #ifdef OS_MISSING_ALTIVEC
3214 if (OS_MISSING_ALTIVEC)
3215 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3216 #endif
3217
3218 /* Don't let the processor default override options given explicitly. */
3219 set_masks &= ~rs6000_isa_flags_explicit;
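
/* A small worked example of the masking above, with hypothetical input:
   if the user passed -mno-altivec explicitly, OPTION_MASK_ALTIVEC is set
   in rs6000_isa_flags_explicit and therefore cleared from set_masks, so
   the processor defaults applied below can no longer flip that bit.  */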
3220
3221 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3222 the cpu in a target attribute or pragma, but did not specify a tuning
3223 option, use the cpu for the tuning option rather than the option specified
3224 with -mtune on the command line. Process a '--with-cpu' configuration
3225 request as an implicit --cpu. */
3226 if (rs6000_cpu_index >= 0)
3227 {
3228 cpu_index = rs6000_cpu_index;
3229 have_cpu = true;
3230 }
3231 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3232 {
3233 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3234 have_cpu = true;
3235 }
3236 else if (implicit_cpu)
3237 {
3238 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3239 have_cpu = true;
3240 }
3241 else
3242 {
3243 const char *default_cpu = (TARGET_POWERPC64 ? "powerpc64" : "powerpc");
3244 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3245 have_cpu = false;
3246 }
3247
3248 gcc_assert (cpu_index >= 0);
3249
3250 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3251 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3252 with those from the cpu, except for options that were explicitly set. If
3253 we don't have a cpu, do not override the target bits set in
3254 TARGET_DEFAULT. */
3255 if (have_cpu)
3256 {
3257 rs6000_isa_flags &= ~set_masks;
3258 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3259 & set_masks);
3260 }
3261 else
3262 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3263 & ~rs6000_isa_flags_explicit);
3264
3265 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3266 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3267 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3268 to using rs6000_isa_flags, we need to do the initialization here. */
3269 if (!have_cpu)
3270 rs6000_isa_flags |= (TARGET_DEFAULT & ~rs6000_isa_flags_explicit);
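
/* A sketch of how the resolution above plays out, assuming hypothetical
   inputs: with -mcpu=power7 (have_cpu true), every bit in set_masks is
   first cleared from rs6000_isa_flags and then refilled from the power7
   entry of processor_target_table, so only explicitly given -m<option>
   flags survive; without any cpu, TARGET_DEFAULT merely tops up whatever
   the user did not set explicitly.  */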
3271
3272 if (rs6000_tune_index >= 0)
3273 tune_index = rs6000_tune_index;
3274 else if (have_cpu)
3275 rs6000_tune_index = tune_index = cpu_index;
3276 else
3277 {
3278 size_t i;
3279 enum processor_type tune_proc
3280 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3281
3282 tune_index = -1;
3283 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3284 if (processor_target_table[i].processor == tune_proc)
3285 {
3286 rs6000_tune_index = tune_index = i;
3287 break;
3288 }
3289 }
3290
3291 gcc_assert (tune_index >= 0);
3292 rs6000_cpu = processor_target_table[tune_index].processor;
3293
3294 /* Pick defaults for SPE-related control flags. Do this early to make sure
3295 that the TARGET_ macros are representative ASAP. */
3296 {
3297 int spe_capable_cpu =
3298 (rs6000_cpu == PROCESSOR_PPC8540
3299 || rs6000_cpu == PROCESSOR_PPC8548);
3300
3301 if (!global_options_set.x_rs6000_spe_abi)
3302 rs6000_spe_abi = spe_capable_cpu;
3303
3304 if (!global_options_set.x_rs6000_spe)
3305 rs6000_spe = spe_capable_cpu;
3306
3307 if (!global_options_set.x_rs6000_float_gprs)
3308 rs6000_float_gprs =
3309 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3310 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3311 : 0);
3312 }
3313
3314 if (global_options_set.x_rs6000_spe_abi
3315 && rs6000_spe_abi
3316 && !TARGET_SPE_ABI)
3317 error ("not configured for SPE ABI");
3318
3319 if (global_options_set.x_rs6000_spe
3320 && rs6000_spe
3321 && !TARGET_SPE)
3322 error ("not configured for SPE instruction set");
3323
3324 if (main_target_opt != NULL
3325 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3326 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3327 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3328 error ("target attribute or pragma changes SPE ABI");
3329
3330 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3331 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3332 || rs6000_cpu == PROCESSOR_PPCE5500)
3333 {
3334 if (TARGET_ALTIVEC)
3335 error ("AltiVec not supported in this target");
3336 if (TARGET_SPE)
3337 error ("SPE not supported in this target");
3338 }
3339 if (rs6000_cpu == PROCESSOR_PPCE6500)
3340 {
3341 if (TARGET_SPE)
3342 error ("SPE not supported in this target");
3343 }
3344
3345 /* Disable Cell microcode if we are optimizing for the Cell
3346 and not optimizing for size. */
3347 if (rs6000_gen_cell_microcode == -1)
3348 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3349 && !optimize_size);
3350
3351 /* If we are optimizing big endian systems for space and it's OK to
3352 use instructions that would be microcoded on the Cell, use the
3353 load/store multiple and string instructions. */
3354 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3355 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3356 | OPTION_MASK_STRING);
3357
3358 /* Don't allow -mmultiple or -mstring on little endian systems
3359 unless the cpu is a 750, because the hardware doesn't support the
3360 instructions used in little endian mode, and they cause an alignment
3361 trap. The 750 does not cause an alignment trap (except when the
3362 target is unaligned). */
3363
3364 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3365 {
3366 if (TARGET_MULTIPLE)
3367 {
3368 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3369 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3370 warning (0, "-mmultiple is not supported on little endian systems");
3371 }
3372
3373 if (TARGET_STRING)
3374 {
3375 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3376 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3377 warning (0, "-mstring is not supported on little endian systems");
3378 }
3379 }
3380
3381 /* If little-endian, default to -mstrict-align on older processors.
3382 Testing for htm matches power8 and later. */
3383 if (!BYTES_BIG_ENDIAN
3384 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3385 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3386
3387 /* -maltivec={le,be} implies -maltivec. */
3388 if (rs6000_altivec_element_order != 0)
3389 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3390
3391 /* Disallow -maltivec=le in big endian mode for now. This is not
3392 known to be useful for anyone. */
3393 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3394 {
3395 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3396 rs6000_altivec_element_order = 0;
3397 }
3398
3399 /* Add some warnings for VSX. */
3400 if (TARGET_VSX)
3401 {
3402 const char *msg = NULL;
3403 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3404 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3405 {
3406 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3407 msg = N_("-mvsx requires hardware floating point");
3408 else
3409 {
3410 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3411 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3412 }
3413 }
3414 else if (TARGET_PAIRED_FLOAT)
3415 msg = N_("-mvsx and -mpaired are incompatible");
3416 else if (TARGET_AVOID_XFORM > 0)
3417 msg = N_("-mvsx needs indexed addressing");
3418 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3419 & OPTION_MASK_ALTIVEC))
3420 {
3421 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3422 msg = N_("-mvsx and -mno-altivec are incompatible");
3423 else
3424 msg = N_("-mno-altivec disables vsx");
3425 }
3426
3427 if (msg)
3428 {
3429 warning (0, msg);
3430 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3431 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3432 }
3433 }
3434
3435 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3436 the -mcpu setting to enable options that conflict. */
3437 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3438 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3439 | OPTION_MASK_ALTIVEC
3440 | OPTION_MASK_VSX)) != 0)
3441 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3442 | OPTION_MASK_DIRECT_MOVE)
3443 & ~rs6000_isa_flags_explicit);
3444
3445 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3446 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3447
3448 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3449 unless the user explicitly used -mno-<option> to disable the code. */
3450 if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3451 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3452 else if (TARGET_VSX)
3453 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3454 else if (TARGET_POPCNTD)
3455 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3456 else if (TARGET_DFP)
3457 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3458 else if (TARGET_CMPB)
3459 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3460 else if (TARGET_FPRND)
3461 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3462 else if (TARGET_POPCNTB)
3463 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3464 else if (TARGET_ALTIVEC)
3465 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
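
/* To make the cascade above concrete (a hypothetical example): enabling
   -mpower8-vector pulls in ISA_2_7_MASKS_SERVER, which is assumed to be
   a superset of the older ISA level masks, so each branch of the chain
   only needs to name the newest feature group that was requested.  */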
3466
3467 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3468 {
3469 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3470 error ("-mcrypto requires -maltivec");
3471 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3472 }
3473
3474 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3475 {
3476 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3477 error ("-mdirect-move requires -mvsx");
3478 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3479 }
3480
3481 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3482 {
3483 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3484 error ("-mpower8-vector requires -maltivec");
3485 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3486 }
3487
3488 if (TARGET_P8_VECTOR && !TARGET_VSX)
3489 {
3490 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3491 error ("-mpower8-vector requires -mvsx");
3492 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3493 }
3494
3495 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3496 {
3497 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3498 error ("-mvsx-timode requires -mvsx");
3499 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3500 }
3501
3502 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3503 {
3504 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3505 error ("-mhard-dfp requires -mhard-float");
3506 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3507 }
3508
3509 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
3510 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
3511 the individual option. */
3512 if (TARGET_UPPER_REGS > 0)
3513 {
3514 if (TARGET_VSX
3515 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3516 {
3517 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
3518 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3519 }
3520 if (TARGET_P8_VECTOR
3521 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3522 {
3523 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
3524 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3525 }
3526 }
3527 else if (TARGET_UPPER_REGS == 0)
3528 {
3529 if (TARGET_VSX
3530 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3531 {
3532 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3533 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3534 }
3535 if (TARGET_P8_VECTOR
3536 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3537 {
3538 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3539 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3540 }
3541 }
3542
3543 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
3544 {
3545 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
3546 error ("-mupper-regs-df requires -mvsx");
3547 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3548 }
3549
3550 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
3551 {
3552 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
3553 error ("-mupper-regs-sf requires -mpower8-vector");
3554 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3555 }
3556
3557 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3558 silently turn off quad memory mode. */
3559 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3560 {
3561 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3562 warning (0, N_("-mquad-memory requires 64-bit mode"));
3563
3564 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3565 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3566
3567 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3568 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3569 }
3570
3571 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3572 the words are reversed, but atomic operations can still be done by
3573 swapping the words. */
3574 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3575 {
3576 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3577 warning (0, N_("-mquad-memory is not available in little endian mode"));
3578
3579 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3580 }
3581
3582 /* Assume that if the user asked for normal quad memory instructions, they
3583 want the atomic versions as well, unless they explicitly told us not to
3584 use quad word atomic instructions. */
3585 if (TARGET_QUAD_MEMORY
3586 && !TARGET_QUAD_MEMORY_ATOMIC
3587 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3588 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3589
3590 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3591 generating power8 instructions. */
3592 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3593 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3594 & OPTION_MASK_P8_FUSION);
3595
3596 /* Power8 does not fuse sign-extended loads with the addis. If we are
3597 optimizing at high levels for speed, convert a sign-extended load into a
3598 zero-extending load and an explicit sign extension. */
3599 if (TARGET_P8_FUSION
3600 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3601 && optimize_function_for_speed_p (cfun)
3602 && optimize >= 3)
3603 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
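
/* A hedged illustration of the conversion above, assuming the usual
   word-sized case: a sign-extending load such as lwa would instead be
   emitted as the zero-extending lwz followed by an explicit extsw, so
   the load itself remains eligible for addis/load fusion on power8.  */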
3604
3605 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3606 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
3607
3608 /* E500mc does "better" if we inline more aggressively. Respect the
3609 user's opinion, though. */
3610 if (rs6000_block_move_inline_limit == 0
3611 && (rs6000_cpu == PROCESSOR_PPCE500MC
3612 || rs6000_cpu == PROCESSOR_PPCE500MC64
3613 || rs6000_cpu == PROCESSOR_PPCE5500
3614 || rs6000_cpu == PROCESSOR_PPCE6500))
3615 rs6000_block_move_inline_limit = 128;
3616
3617 /* store_one_arg depends on expand_block_move to handle at least the
3618 size of reg_parm_stack_space. */
3619 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
3620 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
3621
3622 if (global_init_p)
3623 {
3624 /* If the appropriate debug option is enabled, replace the target hooks
3625 with debug versions that call the real version and then print
3626 debugging information. */
3627 if (TARGET_DEBUG_COST)
3628 {
3629 targetm.rtx_costs = rs6000_debug_rtx_costs;
3630 targetm.address_cost = rs6000_debug_address_cost;
3631 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
3632 }
3633
3634 if (TARGET_DEBUG_ADDR)
3635 {
3636 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
3637 targetm.legitimize_address = rs6000_debug_legitimize_address;
3638 rs6000_secondary_reload_class_ptr
3639 = rs6000_debug_secondary_reload_class;
3640 rs6000_secondary_memory_needed_ptr
3641 = rs6000_debug_secondary_memory_needed;
3642 rs6000_cannot_change_mode_class_ptr
3643 = rs6000_debug_cannot_change_mode_class;
3644 rs6000_preferred_reload_class_ptr
3645 = rs6000_debug_preferred_reload_class;
3646 rs6000_legitimize_reload_address_ptr
3647 = rs6000_debug_legitimize_reload_address;
3648 rs6000_mode_dependent_address_ptr
3649 = rs6000_debug_mode_dependent_address;
3650 }
3651
3652 if (rs6000_veclibabi_name)
3653 {
3654 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
3655 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
3656 else
3657 {
3658 error ("unknown vectorization library ABI type (%s) for "
3659 "-mveclibabi= switch", rs6000_veclibabi_name);
3660 ret = false;
3661 }
3662 }
3663 }
3664
3665 if (!global_options_set.x_rs6000_long_double_type_size)
3666 {
3667 if (main_target_opt != NULL
3668 && (main_target_opt->x_rs6000_long_double_type_size
3669 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
3670 error ("target attribute or pragma changes long double size");
3671 else
3672 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
3673 }
3674
3675 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
3676 if (!global_options_set.x_rs6000_ieeequad)
3677 rs6000_ieeequad = 1;
3678 #endif
3679
3680 /* Disable VSX and AltiVec silently if the user switched cpus to power7 via
3681 a target attribute or pragma, which automatically enables both options,
3682 unless the AltiVec ABI was set. That ABI is enabled by default for 64-bit,
3683 but not for 32-bit. */
3684 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3685 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3686 & ~rs6000_isa_flags_explicit);
3687
3688 /* Enable Altivec ABI for AIX -maltivec. */
3689 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
3690 {
3691 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3692 error ("target attribute or pragma changes AltiVec ABI");
3693 else
3694 rs6000_altivec_abi = 1;
3695 }
3696
3697 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
3698 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
3699 be explicitly overridden in either case. */
3700 if (TARGET_ELF)
3701 {
3702 if (!global_options_set.x_rs6000_altivec_abi
3703 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
3704 {
3705 if (main_target_opt != NULL
3706 && !main_target_opt->x_rs6000_altivec_abi)
3707 error ("target attribute or pragma changes AltiVec ABI");
3708 else
3709 rs6000_altivec_abi = 1;
3710 }
3711 }
3712
3713 /* Set the Darwin64 ABI as default for 64-bit Darwin.
3714 So far, the only darwin64 targets are also MACH-O. */
3715 if (TARGET_MACHO
3716 && DEFAULT_ABI == ABI_DARWIN
3717 && TARGET_64BIT)
3718 {
3719 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
3720 error ("target attribute or pragma changes darwin64 ABI");
3721 else
3722 {
3723 rs6000_darwin64_abi = 1;
3724 /* Default to natural alignment, for better performance. */
3725 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3726 }
3727 }
3728
3729 /* Place FP constants in the constant pool instead of the TOC
3730 if section anchors are enabled. */
3731 if (flag_section_anchors
3732 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
3733 TARGET_NO_FP_IN_TOC = 1;
3734
3735 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3736 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
3737
3738 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3739 SUBTARGET_OVERRIDE_OPTIONS;
3740 #endif
3741 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3742 SUBSUBTARGET_OVERRIDE_OPTIONS;
3743 #endif
3744 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
3745 SUB3TARGET_OVERRIDE_OPTIONS;
3746 #endif
3747
3748 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3749 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
3750
3751 /* For the E500 family of cores, reset the single/double FP flags to let us
3752 check that they remain constant across attributes or pragmas. Also,
3753 clear a possible request for string instructions, which are not supported
3754 and which we might have silently enabled above for -Os.
3755
3756 For other families, clear ISEL in case it was set implicitly.
3757 */
3758
3759 switch (rs6000_cpu)
3760 {
3761 case PROCESSOR_PPC8540:
3762 case PROCESSOR_PPC8548:
3763 case PROCESSOR_PPCE500MC:
3764 case PROCESSOR_PPCE500MC64:
3765 case PROCESSOR_PPCE5500:
3766 case PROCESSOR_PPCE6500:
3767
3768 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
3769 rs6000_double_float = TARGET_E500_DOUBLE;
3770
3771 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3772
3773 break;
3774
3775 default:
3776
3777 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
3778 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
3779
3780 break;
3781 }
3782
3783 if (main_target_opt)
3784 {
3785 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
3786 error ("target attribute or pragma changes single precision floating "
3787 "point");
3788 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
3789 error ("target attribute or pragma changes double precision floating "
3790 "point");
3791 }
3792
3793 /* Detect invalid option combinations with E500. */
3794 CHECK_E500_OPTIONS;
3795
3796 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
3797 && rs6000_cpu != PROCESSOR_POWER5
3798 && rs6000_cpu != PROCESSOR_POWER6
3799 && rs6000_cpu != PROCESSOR_POWER7
3800 && rs6000_cpu != PROCESSOR_POWER8
3801 && rs6000_cpu != PROCESSOR_PPCA2
3802 && rs6000_cpu != PROCESSOR_CELL
3803 && rs6000_cpu != PROCESSOR_PPC476);
3804 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
3805 || rs6000_cpu == PROCESSOR_POWER5
3806 || rs6000_cpu == PROCESSOR_POWER7
3807 || rs6000_cpu == PROCESSOR_POWER8);
3808 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
3809 || rs6000_cpu == PROCESSOR_POWER5
3810 || rs6000_cpu == PROCESSOR_POWER6
3811 || rs6000_cpu == PROCESSOR_POWER7
3812 || rs6000_cpu == PROCESSOR_POWER8
3813 || rs6000_cpu == PROCESSOR_PPCE500MC
3814 || rs6000_cpu == PROCESSOR_PPCE500MC64
3815 || rs6000_cpu == PROCESSOR_PPCE5500
3816 || rs6000_cpu == PROCESSOR_PPCE6500);
3817
3818 /* Allow debug switches to override the above settings. These are set to -1
3819 in rs6000.opt to indicate the user hasn't directly set the switch. */
3820 if (TARGET_ALWAYS_HINT >= 0)
3821 rs6000_always_hint = TARGET_ALWAYS_HINT;
3822
3823 if (TARGET_SCHED_GROUPS >= 0)
3824 rs6000_sched_groups = TARGET_SCHED_GROUPS;
3825
3826 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
3827 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
3828
3829 rs6000_sched_restricted_insns_priority
3830 = (rs6000_sched_groups ? 1 : 0);
3831
3832 /* Handle -msched-costly-dep option. */
3833 rs6000_sched_costly_dep
3834 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
3835
3836 if (rs6000_sched_costly_dep_str)
3837 {
3838 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
3839 rs6000_sched_costly_dep = no_dep_costly;
3840 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
3841 rs6000_sched_costly_dep = all_deps_costly;
3842 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
3843 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
3844 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
3845 rs6000_sched_costly_dep = store_to_load_dep_costly;
3846 else
3847 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
3848 atoi (rs6000_sched_costly_dep_str));
3849 }
3850
3851 /* Handle -minsert-sched-nops option. */
3852 rs6000_sched_insert_nops
3853 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
3854
3855 if (rs6000_sched_insert_nops_str)
3856 {
3857 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
3858 rs6000_sched_insert_nops = sched_finish_none;
3859 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
3860 rs6000_sched_insert_nops = sched_finish_pad_groups;
3861 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
3862 rs6000_sched_insert_nops = sched_finish_regroup_exact;
3863 else
3864 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
3865 atoi (rs6000_sched_insert_nops_str));
3866 }
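
/* A brief usage note for the two string options parsed above: each
   accepts either a keyword or a bare integer, so hypothetical invocations
   such as -msched-costly-dep=true_store_to_load or -minsert-sched-nops=pad
   select the named enumerator, while a value like -msched-costly-dep=5 is
   converted with atoi.  */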
3867
3868 if (global_init_p)
3869 {
3870 #ifdef TARGET_REGNAMES
3871 /* If the user desires alternate register names, copy in the
3872 alternate names now. */
3873 if (TARGET_REGNAMES)
3874 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
3875 #endif
3876
3877 /* Set aix_struct_return last, after the ABI is determined.
3878 If -maix-struct-return or -msvr4-struct-return was explicitly
3879 used, don't override with the ABI default. */
3880 if (!global_options_set.x_aix_struct_return)
3881 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
3882
3883 #if 0
3884 /* IBM XL compiler defaults to unsigned bitfields. */
3885 if (TARGET_XL_COMPAT)
3886 flag_signed_bitfields = 0;
3887 #endif
3888
3889 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3890 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
3891
3892 if (TARGET_TOC)
3893 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
3894
3895 /* We can only guarantee the availability of DI pseudo-ops when
3896 assembling for 64-bit targets. */
3897 if (!TARGET_64BIT)
3898 {
3899 targetm.asm_out.aligned_op.di = NULL;
3900 targetm.asm_out.unaligned_op.di = NULL;
3901 }
3902
3903
3904 /* Set branch target alignment, if not optimizing for size. */
3905 if (!optimize_size)
3906 {
3907 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
3908 8-byte aligned to avoid misprediction by the branch predictor. */
3909 if (rs6000_cpu == PROCESSOR_TITAN
3910 || rs6000_cpu == PROCESSOR_CELL)
3911 {
3912 if (align_functions <= 0)
3913 align_functions = 8;
3914 if (align_jumps <= 0)
3915 align_jumps = 8;
3916 if (align_loops <= 0)
3917 align_loops = 8;
3918 }
3919 if (rs6000_align_branch_targets)
3920 {
3921 if (align_functions <= 0)
3922 align_functions = 16;
3923 if (align_jumps <= 0)
3924 align_jumps = 16;
3925 if (align_loops <= 0)
3926 {
3927 can_override_loop_align = 1;
3928 align_loops = 16;
3929 }
3930 }
3931 if (align_jumps_max_skip <= 0)
3932 align_jumps_max_skip = 15;
3933 if (align_loops_max_skip <= 0)
3934 align_loops_max_skip = 15;
3935 }
3936
3937 /* Arrange to save and restore machine status around nested functions. */
3938 init_machine_status = rs6000_init_machine_status;
3939
3940 /* We should always be splitting complex arguments, but we can't break
3941 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
3942 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
3943 targetm.calls.split_complex_arg = NULL;
3944 }
3945
3946 /* Initialize rs6000_cost with the appropriate target costs. */
3947 if (optimize_size)
3948 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
3949 else
3950 switch (rs6000_cpu)
3951 {
3952 case PROCESSOR_RS64A:
3953 rs6000_cost = &rs64a_cost;
3954 break;
3955
3956 case PROCESSOR_MPCCORE:
3957 rs6000_cost = &mpccore_cost;
3958 break;
3959
3960 case PROCESSOR_PPC403:
3961 rs6000_cost = &ppc403_cost;
3962 break;
3963
3964 case PROCESSOR_PPC405:
3965 rs6000_cost = &ppc405_cost;
3966 break;
3967
3968 case PROCESSOR_PPC440:
3969 rs6000_cost = &ppc440_cost;
3970 break;
3971
3972 case PROCESSOR_PPC476:
3973 rs6000_cost = &ppc476_cost;
3974 break;
3975
3976 case PROCESSOR_PPC601:
3977 rs6000_cost = &ppc601_cost;
3978 break;
3979
3980 case PROCESSOR_PPC603:
3981 rs6000_cost = &ppc603_cost;
3982 break;
3983
3984 case PROCESSOR_PPC604:
3985 rs6000_cost = &ppc604_cost;
3986 break;
3987
3988 case PROCESSOR_PPC604e:
3989 rs6000_cost = &ppc604e_cost;
3990 break;
3991
3992 case PROCESSOR_PPC620:
3993 rs6000_cost = &ppc620_cost;
3994 break;
3995
3996 case PROCESSOR_PPC630:
3997 rs6000_cost = &ppc630_cost;
3998 break;
3999
4000 case PROCESSOR_CELL:
4001 rs6000_cost = &ppccell_cost;
4002 break;
4003
4004 case PROCESSOR_PPC750:
4005 case PROCESSOR_PPC7400:
4006 rs6000_cost = &ppc750_cost;
4007 break;
4008
4009 case PROCESSOR_PPC7450:
4010 rs6000_cost = &ppc7450_cost;
4011 break;
4012
4013 case PROCESSOR_PPC8540:
4014 case PROCESSOR_PPC8548:
4015 rs6000_cost = &ppc8540_cost;
4016 break;
4017
4018 case PROCESSOR_PPCE300C2:
4019 case PROCESSOR_PPCE300C3:
4020 rs6000_cost = &ppce300c2c3_cost;
4021 break;
4022
4023 case PROCESSOR_PPCE500MC:
4024 rs6000_cost = &ppce500mc_cost;
4025 break;
4026
4027 case PROCESSOR_PPCE500MC64:
4028 rs6000_cost = &ppce500mc64_cost;
4029 break;
4030
4031 case PROCESSOR_PPCE5500:
4032 rs6000_cost = &ppce5500_cost;
4033 break;
4034
4035 case PROCESSOR_PPCE6500:
4036 rs6000_cost = &ppce6500_cost;
4037 break;
4038
4039 case PROCESSOR_TITAN:
4040 rs6000_cost = &titan_cost;
4041 break;
4042
4043 case PROCESSOR_POWER4:
4044 case PROCESSOR_POWER5:
4045 rs6000_cost = &power4_cost;
4046 break;
4047
4048 case PROCESSOR_POWER6:
4049 rs6000_cost = &power6_cost;
4050 break;
4051
4052 case PROCESSOR_POWER7:
4053 rs6000_cost = &power7_cost;
4054 break;
4055
4056 case PROCESSOR_POWER8:
4057 rs6000_cost = &power8_cost;
4058 break;
4059
4060 case PROCESSOR_PPCA2:
4061 rs6000_cost = &ppca2_cost;
4062 break;
4063
4064 default:
4065 gcc_unreachable ();
4066 }
4067
4068 if (global_init_p)
4069 {
4070 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4071 rs6000_cost->simultaneous_prefetches,
4072 global_options.x_param_values,
4073 global_options_set.x_param_values);
4074 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4075 global_options.x_param_values,
4076 global_options_set.x_param_values);
4077 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4078 rs6000_cost->cache_line_size,
4079 global_options.x_param_values,
4080 global_options_set.x_param_values);
4081 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4082 global_options.x_param_values,
4083 global_options_set.x_param_values);
4084
4085 /* Increase loop peeling limits based on performance analysis. */
4086 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4087 global_options.x_param_values,
4088 global_options_set.x_param_values);
4089 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4090 global_options.x_param_values,
4091 global_options_set.x_param_values);
4092
4093 /* If using typedef char *va_list, signal that
4094 __builtin_va_start (&ap, 0) can be optimized to
4095 ap = __builtin_next_arg (0). */
4096 if (DEFAULT_ABI != ABI_V4)
4097 targetm.expand_builtin_va_start = NULL;
4098 }
4099
4100 /* Set up single/double float flags.
4101 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4102 then set both flags. */
4103 if (TARGET_HARD_FLOAT && TARGET_FPRS
4104 && rs6000_single_float == 0 && rs6000_double_float == 0)
4105 rs6000_single_float = rs6000_double_float = 1;
4106
4107 /* If not explicitly specified via option, decide whether to generate indexed
4108 load/store instructions. */
4109 if (TARGET_AVOID_XFORM == -1)
4110 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4111 DERAT mispredict penalty. However, the LVE and STVE AltiVec instructions
4112 need indexed accesses and the type used is the scalar type of the element
4113 being loaded or stored. */
4114 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4115 && !TARGET_ALTIVEC);
4116
4117 /* Set the -mrecip options. */
4118 if (rs6000_recip_name)
4119 {
4120 char *p = ASTRDUP (rs6000_recip_name);
4121 char *q;
4122 unsigned int mask, i;
4123 bool invert;
4124
4125 while ((q = strtok (p, ",")) != NULL)
4126 {
4127 p = NULL;
4128 if (*q == '!')
4129 {
4130 invert = true;
4131 q++;
4132 }
4133 else
4134 invert = false;
4135
4136 if (!strcmp (q, "default"))
4137 mask = ((TARGET_RECIP_PRECISION)
4138 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4139 else
4140 {
4141 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4142 if (!strcmp (q, recip_options[i].string))
4143 {
4144 mask = recip_options[i].mask;
4145 break;
4146 }
4147
4148 if (i == ARRAY_SIZE (recip_options))
4149 {
4150 error ("unknown option for -mrecip=%s", q);
4151 invert = false;
4152 mask = 0;
4153 ret = false;
4154 }
4155 }
4156
4157 if (invert)
4158 rs6000_recip_control &= ~mask;
4159 else
4160 rs6000_recip_control |= mask;
4161 }
4162 }
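
/* A hedged usage sketch of the -mrecip parsing above: a hypothetical
   option string such as -mrecip=default,!rsqrtd would first OR in the
   default mask and then, because of the '!' prefix, clear the bits of
   the "rsqrtd" entry (assuming such an entry exists in recip_options)
   from rs6000_recip_control; an unknown keyword takes the error path.  */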
4163
4164 /* Set the builtin mask for the various options that could affect which
4165 builtins are available. In the past we used target_flags, but we've run
4166 out of bits, and some options like SPE and PAIRED are no longer in
4167 target_flags. */
4168 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4169 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4170 {
4171 fprintf (stderr,
4172 "new builtin mask = " HOST_WIDE_INT_PRINT_HEX ", ",
4173 rs6000_builtin_mask);
4174 rs6000_print_builtin_options (stderr, 0, NULL, rs6000_builtin_mask);
4175 }
4176
4177 /* Initialize all of the registers. */
4178 rs6000_init_hard_regno_mode_ok (global_init_p);
4179
4180 /* Save the initial options in case the user uses function-specific options. */
4181 if (global_init_p)
4182 target_option_default_node = target_option_current_node
4183 = build_target_option_node (&global_options);
4184
4185 /* If not explicitly specified via option, decide whether to generate the
4186 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4187 if (TARGET_LINK_STACK == -1)
4188 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4189
4190 return ret;
4191 }
4192
4193 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4194 define the target cpu type. */
4195
4196 static void
4197 rs6000_option_override (void)
4198 {
4199 (void) rs6000_option_override_internal (true);
4200
4201 /* Register machine-specific passes. This needs to be done at start-up.
4202 It's convenient to do it here (like i386 does). */
4203 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4204
4205 struct register_pass_info analyze_swaps_info
4206 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4207
4208 register_pass (&analyze_swaps_info);
4209 }
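
/* A note on the registration above: the register_pass_info tuple asks for
   the new swaps-analysis pass to run immediately before instance 1 of the
   existing "cse1" pass, the same mechanism other backends use to hook in
   machine-specific RTL passes.  */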
4210
4211 \f
4212 /* Implement targetm.vectorize.builtin_mask_for_load. */
4213 static tree
4214 rs6000_builtin_mask_for_load (void)
4215 {
4216 if (TARGET_ALTIVEC || TARGET_VSX)
4217 return altivec_builtin_mask_for_load;
4218 else
4219 return 0;
4220 }
4221
4222 /* Implement LOOP_ALIGN. */
4223 int
4224 rs6000_loop_align (rtx label)
4225 {
4226 basic_block bb;
4227 int ninsns;
4228
4229 /* Don't override loop alignment if -falign-loops was specified. */
4230 if (!can_override_loop_align)
4231 return align_loops_log;
4232
4233 bb = BLOCK_FOR_INSN (label);
4234 ninsns = num_loop_insns(bb->loop_father);
4235
4236 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4237 if (ninsns > 4 && ninsns <= 8
4238 && (rs6000_cpu == PROCESSOR_POWER4
4239 || rs6000_cpu == PROCESSOR_POWER5
4240 || rs6000_cpu == PROCESSOR_POWER6
4241 || rs6000_cpu == PROCESSOR_POWER7
4242 || rs6000_cpu == PROCESSOR_POWER8))
4243 return 5;
4244 else
4245 return align_loops_log;
4246 }
4247
4248 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4249 static int
4250 rs6000_loop_align_max_skip (rtx_insn *label)
4251 {
4252 return (1 << rs6000_loop_align (label)) - 1;
4253 }
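
/* Worked numbers for the two hooks above: a return value of 5 from
   rs6000_loop_align means 1 << 5 = 32-byte alignment for small loops on
   the listed Power processors, and the max-skip hook then allows up to
   (1 << 5) - 1 = 31 bytes of padding to reach that boundary.  */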
4254
4255 /* Return true iff a data reference of type TYPE can reach vector alignment
4256 (16) after applying N iterations. This routine does not determine how
4257 many iterations are required to reach the desired alignment. */
4258
4259 static bool
4260 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4261 {
4262 if (is_packed)
4263 return false;
4264
4265 if (TARGET_32BIT)
4266 {
4267 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4268 return true;
4269
4270 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4271 return true;
4272
4273 return false;
4274 }
4275 else
4276 {
4277 if (TARGET_MACHO)
4278 return false;
4279
4280 /* Assume that all other types are naturally aligned. CHECKME! */
4281 return true;
4282 }
4283 }
4284
4285 /* Return true if the vector misalignment factor is supported by the
4286 target. */
4287 static bool
4288 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4289 const_tree type,
4290 int misalignment,
4291 bool is_packed)
4292 {
4293 if (TARGET_VSX)
4294 {
4295 /* Return false if the movmisalign pattern is not supported for this mode. */
4296 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4297 return false;
4298
4299 if (misalignment == -1)
4300 {
4301 /* Misalignment factor is unknown at compile time but we know
4302 it's word aligned. */
4303 if (rs6000_vector_alignment_reachable (type, is_packed))
4304 {
4305 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4306
4307 if (element_size == 64 || element_size == 32)
4308 return true;
4309 }
4310
4311 return false;
4312 }
4313
4314 /* VSX supports word-aligned vectors. */
4315 if (misalignment % 4 == 0)
4316 return true;
4317 }
4318 return false;
4319 }
4320
4321 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4322 static int
4323 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4324 tree vectype, int misalign)
4325 {
4326 unsigned elements;
4327 tree elem_type;
4328
4329 switch (type_of_cost)
4330 {
4331 case scalar_stmt:
4332 case scalar_load:
4333 case scalar_store:
4334 case vector_stmt:
4335 case vector_load:
4336 case vector_store:
4337 case vec_to_scalar:
4338 case scalar_to_vec:
4339 case cond_branch_not_taken:
4340 return 1;
4341
4342 case vec_perm:
4343 if (TARGET_VSX)
4344 return 3;
4345 else
4346 return 1;
4347
4348 case vec_promote_demote:
4349 if (TARGET_VSX)
4350 return 4;
4351 else
4352 return 1;
4353
4354 case cond_branch_taken:
4355 return 3;
4356
4357 case unaligned_load:
4358 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4359 {
4360 elements = TYPE_VECTOR_SUBPARTS (vectype);
4361 if (elements == 2)
4362 /* Double word aligned. */
4363 return 2;
4364
4365 if (elements == 4)
4366 {
4367 switch (misalign)
4368 {
4369 case 8:
4370 /* Double word aligned. */
4371 return 2;
4372
4373 case -1:
4374 /* Unknown misalignment. */
4375 case 4:
4376 case 12:
4377 /* Word aligned. */
4378 return 22;
4379
4380 default:
4381 gcc_unreachable ();
4382 }
4383 }
4384 }
4385
4386 if (TARGET_ALTIVEC)
4387 /* Misaligned loads are not supported. */
4388 gcc_unreachable ();
4389
4390 return 2;
4391
4392 case unaligned_store:
4393 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4394 {
4395 elements = TYPE_VECTOR_SUBPARTS (vectype);
4396 if (elements == 2)
4397 /* Double word aligned. */
4398 return 2;
4399
4400 if (elements == 4)
4401 {
4402 switch (misalign)
4403 {
4404 case 8:
4405 /* Double word aligned. */
4406 return 2;
4407
4408 case -1:
4409 /* Unknown misalignment. */
4410 case 4:
4411 case 12:
4412 /* Word aligned. */
4413 return 23;
4414
4415 default:
4416 gcc_unreachable ();
4417 }
4418 }
4419 }
4420
4421 if (TARGET_ALTIVEC)
4422 /* Misaligned stores are not supported. */
4423 gcc_unreachable ();
4424
4425 return 2;
4426
4427 case vec_construct:
4428 elements = TYPE_VECTOR_SUBPARTS (vectype);
4429 elem_type = TREE_TYPE (vectype);
4430 /* 32-bit vectors loaded into registers are stored as double
4431 precision, so we need n/2 converts in addition to the usual
4432 n/2 merges to construct a vector of short floats from them. */
4433 if (SCALAR_FLOAT_TYPE_P (elem_type)
4434 && TYPE_PRECISION (elem_type) == 32)
4435 return elements + 1;
4436 else
4437 return elements / 2 + 1;
4438
4439 default:
4440 gcc_unreachable ();
4441 }
4442 }
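
/* A worked reading of the unaligned cases above, using the constants from
   the switch: a 4-element vector load that is only word aligned (misalign
   4 or 12, or unknown) is costed at 22, versus 2 when double-word aligned,
   reflecting the extra realignment work the vectorizer must account for.  */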
4443
4444 /* Implement targetm.vectorize.preferred_simd_mode. */
4445
4446 static machine_mode
4447 rs6000_preferred_simd_mode (machine_mode mode)
4448 {
4449 if (TARGET_VSX)
4450 switch (mode)
4451 {
4452 case DFmode:
4453 return V2DFmode;
4454 default:;
4455 }
4456 if (TARGET_ALTIVEC || TARGET_VSX)
4457 switch (mode)
4458 {
4459 case SFmode:
4460 return V4SFmode;
4461 case TImode:
4462 return V1TImode;
4463 case DImode:
4464 return V2DImode;
4465 case SImode:
4466 return V4SImode;
4467 case HImode:
4468 return V8HImode;
4469 case QImode:
4470 return V16QImode;
4471 default:;
4472 }
4473 if (TARGET_SPE)
4474 switch (mode)
4475 {
4476 case SFmode:
4477 return V2SFmode;
4478 case SImode:
4479 return V2SImode;
4480 default:;
4481 }
4482 if (TARGET_PAIRED_FLOAT
4483 && mode == SFmode)
4484 return V2SFmode;
4485 return word_mode;
4486 }
4487
4488 typedef struct _rs6000_cost_data
4489 {
4490 struct loop *loop_info;
4491 unsigned cost[3];
4492 } rs6000_cost_data;
4493
4494 /* Test for likely overcommitment of vector hardware resources. If a
4495 loop iteration is relatively large, and too large a percentage of
4496 instructions in the loop are vectorized, the cost model may not
4497 adequately reflect delays from unavailable vector resources.
4498 Penalize the loop body cost for this case. */
4499
4500 static void
4501 rs6000_density_test (rs6000_cost_data *data)
4502 {
4503 const int DENSITY_PCT_THRESHOLD = 85;
4504 const int DENSITY_SIZE_THRESHOLD = 70;
4505 const int DENSITY_PENALTY = 10;
4506 struct loop *loop = data->loop_info;
4507 basic_block *bbs = get_loop_body (loop);
4508 int nbbs = loop->num_nodes;
4509 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4510 int i, density_pct;
4511
4512 for (i = 0; i < nbbs; i++)
4513 {
4514 basic_block bb = bbs[i];
4515 gimple_stmt_iterator gsi;
4516
4517 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4518 {
4519 gimple stmt = gsi_stmt (gsi);
4520 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4521
4522 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4523 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4524 not_vec_cost++;
4525 }
4526 }
4527
4528 free (bbs);
4529 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4530
4531 if (density_pct > DENSITY_PCT_THRESHOLD
4532 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4533 {
4534 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4535 if (dump_enabled_p ())
4536 dump_printf_loc (MSG_NOTE, vect_location,
4537 "density %d%%, cost %d exceeds threshold, penalizing "
4538 "loop body cost by %d%%", density_pct,
4539 vec_cost + not_vec_cost, DENSITY_PENALTY);
4540 }
4541 }
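
/* A hypothetical walkthrough of the density test: with vec_cost = 90 and
   not_vec_cost = 10, density_pct is 90 (above the 85% threshold) and the
   combined size 100 exceeds 70, so the body cost is scaled by 110/100,
   i.e. from 90 to 99.  */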
4542
4543 /* Implement targetm.vectorize.init_cost. */
4544
4545 static void *
4546 rs6000_init_cost (struct loop *loop_info)
4547 {
4548 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4549 data->loop_info = loop_info;
4550 data->cost[vect_prologue] = 0;
4551 data->cost[vect_body] = 0;
4552 data->cost[vect_epilogue] = 0;
4553 return data;
4554 }
4555
4556 /* Implement targetm.vectorize.add_stmt_cost. */
4557
4558 static unsigned
4559 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4560 struct _stmt_vec_info *stmt_info, int misalign,
4561 enum vect_cost_model_location where)
4562 {
4563 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4564 unsigned retval = 0;
4565
4566 if (flag_vect_cost_model)
4567 {
4568 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4569 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
4570 misalign);
4571 /* Statements in an inner loop relative to the loop being
4572 vectorized are weighted more heavily. The value here is
4573 arbitrary and could potentially be improved with analysis. */
4574 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4575 count *= 50; /* FIXME. */
4576
4577 retval = (unsigned) (count * stmt_cost);
4578 cost_data->cost[where] += retval;
4579 }
4580
4581 return retval;
4582 }
4583
4584 /* Implement targetm.vectorize.finish_cost. */
4585
4586 static void
4587 rs6000_finish_cost (void *data, unsigned *prologue_cost,
4588 unsigned *body_cost, unsigned *epilogue_cost)
4589 {
4590 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4591
4592 if (cost_data->loop_info)
4593 rs6000_density_test (cost_data);
4594
4595 *prologue_cost = cost_data->cost[vect_prologue];
4596 *body_cost = cost_data->cost[vect_body];
4597 *epilogue_cost = cost_data->cost[vect_epilogue];
4598 }
4599
4600 /* Implement targetm.vectorize.destroy_cost_data. */
4601
4602 static void
4603 rs6000_destroy_cost_data (void *data)
4604 {
4605 free (data);
4606 }
4607
4608 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
4609 library with vectorized intrinsics. */
4610
4611 static tree
4612 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
4613 {
4614 char name[32];
4615 const char *suffix = NULL;
4616 tree fntype, new_fndecl, bdecl = NULL_TREE;
4617 int n_args = 1;
4618 const char *bname;
4619 machine_mode el_mode, in_mode;
4620 int n, in_n;
4621
4622 /* Libmass is suitable for unsafe math only as it does not correctly support
4623 parts of IEEE with the required precision such as denormals. Only support
4624 it if we have VSX to use the simd d2 or f4 functions.
4625 XXX: Add variable length support. */
4626 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
4627 return NULL_TREE;
4628
4629 el_mode = TYPE_MODE (TREE_TYPE (type_out));
4630 n = TYPE_VECTOR_SUBPARTS (type_out);
4631 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4632 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4633 if (el_mode != in_mode
4634 || n != in_n)
4635 return NULL_TREE;
4636
4637 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4638 {
4639 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4640 switch (fn)
4641 {
4642 case BUILT_IN_ATAN2:
4643 case BUILT_IN_HYPOT:
4644 case BUILT_IN_POW:
4645 n_args = 2;
4646 /* fall through */
4647
4648 case BUILT_IN_ACOS:
4649 case BUILT_IN_ACOSH:
4650 case BUILT_IN_ASIN:
4651 case BUILT_IN_ASINH:
4652 case BUILT_IN_ATAN:
4653 case BUILT_IN_ATANH:
4654 case BUILT_IN_CBRT:
4655 case BUILT_IN_COS:
4656 case BUILT_IN_COSH:
4657 case BUILT_IN_ERF:
4658 case BUILT_IN_ERFC:
4659 case BUILT_IN_EXP2:
4660 case BUILT_IN_EXP:
4661 case BUILT_IN_EXPM1:
4662 case BUILT_IN_LGAMMA:
4663 case BUILT_IN_LOG10:
4664 case BUILT_IN_LOG1P:
4665 case BUILT_IN_LOG2:
4666 case BUILT_IN_LOG:
4667 case BUILT_IN_SIN:
4668 case BUILT_IN_SINH:
4669 case BUILT_IN_SQRT:
4670 case BUILT_IN_TAN:
4671 case BUILT_IN_TANH:
4672 bdecl = builtin_decl_implicit (fn);
4673 suffix = "d2"; /* pow -> powd2 */
4674 if (el_mode != DFmode
4675 || n != 2
4676 || !bdecl)
4677 return NULL_TREE;
4678 break;
4679
4680 case BUILT_IN_ATAN2F:
4681 case BUILT_IN_HYPOTF:
4682 case BUILT_IN_POWF:
4683 n_args = 2;
4684 /* fall through */
4685
4686 case BUILT_IN_ACOSF:
4687 case BUILT_IN_ACOSHF:
4688 case BUILT_IN_ASINF:
4689 case BUILT_IN_ASINHF:
4690 case BUILT_IN_ATANF:
4691 case BUILT_IN_ATANHF:
4692 case BUILT_IN_CBRTF:
4693 case BUILT_IN_COSF:
4694 case BUILT_IN_COSHF:
4695 case BUILT_IN_ERFF:
4696 case BUILT_IN_ERFCF:
4697 case BUILT_IN_EXP2F:
4698 case BUILT_IN_EXPF:
4699 case BUILT_IN_EXPM1F:
4700 case BUILT_IN_LGAMMAF:
4701 case BUILT_IN_LOG10F:
4702 case BUILT_IN_LOG1PF:
4703 case BUILT_IN_LOG2F:
4704 case BUILT_IN_LOGF:
4705 case BUILT_IN_SINF:
4706 case BUILT_IN_SINHF:
4707 case BUILT_IN_SQRTF:
4708 case BUILT_IN_TANF:
4709 case BUILT_IN_TANHF:
4710 bdecl = builtin_decl_implicit (fn);
4711 suffix = "4"; /* powf -> powf4 */
4712 if (el_mode != SFmode
4713 || n != 4
4714 || !bdecl)
4715 return NULL_TREE;
4716 break;
4717
4718 default:
4719 return NULL_TREE;
4720 }
4721 }
4722 else
4723 return NULL_TREE;
4724
4725 gcc_assert (suffix != NULL);
4726 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
4727 if (!bname)
4728 return NULL_TREE;
4729
4730 strcpy (name, bname + sizeof ("__builtin_") - 1);
4731 strcat (name, suffix);
4732
4733 if (n_args == 1)
4734 fntype = build_function_type_list (type_out, type_in, NULL);
4735 else if (n_args == 2)
4736 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
4737 else
4738 gcc_unreachable ();
4739
4740 /* Build a function declaration for the vectorized function. */
4741 new_fndecl = build_decl (BUILTINS_LOCATION,
4742 FUNCTION_DECL, get_identifier (name), fntype);
4743 TREE_PUBLIC (new_fndecl) = 1;
4744 DECL_EXTERNAL (new_fndecl) = 1;
4745 DECL_IS_NOVOPS (new_fndecl) = 1;
4746 TREE_READONLY (new_fndecl) = 1;
4747
4748 return new_fndecl;
4749 }
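
/* A naming sketch following the suffix comments in the switch above: for
   a V2DF sine, the "__builtin_" prefix of "__builtin_sin" is stripped and
   the "d2" suffix appended, so the external decl built here is named
   "sind2" and is expected to resolve against the MASS library.  */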
4750
4751 /* Return a function decl for a vectorized version of the builtin function
4752 FNDECL with output vector type TYPE_OUT and input vector type TYPE_IN,
4753 or NULL_TREE if no such version is available. */
4754
4755 static tree
4756 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
4757 tree type_in)
4758 {
4759 machine_mode in_mode, out_mode;
4760 int in_n, out_n;
4761
4762 if (TARGET_DEBUG_BUILTIN)
4763 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
4764 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
4765 GET_MODE_NAME (TYPE_MODE (type_out)),
4766 GET_MODE_NAME (TYPE_MODE (type_in)));
4767
4768 if (TREE_CODE (type_out) != VECTOR_TYPE
4769 || TREE_CODE (type_in) != VECTOR_TYPE
4770 || !TARGET_VECTORIZE_BUILTINS)
4771 return NULL_TREE;
4772
4773 out_mode = TYPE_MODE (TREE_TYPE (type_out));
4774 out_n = TYPE_VECTOR_SUBPARTS (type_out);
4775 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4776 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4777
4778 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4779 {
4780 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4781 switch (fn)
4782 {
4783 case BUILT_IN_CLZIMAX:
4784 case BUILT_IN_CLZLL:
4785 case BUILT_IN_CLZL:
4786 case BUILT_IN_CLZ:
4787 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4788 {
4789 if (out_mode == QImode && out_n == 16)
4790 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
4791 else if (out_mode == HImode && out_n == 8)
4792 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
4793 else if (out_mode == SImode && out_n == 4)
4794 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
4795 else if (out_mode == DImode && out_n == 2)
4796 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
4797 }
4798 break;
4799 case BUILT_IN_COPYSIGN:
4800 if (VECTOR_UNIT_VSX_P (V2DFmode)
4801 && out_mode == DFmode && out_n == 2
4802 && in_mode == DFmode && in_n == 2)
4803 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
4804 break;
4805 case BUILT_IN_COPYSIGNF:
4806 if (out_mode != SFmode || out_n != 4
4807 || in_mode != SFmode || in_n != 4)
4808 break;
4809 if (VECTOR_UNIT_VSX_P (V4SFmode))
4810 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
4811 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4812 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
4813 break;
4814 case BUILT_IN_POPCOUNTIMAX:
4815 case BUILT_IN_POPCOUNTLL:
4816 case BUILT_IN_POPCOUNTL:
4817 case BUILT_IN_POPCOUNT:
4818 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4819 {
4820 if (out_mode == QImode && out_n == 16)
4821 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
4822 else if (out_mode == HImode && out_n == 8)
4823 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
4824 else if (out_mode == SImode && out_n == 4)
4825 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
4826 else if (out_mode == DImode && out_n == 2)
4827 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
4828 }
4829 break;
4830 case BUILT_IN_SQRT:
4831 if (VECTOR_UNIT_VSX_P (V2DFmode)
4832 && out_mode == DFmode && out_n == 2
4833 && in_mode == DFmode && in_n == 2)
4834 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
4835 break;
4836 case BUILT_IN_SQRTF:
4837 if (VECTOR_UNIT_VSX_P (V4SFmode)
4838 && out_mode == SFmode && out_n == 4
4839 && in_mode == SFmode && in_n == 4)
4840 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
4841 break;
4842 case BUILT_IN_CEIL:
4843 if (VECTOR_UNIT_VSX_P (V2DFmode)
4844 && out_mode == DFmode && out_n == 2
4845 && in_mode == DFmode && in_n == 2)
4846 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
4847 break;
4848 case BUILT_IN_CEILF:
4849 if (out_mode != SFmode || out_n != 4
4850 || in_mode != SFmode || in_n != 4)
4851 break;
4852 if (VECTOR_UNIT_VSX_P (V4SFmode))
4853 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
4854 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4855 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
4856 break;
4857 case BUILT_IN_FLOOR:
4858 if (VECTOR_UNIT_VSX_P (V2DFmode)
4859 && out_mode == DFmode && out_n == 2
4860 && in_mode == DFmode && in_n == 2)
4861 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
4862 break;
4863 case BUILT_IN_FLOORF:
4864 if (out_mode != SFmode || out_n != 4
4865 || in_mode != SFmode || in_n != 4)
4866 break;
4867 if (VECTOR_UNIT_VSX_P (V4SFmode))
4868 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
4869 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4870 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
4871 break;
4872 case BUILT_IN_FMA:
4873 if (VECTOR_UNIT_VSX_P (V2DFmode)
4874 && out_mode == DFmode && out_n == 2
4875 && in_mode == DFmode && in_n == 2)
4876 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
4877 break;
4878 case BUILT_IN_FMAF:
4879 if (VECTOR_UNIT_VSX_P (V4SFmode)
4880 && out_mode == SFmode && out_n == 4
4881 && in_mode == SFmode && in_n == 4)
4882 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
4883 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
4884 && out_mode == SFmode && out_n == 4
4885 && in_mode == SFmode && in_n == 4)
4886 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
4887 break;
4888 case BUILT_IN_TRUNC:
4889 if (VECTOR_UNIT_VSX_P (V2DFmode)
4890 && out_mode == DFmode && out_n == 2
4891 && in_mode == DFmode && in_n == 2)
4892 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
4893 break;
4894 case BUILT_IN_TRUNCF:
4895 if (out_mode != SFmode || out_n != 4
4896 || in_mode != SFmode || in_n != 4)
4897 break;
4898 if (VECTOR_UNIT_VSX_P (V4SFmode))
4899 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
4900 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4901 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
4902 break;
4903 case BUILT_IN_NEARBYINT:
4904 if (VECTOR_UNIT_VSX_P (V2DFmode)
4905 && flag_unsafe_math_optimizations
4906 && out_mode == DFmode && out_n == 2
4907 && in_mode == DFmode && in_n == 2)
4908 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
4909 break;
4910 case BUILT_IN_NEARBYINTF:
4911 if (VECTOR_UNIT_VSX_P (V4SFmode)
4912 && flag_unsafe_math_optimizations
4913 && out_mode == SFmode && out_n == 4
4914 && in_mode == SFmode && in_n == 4)
4915 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
4916 break;
4917 case BUILT_IN_RINT:
4918 if (VECTOR_UNIT_VSX_P (V2DFmode)
4919 && !flag_trapping_math
4920 && out_mode == DFmode && out_n == 2
4921 && in_mode == DFmode && in_n == 2)
4922 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
4923 break;
4924 case BUILT_IN_RINTF:
4925 if (VECTOR_UNIT_VSX_P (V4SFmode)
4926 && !flag_trapping_math
4927 && out_mode == SFmode && out_n == 4
4928 && in_mode == SFmode && in_n == 4)
4929 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
4930 break;
4931 default:
4932 break;
4933 }
4934 }
4935
4936 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
4937 {
4938 enum rs6000_builtins fn
4939 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
4940 switch (fn)
4941 {
4942 case RS6000_BUILTIN_RSQRTF:
4943 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4944 && out_mode == SFmode && out_n == 4
4945 && in_mode == SFmode && in_n == 4)
4946 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
4947 break;
4948 case RS6000_BUILTIN_RSQRT:
4949 if (VECTOR_UNIT_VSX_P (V2DFmode)
4950 && out_mode == DFmode && out_n == 2
4951 && in_mode == DFmode && in_n == 2)
4952 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
4953 break;
4954 case RS6000_BUILTIN_RECIPF:
4955 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4956 && out_mode == SFmode && out_n == 4
4957 && in_mode == SFmode && in_n == 4)
4958 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
4959 break;
4960 case RS6000_BUILTIN_RECIP:
4961 if (VECTOR_UNIT_VSX_P (V2DFmode)
4962 && out_mode == DFmode && out_n == 2
4963 && in_mode == DFmode && in_n == 2)
4964 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
4965 break;
4966 default:
4967 break;
4968 }
4969 }
4970
4971 /* Generate calls to libmass if appropriate. */
4972 if (rs6000_veclib_handler)
4973 return rs6000_veclib_handler (fndecl, type_out, type_in);
4974
4975 return NULL_TREE;
4976 }
4977 \f
4978 /* Default CPU string for rs6000*_file_start functions. */
4979 static const char *rs6000_default_cpu;
4980
4981 /* Do anything needed at the start of the asm file. */
4982
4983 static void
4984 rs6000_file_start (void)
4985 {
4986 char buffer[80];
4987 const char *start = buffer;
4988 FILE *file = asm_out_file;
4989
4990 rs6000_default_cpu = TARGET_CPU_DEFAULT;
4991
4992 default_file_start ();
4993
4994 if (flag_verbose_asm)
4995 {
4996 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
4997
4998 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
4999 {
5000 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5001 start = "";
5002 }
5003
5004 if (global_options_set.x_rs6000_cpu_index)
5005 {
5006 fprintf (file, "%s -mcpu=%s", start,
5007 processor_target_table[rs6000_cpu_index].name);
5008 start = "";
5009 }
5010
5011 if (global_options_set.x_rs6000_tune_index)
5012 {
5013 fprintf (file, "%s -mtune=%s", start,
5014 processor_target_table[rs6000_tune_index].name);
5015 start = "";
5016 }
5017
5018 if (PPC405_ERRATUM77)
5019 {
5020 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5021 start = "";
5022 }
5023
5024 #ifdef USING_ELFOS_H
5025 switch (rs6000_sdata)
5026 {
5027 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5028 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5029 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5030 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5031 }
5032
5033 if (rs6000_sdata && g_switch_value)
5034 {
5035 fprintf (file, "%s -G %d", start,
5036 g_switch_value);
5037 start = "";
5038 }
5039 #endif
5040
5041 if (*start == '\0')
5042 putc ('\n', file);
5043 }
5044
5045 if (DEFAULT_ABI == ABI_ELFv2)
5046 fprintf (file, "\t.abiversion 2\n");
5047
5048 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
5049 || (TARGET_ELF && flag_pic == 2))
5050 {
5051 switch_to_section (toc_section);
5052 switch_to_section (text_section);
5053 }
5054 }
5055
5056 \f
5057 /* Return nonzero if this function is known to have a null epilogue. */
5058
5059 int
5060 direct_return (void)
5061 {
5062 if (reload_completed)
5063 {
5064 rs6000_stack_t *info = rs6000_stack_info ();
5065
5066 if (info->first_gp_reg_save == 32
5067 && info->first_fp_reg_save == 64
5068 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5069 && ! info->lr_save_p
5070 && ! info->cr_save_p
5071 && info->vrsave_mask == 0
5072 && ! info->push_p)
5073 return 1;
5074 }
5075
5076 return 0;
5077 }
5078
5079 /* Return the number of instructions it takes to form a constant in an
5080 integer register. */
5081
5082 int
5083 num_insns_constant_wide (HOST_WIDE_INT value)
5084 {
5085 /* signed constant loadable with addi */
5086 if ((unsigned HOST_WIDE_INT) (value + 0x8000) < 0x10000)
5087 return 1;
5088
5089 /* constant loadable with addis */
5090 else if ((value & 0xffff) == 0
5091 && (value >> 31 == -1 || value >> 31 == 0))
5092 return 1;
5093
5094 else if (TARGET_POWERPC64)
5095 {
5096 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5097 HOST_WIDE_INT high = value >> 31;
5098
5099 if (high == 0 || high == -1)
5100 return 2;
5101
5102 high >>= 1;
5103
5104 if (low == 0)
5105 return num_insns_constant_wide (high) + 1;
5106 else if (high == 0)
5107 return num_insns_constant_wide (low) + 1;
5108 else
5109 return (num_insns_constant_wide (high)
5110 + num_insns_constant_wide (low) + 1);
5111 }
5112
5113 else
5114 return 2;
5115 }
5116
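/* A minimal usage sketch (hypothetical self-checks, not part of the
   build) of the three easy tiers recognized above: a 16-bit signed
   value loads with one addi/li, a value with a zero low halfword and
   a sign-extended upper half loads with one addis/lis, and a general
   32-bit value needs the lis/ori pair.  */
#if 0
gcc_checking_assert (num_insns_constant_wide (0x7fff) == 1);	  /* li  */
gcc_checking_assert (num_insns_constant_wide (0x12340000) == 1);  /* lis */
gcc_checking_assert (num_insns_constant_wide (0x12345678) == 2);  /* lis; ori */
#endif

/* Return the number of insns needed to load the constant rtx OP of
   mode MODE into an integer register.  */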
5117 int
5118 num_insns_constant (rtx op, machine_mode mode)
5119 {
5120 HOST_WIDE_INT low, high;
5121
5122 switch (GET_CODE (op))
5123 {
5124 case CONST_INT:
5125 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5126 && mask64_operand (op, mode))
5127 return 2;
5128 else
5129 return num_insns_constant_wide (INTVAL (op));
5130
5131 case CONST_WIDE_INT:
5132 {
5133 int i;
5134 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5135 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5136 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5137 return ins;
5138 }
5139
5140 case CONST_DOUBLE:
5141 if (mode == SFmode || mode == SDmode)
5142 {
5143 long l;
5144 REAL_VALUE_TYPE rv;
5145
5146 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5147 if (DECIMAL_FLOAT_MODE_P (mode))
5148 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
5149 else
5150 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
5151 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5152 }
5153
5154 long l[2];
5155 REAL_VALUE_TYPE rv;
5156
5157 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5158 if (DECIMAL_FLOAT_MODE_P (mode))
5159 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
5160 else
5161 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
5162 high = l[WORDS_BIG_ENDIAN == 0];
5163 low = l[WORDS_BIG_ENDIAN != 0];
5164
5165 if (TARGET_32BIT)
5166 return (num_insns_constant_wide (low)
5167 + num_insns_constant_wide (high));
5168 else
5169 {
5170 if ((high == 0 && low >= 0)
5171 || (high == -1 && low < 0))
5172 return num_insns_constant_wide (low);
5173
5174 else if (mask64_operand (op, mode))
5175 return 2;
5176
5177 else if (low == 0)
5178 return num_insns_constant_wide (high) + 1;
5179
5180 else
5181 return (num_insns_constant_wide (high)
5182 + num_insns_constant_wide (low) + 1);
5183 }
5184
5185 default:
5186 gcc_unreachable ();
5187 }
5188 }
5189
5190 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5191 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5192 corresponding element of the vector, but for V4SFmode and V2SFmode,
5193 the corresponding "float" is interpreted as an SImode integer. */
5194
5195 HOST_WIDE_INT
5196 const_vector_elt_as_int (rtx op, unsigned int elt)
5197 {
5198 rtx tmp;
5199
5200 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5201 gcc_assert (GET_MODE (op) != V2DImode
5202 && GET_MODE (op) != V2DFmode);
5203
5204 tmp = CONST_VECTOR_ELT (op, elt);
5205 if (GET_MODE (op) == V4SFmode
5206 || GET_MODE (op) == V2SFmode)
5207 tmp = gen_lowpart (SImode, tmp);
5208 return INTVAL (tmp);
5209 }
5210
5211 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5212 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5213 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5214 all items are set to the same value and contain COPIES replicas of the
5215 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5216 operand and the others are set to the value of the operand's msb. */
5217
5218 static bool
5219 vspltis_constant (rtx op, unsigned step, unsigned copies)
5220 {
5221 machine_mode mode = GET_MODE (op);
5222 machine_mode inner = GET_MODE_INNER (mode);
5223
5224 unsigned i;
5225 unsigned nunits;
5226 unsigned bitsize;
5227 unsigned mask;
5228
5229 HOST_WIDE_INT val;
5230 HOST_WIDE_INT splat_val;
5231 HOST_WIDE_INT msb_val;
5232
5233 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5234 return false;
5235
5236 nunits = GET_MODE_NUNITS (mode);
5237 bitsize = GET_MODE_BITSIZE (inner);
5238 mask = GET_MODE_MASK (inner);
5239
5240 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5241 splat_val = val;
5242 msb_val = val >= 0 ? 0 : -1;
5243
5244 /* Construct the value to be splatted, if possible. If not, return 0. */
5245 for (i = 2; i <= copies; i *= 2)
5246 {
5247 HOST_WIDE_INT small_val;
5248 bitsize /= 2;
5249 small_val = splat_val >> bitsize;
5250 mask >>= bitsize;
5251 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5252 return false;
5253 splat_val = small_val;
5254 }
5255
5256 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5257 if (EASY_VECTOR_15 (splat_val))
5258 ;
5259
5260 /* Also check if we can splat, and then add the result to itself. Do so if
5261 the value is positive, or if the splat instruction is using OP's mode;
5262 for splat_val < 0, the splat and the add should use the same mode. */
5263 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5264 && (splat_val >= 0 || (step == 1 && copies == 1)))
5265 ;
5266
5267 /* Also check if we are loading up the most significant bit, which can be
5268 done by loading up -1 and shifting the value left by -1.  */
5269 else if (EASY_VECTOR_MSB (splat_val, inner))
5270 ;
5271
5272 else
5273 return false;
5274
5275 /* Check if VAL is present in every STEP-th element, and the
5276 other elements are filled with its most significant bit. */
5277 for (i = 1; i < nunits; ++i)
5278 {
5279 HOST_WIDE_INT desired_val;
5280 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5281 if ((i & (step - 1)) == 0)
5282 desired_val = val;
5283 else
5284 desired_val = msb_val;
5285
5286 if (desired_val != const_vector_elt_as_int (op, elt))
5287 return false;
5288 }
5289
5290 return true;
5291 }
5292
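/* Two worked examples (hypothetical CONST_VECTORs) of the STEP/COPIES
   matching above: a V8HImode vector whose every halfword is 0x0404 is
   recognized with STEP 1, COPIES 2, splatting 4 as bytes (vspltisb 4);
   a V16QImode vector whose big-endian bytes are {0,0,0,5} repeated is
   recognized with STEP 4, COPIES 1 (vspltisw 5).  */
#if 0
vspltis_constant (v8hi_x0404, 1, 2);	/* true: vspltisb 4  */
vspltis_constant (v16qi_0005, 4, 1);	/* true: vspltisw 5  */
#endif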
5293
5294 /* Return true if OP is of the given MODE and can be synthesized
5295 with a vspltisb, vspltish or vspltisw. */
5296
5297 bool
5298 easy_altivec_constant (rtx op, machine_mode mode)
5299 {
5300 unsigned step, copies;
5301
5302 if (mode == VOIDmode)
5303 mode = GET_MODE (op);
5304 else if (mode != GET_MODE (op))
5305 return false;
5306
5307 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5308 constants. */
5309 if (mode == V2DFmode)
5310 return zero_constant (op, mode);
5311
5312 else if (mode == V2DImode)
5313 {
5314 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5315 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5316 return false;
5317
5318 if (zero_constant (op, mode))
5319 return true;
5320
5321 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5322 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5323 return true;
5324
5325 return false;
5326 }
5327
5328 /* V1TImode is a special container for TImode. Ignore for now. */
5329 else if (mode == V1TImode)
5330 return false;
5331
5332 /* Start with a vspltisw. */
5333 step = GET_MODE_NUNITS (mode) / 4;
5334 copies = 1;
5335
5336 if (vspltis_constant (op, step, copies))
5337 return true;
5338
5339 /* Then try with a vspltish. */
5340 if (step == 1)
5341 copies <<= 1;
5342 else
5343 step >>= 1;
5344
5345 if (vspltis_constant (op, step, copies))
5346 return true;
5347
5348 /* And finally a vspltisb. */
5349 if (step == 1)
5350 copies <<= 1;
5351 else
5352 step >>= 1;
5353
5354 if (vspltis_constant (op, step, copies))
5355 return true;
5356
5357 return false;
5358 }
5359
5360 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5361 result is OP. Abort if it is not possible. */
5362
5363 rtx
5364 gen_easy_altivec_constant (rtx op)
5365 {
5366 machine_mode mode = GET_MODE (op);
5367 int nunits = GET_MODE_NUNITS (mode);
5368 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5369 unsigned step = nunits / 4;
5370 unsigned copies = 1;
5371
5372 /* Start with a vspltisw. */
5373 if (vspltis_constant (op, step, copies))
5374 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5375
5376 /* Then try with a vspltish. */
5377 if (step == 1)
5378 copies <<= 1;
5379 else
5380 step >>= 1;
5381
5382 if (vspltis_constant (op, step, copies))
5383 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5384
5385 /* And finally a vspltisb. */
5386 if (step == 1)
5387 copies <<= 1;
5388 else
5389 step >>= 1;
5390
5391 if (vspltis_constant (op, step, copies))
5392 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
5393
5394 gcc_unreachable ();
5395 }
5396
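/* Output an easy vector constant move: OPERANDS[0] is the destination
   register and OPERANDS[1] the constant vector.  Return the assembler
   template, or "#" if the constant must be split.  */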
5397 const char *
5398 output_vec_const_move (rtx *operands)
5399 {
5400 int cst, cst2;
5401 machine_mode mode;
5402 rtx dest, vec;
5403
5404 dest = operands[0];
5405 vec = operands[1];
5406 mode = GET_MODE (dest);
5407
5408 if (TARGET_VSX)
5409 {
5410 if (zero_constant (vec, mode))
5411 return "xxlxor %x0,%x0,%x0";
5412
5413 if ((mode == V2DImode || mode == V1TImode)
5414 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
5415 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
5416 return "vspltisw %0,-1";
5417 }
5418
5419 if (TARGET_ALTIVEC)
5420 {
5421 rtx splat_vec;
5422 if (zero_constant (vec, mode))
5423 return "vxor %0,%0,%0";
5424
5425 splat_vec = gen_easy_altivec_constant (vec);
5426 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
5427 operands[1] = XEXP (splat_vec, 0);
5428 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
5429 return "#";
5430
5431 switch (GET_MODE (splat_vec))
5432 {
5433 case V4SImode:
5434 return "vspltisw %0,%1";
5435
5436 case V8HImode:
5437 return "vspltish %0,%1";
5438
5439 case V16QImode:
5440 return "vspltisb %0,%1";
5441
5442 default:
5443 gcc_unreachable ();
5444 }
5445 }
5446
5447 gcc_assert (TARGET_SPE);
5448
5449 /* Vector constant 0 is handled as a splitter of V2SI, and in the
5450 pattern of V1DI, V4HI, and V2SF.
5451
5452 FIXME: We should probably return # and add post reload
5453 splitters for these, but this way is so easy ;-). */
5454 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
5455 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
5456 operands[1] = CONST_VECTOR_ELT (vec, 0);
5457 operands[2] = CONST_VECTOR_ELT (vec, 1);
5458 if (cst == cst2)
5459 return "li %0,%1\n\tevmergelo %0,%0,%0";
5460 else if (WORDS_BIG_ENDIAN)
5461 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
5462 else
5463 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
5464 }
5465
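/* For reference, a sketch of the templates chosen above for some easy
   constants (assuming the corresponding unit is enabled):

     all-zero vector on VSX          "xxlxor %x0,%x0,%x0"
     all-zero vector on AltiVec      "vxor %0,%0,%0"
     V4SI  {5,5,5,5}                 "vspltisw %0,5"
     V8HI  all halfwords 0x0404      "vspltisb %0,4"
     V2DI  {-1,-1} on VSX            "vspltisw %0,-1"  */
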
5466 /* Initialize the paired-float vector TARGET to VALS.  */
5467
5468 void
5469 paired_expand_vector_init (rtx target, rtx vals)
5470 {
5471 machine_mode mode = GET_MODE (target);
5472 int n_elts = GET_MODE_NUNITS (mode);
5473 int n_var = 0;
5474 rtx x, new_rtx, tmp, constant_op, op1, op2;
5475 int i;
5476
5477 for (i = 0; i < n_elts; ++i)
5478 {
5479 x = XVECEXP (vals, 0, i);
5480 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5481 ++n_var;
5482 }
5483 if (n_var == 0)
5484 {
5485 /* Load from constant pool. */
5486 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5487 return;
5488 }
5489
5490 if (n_var == 2)
5491 {
5492 /* The vector is initialized only with non-constants. */
5493 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
5494 XVECEXP (vals, 0, 1));
5495
5496 emit_move_insn (target, new_rtx);
5497 return;
5498 }
5499
5500 /* One field is non-constant and the other one is a constant. Load the
5501 constant from the constant pool and use ps_merge instruction to
5502 construct the whole vector. */
5503 op1 = XVECEXP (vals, 0, 0);
5504 op2 = XVECEXP (vals, 0, 1);
5505
5506 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
5507
5508 tmp = gen_reg_rtx (GET_MODE (constant_op));
5509 emit_move_insn (tmp, constant_op);
5510
5511 if (CONSTANT_P (op1))
5512 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
5513 else
5514 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
5515
5516 emit_move_insn (target, new_rtx);
5517 }
5518
5519 void
5520 paired_expand_vector_move (rtx operands[])
5521 {
5522 rtx op0 = operands[0], op1 = operands[1];
5523
5524 emit_move_insn (op0, op1);
5525 }
5526
5527 /* Emit vector compare for code RCODE.  DEST is destination, OP0 and
5528 OP1 are the two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
5529 operands for the relation operation RCODE.  This is a recursive
5530 function.  */
5531
5532 static void
5533 paired_emit_vector_compare (enum rtx_code rcode,
5534 rtx dest, rtx op0, rtx op1,
5535 rtx cc_op0, rtx cc_op1)
5536 {
5537 rtx tmp = gen_reg_rtx (V2SFmode);
5538 rtx tmp1, max, min;
5539
5540 gcc_assert (TARGET_PAIRED_FLOAT);
5541 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5542
5543 switch (rcode)
5544 {
5545 case LT:
5546 case LTU:
5547 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5548 return;
5549 case GE:
5550 case GEU:
5551 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5552 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
5553 return;
5554 case LE:
5555 case LEU:
5556 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
5557 return;
5558 case GT:
5559 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5560 return;
5561 case EQ:
5562 tmp1 = gen_reg_rtx (V2SFmode);
5563 max = gen_reg_rtx (V2SFmode);
5564 min = gen_reg_rtx (V2SFmode);
5566
5567 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5568 emit_insn (gen_selv2sf4
5569 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5570 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
5571 emit_insn (gen_selv2sf4
5572 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5573 emit_insn (gen_subv2sf3 (tmp1, min, max));
5574 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
5575 return;
5576 case NE:
5577 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
5578 return;
5579 case UNLE:
5580 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5581 return;
5582 case UNLT:
5583 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
5584 return;
5585 case UNGE:
5586 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5587 return;
5588 case UNGT:
5589 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
5590 return;
5591 default:
5592 gcc_unreachable ();
5593 }
5594
5595 return;
5596 }
5597
5598 /* Emit vector conditional expression.
5599 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5600 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5601
5602 int
5603 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5604 rtx cond, rtx cc_op0, rtx cc_op1)
5605 {
5606 enum rtx_code rcode = GET_CODE (cond);
5607
5608 if (!TARGET_PAIRED_FLOAT)
5609 return 0;
5610
5611 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
5612
5613 return 1;
5614 }
5615
5616 /* Initialize vector TARGET to VALS. */
5617
5618 void
5619 rs6000_expand_vector_init (rtx target, rtx vals)
5620 {
5621 machine_mode mode = GET_MODE (target);
5622 machine_mode inner_mode = GET_MODE_INNER (mode);
5623 int n_elts = GET_MODE_NUNITS (mode);
5624 int n_var = 0, one_var = -1;
5625 bool all_same = true, all_const_zero = true;
5626 rtx x, mem;
5627 int i;
5628
5629 for (i = 0; i < n_elts; ++i)
5630 {
5631 x = XVECEXP (vals, 0, i);
5632 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5633 ++n_var, one_var = i;
5634 else if (x != CONST0_RTX (inner_mode))
5635 all_const_zero = false;
5636
5637 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
5638 all_same = false;
5639 }
5640
5641 if (n_var == 0)
5642 {
5643 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
5644 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
5645 if ((int_vector_p || TARGET_VSX) && all_const_zero)
5646 {
5647 /* Zero register. */
5648 emit_insn (gen_rtx_SET (VOIDmode, target,
5649 gen_rtx_XOR (mode, target, target)));
5650 return;
5651 }
5652 else if (int_vector_p && easy_vector_constant (const_vec, mode))
5653 {
5654 /* Splat immediate. */
5655 emit_insn (gen_rtx_SET (VOIDmode, target, const_vec));
5656 return;
5657 }
5658 else
5659 {
5660 /* Load from constant pool. */
5661 emit_move_insn (target, const_vec);
5662 return;
5663 }
5664 }
5665
5666 /* Double word values on VSX can use xxpermdi or lxvdsx. */
5667 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5668 {
5669 rtx op0 = XVECEXP (vals, 0, 0);
5670 rtx op1 = XVECEXP (vals, 0, 1);
5671 if (all_same)
5672 {
5673 if (!MEM_P (op0) && !REG_P (op0))
5674 op0 = force_reg (inner_mode, op0);
5675 if (mode == V2DFmode)
5676 emit_insn (gen_vsx_splat_v2df (target, op0));
5677 else
5678 emit_insn (gen_vsx_splat_v2di (target, op0));
5679 }
5680 else
5681 {
5682 op0 = force_reg (inner_mode, op0);
5683 op1 = force_reg (inner_mode, op1);
5684 if (mode == V2DFmode)
5685 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
5686 else
5687 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
5688 }
5689 return;
5690 }
5691
5692 /* With single precision floating point on VSX, we know that internally
5693 single precision is actually represented as a double, so either make two
5694 V2DF vectors and convert those to single precision, or do one
5695 conversion and splat the result to the other elements.  */
5696 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
5697 {
5698 if (all_same)
5699 {
5700 rtx freg = gen_reg_rtx (V4SFmode);
5701 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
5702 rtx cvt = ((TARGET_XSCVDPSPN)
5703 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
5704 : gen_vsx_xscvdpsp_scalar (freg, sreg));
5705
5706 emit_insn (cvt);
5707 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
5708 }
5709 else
5710 {
5711 rtx dbl_even = gen_reg_rtx (V2DFmode);
5712 rtx dbl_odd = gen_reg_rtx (V2DFmode);
5713 rtx flt_even = gen_reg_rtx (V4SFmode);
5714 rtx flt_odd = gen_reg_rtx (V4SFmode);
5715 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
5716 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
5717 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
5718 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
5719
5720 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
5721 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
5722 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
5723 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
5724 rs6000_expand_extract_even (target, flt_even, flt_odd);
5725 }
5726 return;
5727 }
5728
5729 /* Store value to stack temp. Load vector element. Splat. However, splat
5730 of 64-bit items is not supported on Altivec. */
5731 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
5732 {
5733 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5734 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
5735 XVECEXP (vals, 0, 0));
5736 x = gen_rtx_UNSPEC (VOIDmode,
5737 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5738 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5739 gen_rtvec (2,
5740 gen_rtx_SET (VOIDmode,
5741 target, mem),
5742 x)));
5743 x = gen_rtx_VEC_SELECT (inner_mode, target,
5744 gen_rtx_PARALLEL (VOIDmode,
5745 gen_rtvec (1, const0_rtx)));
5746 emit_insn (gen_rtx_SET (VOIDmode, target,
5747 gen_rtx_VEC_DUPLICATE (mode, x)));
5748 return;
5749 }
5750
5751 /* One field is non-constant. Load constant then overwrite
5752 varying field. */
5753 if (n_var == 1)
5754 {
5755 rtx copy = copy_rtx (vals);
5756
5757 /* Load constant part of vector, substitute neighboring value for
5758 varying element. */
5759 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
5760 rs6000_expand_vector_init (target, copy);
5761
5762 /* Insert variable. */
5763 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
5764 return;
5765 }
5766
5767 /* Construct the vector in memory one field at a time
5768 and load the whole vector. */
5769 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5770 for (i = 0; i < n_elts; i++)
5771 emit_move_insn (adjust_address_nv (mem, inner_mode,
5772 i * GET_MODE_SIZE (inner_mode)),
5773 XVECEXP (vals, 0, i));
5774 emit_move_insn (target, mem);
5775 }
5776
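/* A minimal usage sketch (hypothetical SImode regs A, B, C, D; not part
   of the build): this is the shape of PARALLEL the vec_init expander
   hands us.  */
#if 0
rtx target = gen_reg_rtx (V4SImode);
rtx vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec (4, a, b, c, d));
rs6000_expand_vector_init (target, vals);
#endif
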
5777 /* Set field ELT of TARGET to VAL. */
5778
5779 void
5780 rs6000_expand_vector_set (rtx target, rtx val, int elt)
5781 {
5782 machine_mode mode = GET_MODE (target);
5783 machine_mode inner_mode = GET_MODE_INNER (mode);
5784 rtx reg = gen_reg_rtx (mode);
5785 rtx mask, mem, x;
5786 int width = GET_MODE_SIZE (inner_mode);
5787 int i;
5788
5789 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5790 {
5791 rtx (*set_func) (rtx, rtx, rtx, rtx)
5792 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
5793 emit_insn (set_func (target, target, val, GEN_INT (elt)));
5794 return;
5795 }
5796
5797 /* Simplify setting single element vectors like V1TImode. */
5798 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
5799 {
5800 emit_move_insn (target, gen_lowpart (mode, val));
5801 return;
5802 }
5803
5804 /* Load single variable value. */
5805 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5806 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
5807 x = gen_rtx_UNSPEC (VOIDmode,
5808 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5809 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5810 gen_rtvec (2,
5811 gen_rtx_SET (VOIDmode,
5812 reg, mem),
5813 x)));
5814
5815 /* Linear sequence. */
5816 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
5817 for (i = 0; i < 16; ++i)
5818 XVECEXP (mask, 0, i) = GEN_INT (i);
5819
5820 /* Set permute mask to insert element into target. */
5821 for (i = 0; i < width; ++i)
5822 XVECEXP (mask, 0, elt*width + i)
5823 = GEN_INT (i + 0x10);
5824 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
5825
5826 if (BYTES_BIG_ENDIAN)
5827 x = gen_rtx_UNSPEC (mode,
5828 gen_rtvec (3, target, reg,
5829 force_reg (V16QImode, x)),
5830 UNSPEC_VPERM);
5831 else
5832 {
5833 /* Invert selector. We prefer to generate VNAND on P8 so
5834 that future fusion opportunities can kick in, but must
5835 generate VNOR elsewhere. */
5836 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
5837 rtx iorx = (TARGET_P8_VECTOR
5838 ? gen_rtx_IOR (V16QImode, notx, notx)
5839 : gen_rtx_AND (V16QImode, notx, notx));
5840 rtx tmp = gen_reg_rtx (V16QImode);
5841 emit_insn (gen_rtx_SET (VOIDmode, tmp, iorx));
5842
5843 /* Permute with operands reversed and adjusted selector. */
5844 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
5845 UNSPEC_VPERM);
5846 }
5847
5848 emit_insn (gen_rtx_SET (VOIDmode, target, x));
5849 }
5850
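/* A worked example of the selector built above, assuming V4SImode and
   ELT 1 (so WIDTH is 4), in the big-endian case: the mask starts as the
   identity bytes 0..15, then bytes 4..7 become 0x10..0x13, i.e. the
   first four bytes of REG, the second vperm input; the vperm therefore
   copies TARGET through unchanged except for element 1, which is taken
   from REG.  The little-endian path inverts the selector and swaps the
   two inputs.  */
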
5851 /* Extract field ELT from VEC into TARGET. */
5852
5853 void
5854 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
5855 {
5856 machine_mode mode = GET_MODE (vec);
5857 machine_mode inner_mode = GET_MODE_INNER (mode);
5858 rtx mem;
5859
5860 if (VECTOR_MEM_VSX_P (mode))
5861 {
5862 switch (mode)
5863 {
5864 default:
5865 break;
5866 case V1TImode:
5867 gcc_assert (elt == 0 && inner_mode == TImode);
5868 emit_move_insn (target, gen_lowpart (TImode, vec));
5869 return;
5870 case V2DFmode:
5871 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
5872 return;
5873 case V2DImode:
5874 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
5875 return;
5876 case V4SFmode:
5877 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
5878 return;
5879 }
5880 }
5881
5882 /* Allocate mode-sized buffer. */
5883 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5884
5885 emit_move_insn (mem, vec);
5886
5887 /* Add offset to field within buffer matching vector element. */
5888 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
5889
5890 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
5891 }
5892
5893 /* Generates shifts and masks for a pair of rldicl or rldicr insns to
5894 implement ANDing by the mask IN. */
5895 void
5896 build_mask64_2_operands (rtx in, rtx *out)
5897 {
5898 unsigned HOST_WIDE_INT c, lsb, m1, m2;
5899 int shift;
5900
5901 gcc_assert (GET_CODE (in) == CONST_INT);
5902
5903 c = INTVAL (in);
5904 if (c & 1)
5905 {
5906 /* Assume c initially something like 0x00fff000000fffff. The idea
5907 is to rotate the word so that the middle ^^^^^^ group of zeros
5908 is at the MS end and can be cleared with an rldicl mask. We then
5909 rotate back and clear off the MS ^^ group of zeros with a
5910 second rldicl. */
5911 c = ~c; /* c == 0xff000ffffff00000 */
5912 lsb = c & -c; /* lsb == 0x0000000000100000 */
5913 m1 = -lsb; /* m1 == 0xfffffffffff00000 */
5914 c = ~c; /* c == 0x00fff000000fffff */
5915 c &= -lsb; /* c == 0x00fff00000000000 */
5916 lsb = c & -c; /* lsb == 0x0000100000000000 */
5917 c = ~c; /* c == 0xff000fffffffffff */
5918 c &= -lsb; /* c == 0xff00000000000000 */
5919 shift = 0;
5920 while ((lsb >>= 1) != 0)
5921 shift++; /* shift == 44 on exit from loop */
5922 m1 <<= 64 - shift; /* m1 == 0xffffff0000000000 */
5923 m1 = ~m1; /* m1 == 0x000000ffffffffff */
5924 m2 = ~c; /* m2 == 0x00ffffffffffffff */
5925 }
5926 else
5927 {
5928 /* Assume c initially something like 0xff000f0000000000. The idea
5929 is to rotate the word so that the ^^^ middle group of zeros
5930 is at the LS end and can be cleared with an rldicr mask. We then
5931 rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
5932 a second rldicr. */
5933 lsb = c & -c; /* lsb == 0x0000010000000000 */
5934 m2 = -lsb; /* m2 == 0xffffff0000000000 */
5935 c = ~c; /* c == 0x00fff0ffffffffff */
5936 c &= -lsb; /* c == 0x00fff00000000000 */
5937 lsb = c & -c; /* lsb == 0x0000100000000000 */
5938 c = ~c; /* c == 0xff000fffffffffff */
5939 c &= -lsb; /* c == 0xff00000000000000 */
5940 shift = 0;
5941 while ((lsb >>= 1) != 0)
5942 shift++; /* shift == 44 on exit from loop */
5943 m1 = ~c; /* m1 == 0x00ffffffffffffff */
5944 m1 >>= shift; /* m1 == 0x0000000000000fff */
5945 m1 = ~m1; /* m1 == 0xfffffffffffff000 */
5946 }
5947
5948 /* Note that when we only have two 0->1 and 1->0 transitions, one of the
5949 masks will be all 1's. We are guaranteed more than one transition. */
5950 out[0] = GEN_INT (64 - shift);
5951 out[1] = GEN_INT (m1);
5952 out[2] = GEN_INT (shift);
5953 out[3] = GEN_INT (m2);
5954 }
5955
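/* A usage sketch (hypothetical, not part of the build) for the mask
   traced in the comments above: OUT[0]/OUT[1] are the rotate count and
   mask of the first rotate-and-mask insn, OUT[2]/OUT[3] those of the
   second.  */
#if 0
rtx out[4];
build_mask64_2_operands (GEN_INT (0x00fff000000fffffLL), out);
/* out[0] is 20 (64 - 44), out[1] is 0x000000ffffffffff,
   out[2] is 44, out[3] is 0x00ffffffffffffff.  */
#endif
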
5956 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
5957
5958 bool
5959 invalid_e500_subreg (rtx op, machine_mode mode)
5960 {
5961 if (TARGET_E500_DOUBLE)
5962 {
5963 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
5964 subreg:TI and reg:TF. Decimal float modes are like integer
5965 modes (only low part of each register used) for this
5966 purpose. */
5967 if (GET_CODE (op) == SUBREG
5968 && (mode == SImode || mode == DImode || mode == TImode
5969 || mode == DDmode || mode == TDmode || mode == PTImode)
5970 && REG_P (SUBREG_REG (op))
5971 && (GET_MODE (SUBREG_REG (op)) == DFmode
5972 || GET_MODE (SUBREG_REG (op)) == TFmode))
5973 return true;
5974
5975 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
5976 reg:TI. */
5977 if (GET_CODE (op) == SUBREG
5978 && (mode == DFmode || mode == TFmode)
5979 && REG_P (SUBREG_REG (op))
5980 && (GET_MODE (SUBREG_REG (op)) == DImode
5981 || GET_MODE (SUBREG_REG (op)) == TImode
5982 || GET_MODE (SUBREG_REG (op)) == PTImode
5983 || GET_MODE (SUBREG_REG (op)) == DDmode
5984 || GET_MODE (SUBREG_REG (op)) == TDmode))
5985 return true;
5986 }
5987
5988 if (TARGET_SPE
5989 && GET_CODE (op) == SUBREG
5990 && mode == SImode
5991 && REG_P (SUBREG_REG (op))
5992 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
5993 return true;
5994
5995 return false;
5996 }
5997
5998 /* Return the alignment of TYPE.  The existing alignment is ALIGN.  HOW
5999 selects whether to apply the ABI-mandated alignment, the optional
6000 alignment, or both.  */
6001
6002 unsigned int
6003 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
6004 {
6005 if (how != align_opt)
6006 {
6007 if (TREE_CODE (type) == VECTOR_TYPE)
6008 {
6009 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
6010 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
6011 {
6012 if (align < 64)
6013 align = 64;
6014 }
6015 else if (align < 128)
6016 align = 128;
6017 }
6018 else if (TARGET_E500_DOUBLE
6019 && TREE_CODE (type) == REAL_TYPE
6020 && TYPE_MODE (type) == DFmode)
6021 {
6022 if (align < 64)
6023 align = 64;
6024 }
6025 }
6026
6027 if (how != align_abi)
6028 {
6029 if (TREE_CODE (type) == ARRAY_TYPE
6030 && TYPE_MODE (TREE_TYPE (type)) == QImode)
6031 {
6032 if (align < BITS_PER_WORD)
6033 align = BITS_PER_WORD;
6034 }
6035 }
6036
6037 return align;
6038 }
6039
6040 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
6041
6042 bool
6043 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
6044 {
6045 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6046 {
6047 if (computed != 128)
6048 {
6049 static bool warned;
6050 if (!warned && warn_psabi)
6051 {
6052 warned = true;
6053 inform (input_location,
6054 "the layout of aggregates containing vectors with"
6055 " %d-byte alignment has changed in GCC 5",
6056 computed / BITS_PER_UNIT);
6057 }
6058 }
6059 /* In current GCC there is no special case. */
6060 return false;
6061 }
6062
6063 return false;
6064 }
6065
6066 /* AIX increases natural record alignment to doubleword if the first
6067 field is an FP double while the FP fields remain word aligned. */
6068
6069 unsigned int
6070 rs6000_special_round_type_align (tree type, unsigned int computed,
6071 unsigned int specified)
6072 {
6073 unsigned int align = MAX (computed, specified);
6074 tree field = TYPE_FIELDS (type);
6075
6076 /* Skip all non-field decls.  */
6077 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6078 field = DECL_CHAIN (field);
6079
6080 if (field != NULL && field != type)
6081 {
6082 type = TREE_TYPE (field);
6083 while (TREE_CODE (type) == ARRAY_TYPE)
6084 type = TREE_TYPE (type);
6085
6086 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
6087 align = MAX (align, 64);
6088 }
6089
6090 return align;
6091 }
6092
6093 /* Darwin increases record alignment to the natural alignment of
6094 the first field. */
6095
6096 unsigned int
6097 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6098 unsigned int specified)
6099 {
6100 unsigned int align = MAX (computed, specified);
6101
6102 if (TYPE_PACKED (type))
6103 return align;
6104
6105 /* Find the first field, looking down into aggregates. */
6106 do {
6107 tree field = TYPE_FIELDS (type);
6108 /* Skip all non-field decls.  */
6109 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6110 field = DECL_CHAIN (field);
6111 if (! field)
6112 break;
6113 /* A packed field does not contribute any extra alignment. */
6114 if (DECL_PACKED (field))
6115 return align;
6116 type = TREE_TYPE (field);
6117 while (TREE_CODE (type) == ARRAY_TYPE)
6118 type = TREE_TYPE (type);
6119 } while (AGGREGATE_TYPE_P (type));
6120
6121 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6122 align = MAX (align, TYPE_ALIGN (type));
6123
6124 return align;
6125 }
6126
6127 /* Return 1 for an operand in small memory on V.4/eabi. */
6128
6129 int
6130 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6131 machine_mode mode ATTRIBUTE_UNUSED)
6132 {
6133 #if TARGET_ELF
6134 rtx sym_ref;
6135
6136 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6137 return 0;
6138
6139 if (DEFAULT_ABI != ABI_V4)
6140 return 0;
6141
6142 /* Vector and float memory instructions have a limited offset on the
6143 SPE, so using a vector or float variable directly as an operand is
6144 not useful. */
6145 if (TARGET_SPE
6146 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6147 return 0;
6148
6149 if (GET_CODE (op) == SYMBOL_REF)
6150 sym_ref = op;
6151
6152 else if (GET_CODE (op) != CONST
6153 || GET_CODE (XEXP (op, 0)) != PLUS
6154 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6155 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6156 return 0;
6157
6158 else
6159 {
6160 rtx sum = XEXP (op, 0);
6161 HOST_WIDE_INT summand;
6162
6163 /* We have to be careful here, because it is the referenced address
6164 that must be within 32k of _SDA_BASE_, not just the symbol.  */
6165 summand = INTVAL (XEXP (sum, 1));
6166 if (summand < 0 || summand > g_switch_value)
6167 return 0;
6168
6169 sym_ref = XEXP (sum, 0);
6170 }
6171
6172 return SYMBOL_REF_SMALL_P (sym_ref);
6173 #else
6174 return 0;
6175 #endif
6176 }
6177
6178 /* Return true if either operand is a general purpose register. */
6179
6180 bool
6181 gpr_or_gpr_p (rtx op0, rtx op1)
6182 {
6183 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6184 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6185 }
6186
6187 /* Return true if this is a move direct operation between GPR registers and
6188 floating point/VSX registers. */
6189
6190 bool
6191 direct_move_p (rtx op0, rtx op1)
6192 {
6193 int regno0, regno1;
6194
6195 if (!REG_P (op0) || !REG_P (op1))
6196 return false;
6197
6198 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6199 return false;
6200
6201 regno0 = REGNO (op0);
6202 regno1 = REGNO (op1);
6203 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6204 return false;
6205
6206 if (INT_REGNO_P (regno0))
6207 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6208
6209 else if (INT_REGNO_P (regno1))
6210 {
6211 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6212 return true;
6213
6214 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6215 return true;
6216 }
6217
6218 return false;
6219 }
6220
6221 /* Return true if this is a load or store quad operation. This function does
6222 not handle the atomic quad memory instructions. */
6223
6224 bool
6225 quad_load_store_p (rtx op0, rtx op1)
6226 {
6227 bool ret;
6228
6229 if (!TARGET_QUAD_MEMORY)
6230 ret = false;
6231
6232 else if (REG_P (op0) && MEM_P (op1))
6233 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6234 && quad_memory_operand (op1, GET_MODE (op1))
6235 && !reg_overlap_mentioned_p (op0, op1));
6236
6237 else if (MEM_P (op0) && REG_P (op1))
6238 ret = (quad_memory_operand (op0, GET_MODE (op0))
6239 && quad_int_reg_operand (op1, GET_MODE (op1)));
6240
6241 else
6242 ret = false;
6243
6244 if (TARGET_DEBUG_ADDR)
6245 {
6246 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6247 ret ? "true" : "false");
6248 debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
6249 }
6250
6251 return ret;
6252 }
6253
6254 /* Given an address, return a constant offset term if one exists. */
6255
6256 static rtx
6257 address_offset (rtx op)
6258 {
6259 if (GET_CODE (op) == PRE_INC
6260 || GET_CODE (op) == PRE_DEC)
6261 op = XEXP (op, 0);
6262 else if (GET_CODE (op) == PRE_MODIFY
6263 || GET_CODE (op) == LO_SUM)
6264 op = XEXP (op, 1);
6265
6266 if (GET_CODE (op) == CONST)
6267 op = XEXP (op, 0);
6268
6269 if (GET_CODE (op) == PLUS)
6270 op = XEXP (op, 1);
6271
6272 if (CONST_INT_P (op))
6273 return op;
6274
6275 return NULL_RTX;
6276 }
6277
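/* For example (assuming REG is a base register): (plus REG (const_int 8))
   yields (const_int 8); (pre_modify REG (plus REG (const_int 16)))
   yields (const_int 16); and (lo_sum REG (symbol_ref)) yields NULL_RTX,
   since the low part is not a constant term.  */
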
6278 /* Return true if the MEM operand is a memory operand suitable for use
6279 with a (full width, possibly multiple) gpr load/store. On
6280 powerpc64 this means the offset must be divisible by 4.
6281 Implements 'Y' constraint.
6282
6283 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6284 a constraint function we know the operand has satisfied a suitable
6285 memory predicate. Also accept some odd rtl generated by reload
6286 (see rs6000_legitimize_reload_address for various forms). It is
6287 important that reload rtl be accepted by appropriate constraints
6288 but not by the operand predicate.
6289
6290 Offsetting a lo_sum should not be allowed, except where we know by
6291 alignment that a 32k boundary is not crossed, but see the ???
6292 comment in rs6000_legitimize_reload_address. Note that by
6293 "offsetting" here we mean a further offset to access parts of the
6294 MEM. It's fine to have a lo_sum where the inner address is offset
6295 from a sym, since the same sym+offset will appear in the high part
6296 of the address calculation. */
6297
6298 bool
6299 mem_operand_gpr (rtx op, machine_mode mode)
6300 {
6301 unsigned HOST_WIDE_INT offset;
6302 int extra;
6303 rtx addr = XEXP (op, 0);
6304
6305 op = address_offset (addr);
6306 if (op == NULL_RTX)
6307 return true;
6308
6309 offset = INTVAL (op);
6310 if (TARGET_POWERPC64 && (offset & 3) != 0)
6311 return false;
6312
6313 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
6314 if (extra < 0)
6315 extra = 0;
6316
6317 if (GET_CODE (addr) == LO_SUM)
6318 /* For lo_sum addresses, we must allow any offset except one that
6319 causes a wrap, so test only the low 16 bits. */
6320 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
6321
6322 return offset + 0x8000 < 0x10000u - extra;
6323 }
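
/* A sketch (hypothetical BASE reg, powerpc64) of the boundary cases:
   for TImode, EXTRA is 8, so the last acceptable offset is 32756.  */
#if 0
mem_operand_gpr (gen_rtx_MEM (TImode, plus_constant (Pmode, base, 32756)),
		 TImode);	/* true */
mem_operand_gpr (gen_rtx_MEM (TImode, plus_constant (Pmode, base, 32760)),
		 TImode);	/* false: offset of second doubleword
				   exceeds the 16-bit displacement */
mem_operand_gpr (gen_rtx_MEM (TImode, plus_constant (Pmode, base, 2)),
		 TImode);	/* false: not a multiple of 4 */
#endif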
6324 \f
6325 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
6326
6327 static bool
6328 reg_offset_addressing_ok_p (machine_mode mode)
6329 {
6330 switch (mode)
6331 {
6332 case V16QImode:
6333 case V8HImode:
6334 case V4SFmode:
6335 case V4SImode:
6336 case V2DFmode:
6337 case V2DImode:
6338 case V1TImode:
6339 case TImode:
6340 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6341 TImode is not a vector mode, if we want to use the VSX registers to
6342 move it around, we need to restrict ourselves to reg+reg
6343 addressing. */
6344 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6345 return false;
6346 break;
6347
6348 case V4HImode:
6349 case V2SImode:
6350 case V1DImode:
6351 case V2SFmode:
6352 /* Paired vector modes. Only reg+reg addressing is valid. */
6353 if (TARGET_PAIRED_FLOAT)
6354 return false;
6355 break;
6356
6357 case SDmode:
6358 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6359 addressing for the LFIWZX and STFIWX instructions. */
6360 if (TARGET_NO_SDMODE_STACK)
6361 return false;
6362 break;
6363
6364 default:
6365 break;
6366 }
6367
6368 return true;
6369 }
6370
6371 static bool
6372 virtual_stack_registers_memory_p (rtx op)
6373 {
6374 int regnum;
6375
6376 if (GET_CODE (op) == REG)
6377 regnum = REGNO (op);
6378
6379 else if (GET_CODE (op) == PLUS
6380 && GET_CODE (XEXP (op, 0)) == REG
6381 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6382 regnum = REGNO (XEXP (op, 0));
6383
6384 else
6385 return false;
6386
6387 return (regnum >= FIRST_VIRTUAL_REGISTER
6388 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
6389 }
6390
6391 /* Return true if a MODE sized memory accesses to OP plus OFFSET
6392 is known to not straddle a 32k boundary. */
6393
6394 static bool
6395 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
6396 machine_mode mode)
6397 {
6398 tree decl, type;
6399 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
6400
6401 if (GET_CODE (op) != SYMBOL_REF)
6402 return false;
6403
6404 dsize = GET_MODE_SIZE (mode);
6405 decl = SYMBOL_REF_DECL (op);
6406 if (!decl)
6407 {
6408 if (dsize == 0)
6409 return false;
6410
6411 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
6412 replacing memory addresses with an anchor plus offset. We
6413 could find the decl by rummaging around in the block->objects
6414 VEC for the given offset but that seems like too much work. */
6415 dalign = BITS_PER_UNIT;
6416 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
6417 && SYMBOL_REF_ANCHOR_P (op)
6418 && SYMBOL_REF_BLOCK (op) != NULL)
6419 {
6420 struct object_block *block = SYMBOL_REF_BLOCK (op);
6421
6422 dalign = block->alignment;
6423 offset += SYMBOL_REF_BLOCK_OFFSET (op);
6424 }
6425 else if (CONSTANT_POOL_ADDRESS_P (op))
6426 {
6427 /* It would be nice to have get_pool_align().. */
6428 machine_mode cmode = get_pool_mode (op);
6429
6430 dalign = GET_MODE_ALIGNMENT (cmode);
6431 }
6432 }
6433 else if (DECL_P (decl))
6434 {
6435 dalign = DECL_ALIGN (decl);
6436
6437 if (dsize == 0)
6438 {
6439 /* Allow BLKmode when the entire object is known to not
6440 cross a 32k boundary. */
6441 if (!DECL_SIZE_UNIT (decl))
6442 return false;
6443
6444 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
6445 return false;
6446
6447 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
6448 if (dsize > 32768)
6449 return false;
6450
6451 return dalign / BITS_PER_UNIT >= dsize;
6452 }
6453 }
6454 else
6455 {
6456 type = TREE_TYPE (decl);
6457
6458 dalign = TYPE_ALIGN (type);
6459 if (CONSTANT_CLASS_P (decl))
6460 dalign = CONSTANT_ALIGNMENT (decl, dalign);
6461 else
6462 dalign = DATA_ALIGNMENT (decl, dalign);
6463
6464 if (dsize == 0)
6465 {
6466 /* BLKmode, check the entire object. */
6467 if (TREE_CODE (decl) == STRING_CST)
6468 dsize = TREE_STRING_LENGTH (decl);
6469 else if (TYPE_SIZE_UNIT (type)
6470 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
6471 dsize = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6472 else
6473 return false;
6474 if (dsize > 32768)
6475 return false;
6476
6477 return dalign / BITS_PER_UNIT >= dsize;
6478 }
6479 }
6480
6481 /* Find how many bits of the alignment we know for this access. */
6482 mask = dalign / BITS_PER_UNIT - 1;
6483 lsb = offset & -offset;
6484 mask &= lsb - 1;
6485 dalign = mask + 1;
6486
6487 return dalign >= dsize;
6488 }
6489
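/* A worked example of the mask arithmetic above: with DALIGN 128 (a
   16-byte-aligned decl) and OFFSET 0x18, MASK starts as 15, LSB is
   0x18 & -0x18 == 8, so MASK becomes 7 and the known alignment of the
   access drops to 8 bytes: a DSIZE of 8 passes, 16 does not.  */
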
6490 static bool
6491 constant_pool_expr_p (rtx op)
6492 {
6493 rtx base, offset;
6494
6495 split_const (op, &base, &offset);
6496 return (GET_CODE (base) == SYMBOL_REF
6497 && CONSTANT_POOL_ADDRESS_P (base)
6498 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
6499 }
6500
6501 static const_rtx tocrel_base, tocrel_offset;
6502
6503 /* Return true if OP is a toc pointer relative address (the output
6504 of create_TOC_reference). If STRICT, do not match high part or
6505 non-split -mcmodel=large/medium toc pointer relative addresses. */
6506
6507 bool
6508 toc_relative_expr_p (const_rtx op, bool strict)
6509 {
6510 if (!TARGET_TOC)
6511 return false;
6512
6513 if (TARGET_CMODEL != CMODEL_SMALL)
6514 {
6515 /* Only match the low part. */
6516 if (GET_CODE (op) == LO_SUM
6517 && REG_P (XEXP (op, 0))
6518 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
6519 op = XEXP (op, 1);
6520 else if (strict)
6521 return false;
6522 }
6523
6524 tocrel_base = op;
6525 tocrel_offset = const0_rtx;
6526 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
6527 {
6528 tocrel_base = XEXP (op, 0);
6529 tocrel_offset = XEXP (op, 1);
6530 }
6531
6532 return (GET_CODE (tocrel_base) == UNSPEC
6533 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
6534 }
6535
6536 /* Return true if X is a constant pool address, and also for cmodel=medium
6537 if X is a toc-relative address known to be offsettable within MODE. */
6538
6539 bool
6540 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
6541 bool strict)
6542 {
6543 return (toc_relative_expr_p (x, strict)
6544 && (TARGET_CMODEL != CMODEL_MEDIUM
6545 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
6546 || mode == QImode
6547 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
6548 INTVAL (tocrel_offset), mode)));
6549 }
6550
6551 static bool
6552 legitimate_small_data_p (machine_mode mode, rtx x)
6553 {
6554 return (DEFAULT_ABI == ABI_V4
6555 && !flag_pic && !TARGET_TOC
6556 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
6557 && small_data_operand (x, mode));
6558 }
6559
6560 /* SPE offset addressing is limited to 5-bits worth of double words. */
6561 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
6562
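/* That is, exactly the doubleword-aligned offsets 0, 8, ..., 0xf8 pass:
   for instance SPE_CONST_OFFSET_OK (0xf8) holds, while 0x04 (not
   doubleword aligned) and 0x100 (out of range) do not.  */
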
6563 bool
6564 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
6565 bool strict, bool worst_case)
6566 {
6567 unsigned HOST_WIDE_INT offset;
6568 unsigned int extra;
6569
6570 if (GET_CODE (x) != PLUS)
6571 return false;
6572 if (!REG_P (XEXP (x, 0)))
6573 return false;
6574 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6575 return false;
6576 if (!reg_offset_addressing_ok_p (mode))
6577 return virtual_stack_registers_memory_p (x);
6578 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
6579 return true;
6580 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6581 return false;
6582
6583 offset = INTVAL (XEXP (x, 1));
6584 extra = 0;
6585 switch (mode)
6586 {
6587 case V4HImode:
6588 case V2SImode:
6589 case V1DImode:
6590 case V2SFmode:
6591 /* SPE vector modes. */
6592 return SPE_CONST_OFFSET_OK (offset);
6593
6594 case DFmode:
6595 case DDmode:
6596 case DImode:
6597 /* On e500v2, we may have:
6598
6599 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
6600
6601 Which gets addressed with evldd instructions. */
6602 if (TARGET_E500_DOUBLE)
6603 return SPE_CONST_OFFSET_OK (offset);
6604
6605 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
6606 addressing. */
6607 if (VECTOR_MEM_VSX_P (mode))
6608 return false;
6609
6610 if (!worst_case)
6611 break;
6612 if (!TARGET_POWERPC64)
6613 extra = 4;
6614 else if (offset & 3)
6615 return false;
6616 break;
6617
6618 case TFmode:
6619 if (TARGET_E500_DOUBLE)
6620 return (SPE_CONST_OFFSET_OK (offset)
6621 && SPE_CONST_OFFSET_OK (offset + 8));
6622 /* fall through */
6623
6624 case TDmode:
6625 case TImode:
6626 case PTImode:
6627 extra = 8;
6628 if (!worst_case)
6629 break;
6630 if (!TARGET_POWERPC64)
6631 extra = 12;
6632 else if (offset & 3)
6633 return false;
6634 break;
6635
6636 default:
6637 break;
6638 }
6639
6640 offset += 0x8000;
6641 return offset < 0x10000 - extra;
6642 }
6643
6644 bool
6645 legitimate_indexed_address_p (rtx x, int strict)
6646 {
6647 rtx op0, op1;
6648
6649 if (GET_CODE (x) != PLUS)
6650 return false;
6651
6652 op0 = XEXP (x, 0);
6653 op1 = XEXP (x, 1);
6654
6655 /* Recognize the rtl generated by reload which we know will later be
6656 replaced with proper base and index regs. */
6657 if (!strict
6658 && reload_in_progress
6659 && (REG_P (op0) || GET_CODE (op0) == PLUS)
6660 && REG_P (op1))
6661 return true;
6662
6663 return (REG_P (op0) && REG_P (op1)
6664 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
6665 && INT_REG_OK_FOR_INDEX_P (op1, strict))
6666 || (INT_REG_OK_FOR_BASE_P (op1, strict)
6667 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
6668 }
6669
6670 bool
6671 avoiding_indexed_address_p (machine_mode mode)
6672 {
6673 /* Avoid indexed addressing for modes that have non-indexed
6674 load/store instruction forms. */
6675 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
6676 }
6677
6678 bool
6679 legitimate_indirect_address_p (rtx x, int strict)
6680 {
6681 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
6682 }
6683
6684 bool
6685 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
6686 {
6687 if (!TARGET_MACHO || !flag_pic
6688 || mode != SImode || GET_CODE (x) != MEM)
6689 return false;
6690 x = XEXP (x, 0);
6691
6692 if (GET_CODE (x) != LO_SUM)
6693 return false;
6694 if (GET_CODE (XEXP (x, 0)) != REG)
6695 return false;
6696 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
6697 return false;
6698 x = XEXP (x, 1);
6699
6700 return CONSTANT_P (x);
6701 }
6702
6703 static bool
6704 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
6705 {
6706 if (GET_CODE (x) != LO_SUM)
6707 return false;
6708 if (GET_CODE (XEXP (x, 0)) != REG)
6709 return false;
6710 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6711 return false;
6712 /* Restrict addressing for DI because of our SUBREG hackery. */
6713 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
6714 return false;
6715 x = XEXP (x, 1);
6716
6717 if (TARGET_ELF || TARGET_MACHO)
6718 {
6719 bool large_toc_ok;
6720
6721 if (DEFAULT_ABI == ABI_V4 && flag_pic)
6722 return false;
6723 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, since that hook usually
6724 calls push_reload from reload pass code.  LEGITIMIZE_RELOAD_ADDRESS
6725 recognizes some LO_SUM addresses as valid although this
6726 function says the opposite.  In most cases LRA can generate
6727 correct code for address reloads through its own transformations;
6728 it cannot manage only some LO_SUM cases.  So we need to add
6729 code here, analogous to that in rs6000_legitimize_reload_address
6730 for LO_SUM, saying that some addresses are still valid.  */
6731 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
6732 && small_toc_ref (x, VOIDmode));
6733 if (TARGET_TOC && ! large_toc_ok)
6734 return false;
6735 if (GET_MODE_NUNITS (mode) != 1)
6736 return false;
6737 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6738 && !(/* ??? Assume floating point reg based on mode? */
6739 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
6740 && (mode == DFmode || mode == DDmode)))
6741 return false;
6742
6743 return CONSTANT_P (x) || large_toc_ok;
6744 }
6745
6746 return false;
6747 }
6748
6749
6750 /* Try machine-dependent ways of modifying an illegitimate address
6751 to be legitimate. If we find one, return the new, valid address.
6752 This is used from only one place: `memory_address' in explow.c.
6753
6754 OLDX is the address as it was before break_out_memory_refs was
6755 called. In some cases it is useful to look at this to decide what
6756 needs to be done.
6757
6758 It is always safe for this function to do nothing. It exists to
6759 recognize opportunities to optimize the output.
6760
6761 On RS/6000, first check for the sum of a register with a constant
6762 integer that is out of range. If so, generate code to add the
6763 constant with the low-order 16 bits masked to the register and force
6764 this result into another register (this can be done with `cau').
6765 Then generate an address of REG+(CONST&0xffff), allowing for the
6766 possibility of bit 16 being a one.
6767
6768 Then check for the sum of a register and something not constant, try to
6769 load the other things into a register and return the sum. */
6770
6771 static rtx
6772 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
6773 machine_mode mode)
6774 {
6775 unsigned int extra;
6776
6777 if (!reg_offset_addressing_ok_p (mode))
6778 {
6779 if (virtual_stack_registers_memory_p (x))
6780 return x;
6781
6782 /* In theory we should not be seeing addresses of the form reg+0,
6783 but just in case it is generated, optimize it away. */
6784 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
6785 return force_reg (Pmode, XEXP (x, 0));
6786
6787 /* For TImode with load/store quad, restrict addresses to just a single
6788 pointer, so it works with both GPRs and VSX registers. */
6789 /* Make sure both operands are registers. */
6790 else if (GET_CODE (x) == PLUS
6791 && (mode != TImode || !TARGET_QUAD_MEMORY))
6792 return gen_rtx_PLUS (Pmode,
6793 force_reg (Pmode, XEXP (x, 0)),
6794 force_reg (Pmode, XEXP (x, 1)));
6795 else
6796 return force_reg (Pmode, x);
6797 }
6798 if (GET_CODE (x) == SYMBOL_REF)
6799 {
6800 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
6801 if (model != 0)
6802 return rs6000_legitimize_tls_address (x, model);
6803 }
6804
6805 extra = 0;
6806 switch (mode)
6807 {
6808 case TFmode:
6809 case TDmode:
6810 case TImode:
6811 case PTImode:
6812 /* As in legitimate_offset_address_p we do not assume
6813 worst-case. The mode here is just a hint as to the registers
6814 used. A TImode is usually in gprs, but may actually be in
6815 fprs. Leave worst-case scenario for reload to handle via
6816 insn constraints. PTImode is only GPRs. */
6817 extra = 8;
6818 break;
6819 default:
6820 break;
6821 }
6822
6823 if (GET_CODE (x) == PLUS
6824 && GET_CODE (XEXP (x, 0)) == REG
6825 && GET_CODE (XEXP (x, 1)) == CONST_INT
6826 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
6827 >= 0x10000 - extra)
6828 && !(SPE_VECTOR_MODE (mode)
6829 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
6830 {
6831 HOST_WIDE_INT high_int, low_int;
6832 rtx sum;
6833 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
6834 if (low_int >= 0x8000 - extra)
6835 low_int = 0;
6836 high_int = INTVAL (XEXP (x, 1)) - low_int;
6837 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
6838 GEN_INT (high_int)), 0);
6839 return plus_constant (Pmode, sum, low_int);
6840 }
6841 else if (GET_CODE (x) == PLUS
6842 && GET_CODE (XEXP (x, 0)) == REG
6843 && GET_CODE (XEXP (x, 1)) != CONST_INT
6844 && GET_MODE_NUNITS (mode) == 1
6845 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6846 || (/* ??? Assume floating point reg based on mode? */
6847 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6848 && (mode == DFmode || mode == DDmode)))
6849 && !avoiding_indexed_address_p (mode))
6850 {
6851 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
6852 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
6853 }
6854 else if (SPE_VECTOR_MODE (mode)
6855 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
6856 {
6857 if (mode == DImode)
6858 return x;
6859 /* We accept [reg + reg] and [reg + OFFSET]. */
6860
6861 if (GET_CODE (x) == PLUS)
6862 {
6863 rtx op1 = XEXP (x, 0);
6864 rtx op2 = XEXP (x, 1);
6865 rtx y;
6866
6867 op1 = force_reg (Pmode, op1);
6868
6869 if (GET_CODE (op2) != REG
6870 && (GET_CODE (op2) != CONST_INT
6871 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
6872 || (GET_MODE_SIZE (mode) > 8
6873 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
6874 op2 = force_reg (Pmode, op2);
6875
6876 /* We can't always do [reg + reg] for these, because [reg +
6877 reg + offset] is not a legitimate addressing mode. */
6878 y = gen_rtx_PLUS (Pmode, op1, op2);
6879
6880 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
6881 return force_reg (Pmode, y);
6882 else
6883 return y;
6884 }
6885
6886 return force_reg (Pmode, x);
6887 }
6888 else if ((TARGET_ELF
6889 #if TARGET_MACHO
6890 || !MACHO_DYNAMIC_NO_PIC_P
6891 #endif
6892 )
6893 && TARGET_32BIT
6894 && TARGET_NO_TOC
6895 && ! flag_pic
6896 && GET_CODE (x) != CONST_INT
6897 && GET_CODE (x) != CONST_WIDE_INT
6898 && GET_CODE (x) != CONST_DOUBLE
6899 && CONSTANT_P (x)
6900 && GET_MODE_NUNITS (mode) == 1
6901 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6902 || (/* ??? Assume floating point reg based on mode? */
6903 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6904 && (mode == DFmode || mode == DDmode))))
6905 {
6906 rtx reg = gen_reg_rtx (Pmode);
6907 if (TARGET_ELF)
6908 emit_insn (gen_elf_high (reg, x));
6909 else
6910 emit_insn (gen_macho_high (reg, x));
6911 return gen_rtx_LO_SUM (Pmode, reg, x);
6912 }
6913 else if (TARGET_TOC
6914 && GET_CODE (x) == SYMBOL_REF
6915 && constant_pool_expr_p (x)
6916 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
6917 return create_TOC_reference (x, NULL_RTX);
6918 else
6919 return x;
6920 }
6921
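/* A sketch (hypothetical BASE reg) of the displacement split performed
   above for an out-of-range offset: the high part, rounded so that the
   remainder is a signed 16-bit value, is added up front (addis), and
   the remainder stays in the address.  */
#if 0
rtx x = plus_constant (Pmode, base, 0x12345);
rtx y = rs6000_legitimize_address (x, x, SImode);
/* Emits TMP = BASE + 0x10000; Y is (plus TMP (const_int 0x2345)).  */
#endif
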
6922 /* Debug version of rs6000_legitimize_address. */
6923 static rtx
6924 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
6925 {
6926 rtx ret;
6927 rtx_insn *insns;
6928
6929 start_sequence ();
6930 ret = rs6000_legitimize_address (x, oldx, mode);
6931 insns = get_insns ();
6932 end_sequence ();
6933
6934 if (ret != x)
6935 {
6936 fprintf (stderr,
6937 "\nrs6000_legitimize_address: mode %s, old code %s, "
6938 "new code %s, modified\n",
6939 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
6940 GET_RTX_NAME (GET_CODE (ret)));
6941
6942 fprintf (stderr, "Original address:\n");
6943 debug_rtx (x);
6944
6945 fprintf (stderr, "oldx:\n");
6946 debug_rtx (oldx);
6947
6948 fprintf (stderr, "New address:\n");
6949 debug_rtx (ret);
6950
6951 if (insns)
6952 {
6953 fprintf (stderr, "Insns added:\n");
6954 debug_rtx_list (insns, 20);
6955 }
6956 }
6957 else
6958 {
6959 fprintf (stderr,
6960 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
6961 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
6962
6963 debug_rtx (x);
6964 }
6965
6966 if (insns)
6967 emit_insn (insns);
6968
6969 return ret;
6970 }
6971
6972 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6973 We need to emit DTP-relative relocations. */
6974
6975 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6976 static void
6977 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
6978 {
6979 switch (size)
6980 {
6981 case 4:
6982 fputs ("\t.long\t", file);
6983 break;
6984 case 8:
6985 fputs (DOUBLE_INT_ASM_OP, file);
6986 break;
6987 default:
6988 gcc_unreachable ();
6989 }
6990 output_addr_const (file, x);
6991 fputs ("@dtprel+0x8000", file);
6992 }
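/* Editorial example: for a 4-byte entry referencing a symbol "foo", the
   function above emits
       .long   foo@dtprel+0x8000
   the 0x8000 addend matching the bias the PowerPC TLS ABI applies to
   DTP-relative offsets.  */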
6993
6994 /* Return true if X is a symbol that refers to real (rather than emulated)
6995 TLS. */
6996
6997 static bool
6998 rs6000_real_tls_symbol_ref_p (rtx x)
6999 {
7000 return (GET_CODE (x) == SYMBOL_REF
7001 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
7002 }
7003
7004 /* In the name of slightly smaller debug output, and to cater to
7005 general assembler lossage, recognize various UNSPEC sequences
7006 and turn them back into a direct symbol reference. */
7007
7008 static rtx
7009 rs6000_delegitimize_address (rtx orig_x)
7010 {
7011 rtx x, y, offset;
7012
7013 orig_x = delegitimize_mem_from_attrs (orig_x);
7014 x = orig_x;
7015 if (MEM_P (x))
7016 x = XEXP (x, 0);
7017
7018 y = x;
7019 if (TARGET_CMODEL != CMODEL_SMALL
7020 && GET_CODE (y) == LO_SUM)
7021 y = XEXP (y, 1);
7022
7023 offset = NULL_RTX;
7024 if (GET_CODE (y) == PLUS
7025 && GET_MODE (y) == Pmode
7026 && CONST_INT_P (XEXP (y, 1)))
7027 {
7028 offset = XEXP (y, 1);
7029 y = XEXP (y, 0);
7030 }
7031
7032 if (GET_CODE (y) == UNSPEC
7033 && XINT (y, 1) == UNSPEC_TOCREL)
7034 {
7035 #ifdef ENABLE_CHECKING
7036 if (REG_P (XVECEXP (y, 0, 1))
7037 && REGNO (XVECEXP (y, 0, 1)) == TOC_REGISTER)
7038 {
7039 /* All good. */
7040 }
7041 else if (GET_CODE (XVECEXP (y, 0, 1)) == DEBUG_EXPR)
7042 {
7043 /* Weirdness alert. df_note_compute can replace r2 with a
7044 debug_expr when this unspec is in a debug_insn.
7045 Seen in gcc.dg/pr51957-1.c */
7046 }
7047 else
7048 {
7049 debug_rtx (orig_x);
7050 abort ();
7051 }
7052 #endif
7053 y = XVECEXP (y, 0, 0);
7054
7055 #ifdef HAVE_AS_TLS
7056 /* Do not associate thread-local symbols with the original
7057 constant pool symbol. */
7058 if (TARGET_XCOFF
7059 && GET_CODE (y) == SYMBOL_REF
7060 && CONSTANT_POOL_ADDRESS_P (y)
7061 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
7062 return orig_x;
7063 #endif
7064
7065 if (offset != NULL_RTX)
7066 y = gen_rtx_PLUS (Pmode, y, offset);
7067 if (!MEM_P (orig_x))
7068 return y;
7069 else
7070 return replace_equiv_address_nv (orig_x, y);
7071 }
7072
7073 if (TARGET_MACHO
7074 && GET_CODE (orig_x) == LO_SUM
7075 && GET_CODE (XEXP (orig_x, 1)) == CONST)
7076 {
7077 y = XEXP (XEXP (orig_x, 1), 0);
7078 if (GET_CODE (y) == UNSPEC
7079 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
7080 return XVECEXP (y, 0, 0);
7081 }
7082
7083 return orig_x;
7084 }
7085
7086 /* Return true if X shouldn't be emitted into the debug info.
7087 The linker doesn't like .toc section references from
7088 .debug_* sections, so reject .toc section symbols. */
7089
7090 static bool
7091 rs6000_const_not_ok_for_debug_p (rtx x)
7092 {
7093 if (GET_CODE (x) == SYMBOL_REF
7094 && CONSTANT_POOL_ADDRESS_P (x))
7095 {
7096 rtx c = get_pool_constant (x);
7097 machine_mode cmode = get_pool_mode (x);
7098 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7099 return true;
7100 }
7101
7102 return false;
7103 }
7104
7105 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7106
7107 static GTY(()) rtx rs6000_tls_symbol;
7108 static rtx
7109 rs6000_tls_get_addr (void)
7110 {
7111 if (!rs6000_tls_symbol)
7112 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7113
7114 return rs6000_tls_symbol;
7115 }
7116
7117 /* Construct the SYMBOL_REF for TLS GOT references. */
7118
7119 static GTY(()) rtx rs6000_got_symbol;
7120 static rtx
7121 rs6000_got_sym (void)
7122 {
7123 if (!rs6000_got_symbol)
7124 {
7125 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7126 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7127 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7128 }
7129
7130 return rs6000_got_symbol;
7131 }
7132
7133 /* AIX Thread-Local Address support. */
7134
7135 static rtx
7136 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7137 {
7138 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7139 const char *name;
7140 char *tlsname;
7141
7142 name = XSTR (addr, 0);
7143 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
7144 or the symbol will be in the TLS private data section. */
7145 if (name[strlen (name) - 1] != ']'
7146 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7147 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7148 {
7149 tlsname = XALLOCAVEC (char, strlen (name) + 5); /* name + "[TL]" + NUL. */
7150 strcpy (tlsname, name);
7151 strcat (tlsname,
7152 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7153 tlsaddr = copy_rtx (addr);
7154 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7155 }
7156 else
7157 tlsaddr = addr;
7158
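/* Editorial example: an initialized public TLS variable "foo" is
   rewritten above to "foo[TL]", and an uninitialized (BSS-style) one to
   "foo[UL]"; a symbol that already carries a CSECT qualifier, e.g.
   "bar[TL]", is left unchanged.  */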
7159 /* Place addr into TOC constant pool. */
7160 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7161
7162 /* Output the TOC entry and create the MEM referencing the value. */
7163 if (constant_pool_expr_p (XEXP (sym, 0))
7164 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7165 {
7166 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7167 mem = gen_const_mem (Pmode, tocref);
7168 set_mem_alias_set (mem, get_TOC_alias_set ());
7169 }
7170 else
7171 return sym;
7172
7173 /* Use global-dynamic for local-dynamic. */
7174 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7175 || model == TLS_MODEL_LOCAL_DYNAMIC)
7176 {
7177 /* Create a new TOC reference for the @m symbol. */
7178 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7179 tlsname = XALLOCAVEC (char, strlen (name) + 2); /* One char longer than NAME, plus NUL. */
7180 strcpy (tlsname, "*LCM");
7181 strcat (tlsname, name + 3);
7182 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7183 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7184 tocref = create_TOC_reference (modaddr, NULL_RTX);
7185 rtx modmem = gen_const_mem (Pmode, tocref);
7186 set_mem_alias_set (modmem, get_TOC_alias_set ());
7187
7188 rtx modreg = gen_reg_rtx (Pmode);
7189 emit_insn (gen_rtx_SET (VOIDmode, modreg, modmem));
7190
7191 tmpreg = gen_reg_rtx (Pmode);
7192 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7193
7194 dest = gen_reg_rtx (Pmode);
7195 if (TARGET_32BIT)
7196 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7197 else
7198 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7199 return dest;
7200 }
7201 /* Obtain the TLS pointer: 32-bit call or 64-bit GPR 13. */
7202 else if (TARGET_32BIT)
7203 {
7204 tlsreg = gen_reg_rtx (SImode);
7205 emit_insn (gen_tls_get_tpointer (tlsreg));
7206 }
7207 else
7208 tlsreg = gen_rtx_REG (DImode, 13);
7209
7210 /* Load the TOC value into a temporary register. */
7211 tmpreg = gen_reg_rtx (Pmode);
7212 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7213 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7214 gen_rtx_MINUS (Pmode, addr, tlsreg));
7215
7216 /* Add the TOC symbol value to the TLS pointer. */
7217 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7218
7219 return dest;
7220 }
7221
7222 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7223 this (thread-local) address. */
7224
7225 static rtx
7226 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7227 {
7228 rtx dest, insn;
7229
7230 if (TARGET_XCOFF)
7231 return rs6000_legitimize_tls_address_aix (addr, model);
7232
7233 dest = gen_reg_rtx (Pmode);
7234 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7235 {
7236 rtx tlsreg;
7237
7238 if (TARGET_64BIT)
7239 {
7240 tlsreg = gen_rtx_REG (Pmode, 13);
7241 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7242 }
7243 else
7244 {
7245 tlsreg = gen_rtx_REG (Pmode, 2);
7246 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7247 }
7248 emit_insn (insn);
7249 }
7250 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7251 {
7252 rtx tlsreg, tmp;
7253
7254 tmp = gen_reg_rtx (Pmode);
7255 if (TARGET_64BIT)
7256 {
7257 tlsreg = gen_rtx_REG (Pmode, 13);
7258 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7259 }
7260 else
7261 {
7262 tlsreg = gen_rtx_REG (Pmode, 2);
7263 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7264 }
7265 emit_insn (insn);
7266 if (TARGET_64BIT)
7267 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7268 else
7269 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7270 emit_insn (insn);
7271 }
7272 else
7273 {
7274 rtx r3, got, tga, tmp1, tmp2, call_insn;
7275
7276 /* We currently use relocations like @got@tlsgd for tls, which
7277 means the linker will handle allocation of tls entries, placing
7278 them in the .got section. So use a pointer to the .got section,
7279 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7280 or to secondary GOT sections used by 32-bit -fPIC. */
7281 if (TARGET_64BIT)
7282 got = gen_rtx_REG (Pmode, 2);
7283 else
7284 {
7285 if (flag_pic == 1)
7286 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7287 else
7288 {
7289 rtx gsym = rs6000_got_sym ();
7290 got = gen_reg_rtx (Pmode);
7291 if (flag_pic == 0)
7292 rs6000_emit_move (got, gsym, Pmode);
7293 else
7294 {
7295 rtx mem, lab, last;
7296
7297 tmp1 = gen_reg_rtx (Pmode);
7298 tmp2 = gen_reg_rtx (Pmode);
7299 mem = gen_const_mem (Pmode, tmp1);
7300 lab = gen_label_rtx ();
7301 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7302 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7303 if (TARGET_LINK_STACK)
7304 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7305 emit_move_insn (tmp2, mem);
7306 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7307 set_unique_reg_note (last, REG_EQUAL, gsym);
7308 }
7309 }
7310 }
7311
7312 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7313 {
7314 tga = rs6000_tls_get_addr ();
7315 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7316 1, const0_rtx, Pmode);
7317
7318 r3 = gen_rtx_REG (Pmode, 3);
7319 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7320 {
7321 if (TARGET_64BIT)
7322 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7323 else
7324 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7325 }
7326 else if (DEFAULT_ABI == ABI_V4)
7327 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7328 else
7329 gcc_unreachable ();
7330 call_insn = last_call_insn ();
7331 PATTERN (call_insn) = insn;
7332 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7333 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7334 pic_offset_table_rtx);
7335 }
7336 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7337 {
7338 tga = rs6000_tls_get_addr ();
7339 tmp1 = gen_reg_rtx (Pmode);
7340 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7341 1, const0_rtx, Pmode);
7342
7343 r3 = gen_rtx_REG (Pmode, 3);
7344 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7345 {
7346 if (TARGET_64BIT)
7347 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7348 else
7349 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7350 }
7351 else if (DEFAULT_ABI == ABI_V4)
7352 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7353 else
7354 gcc_unreachable ();
7355 call_insn = last_call_insn ();
7356 PATTERN (call_insn) = insn;
7357 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7358 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7359 pic_offset_table_rtx);
7360
7361 if (rs6000_tls_size == 16)
7362 {
7363 if (TARGET_64BIT)
7364 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7365 else
7366 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7367 }
7368 else if (rs6000_tls_size == 32)
7369 {
7370 tmp2 = gen_reg_rtx (Pmode);
7371 if (TARGET_64BIT)
7372 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7373 else
7374 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7375 emit_insn (insn);
7376 if (TARGET_64BIT)
7377 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7378 else
7379 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7380 }
7381 else
7382 {
7383 tmp2 = gen_reg_rtx (Pmode);
7384 if (TARGET_64BIT)
7385 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7386 else
7387 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7388 emit_insn (insn);
7389 insn = gen_rtx_SET (Pmode, dest,
7390 gen_rtx_PLUS (Pmode, tmp2, tmp1));
7391 }
7392 emit_insn (insn);
7393 }
7394 else
7395 {
7396 /* IE, or 64-bit offset LE. */
7397 tmp2 = gen_reg_rtx (Pmode);
7398 if (TARGET_64BIT)
7399 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7400 else
7401 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7402 emit_insn (insn);
7403 if (TARGET_64BIT)
7404 insn = gen_tls_tls_64 (dest, tmp2, addr);
7405 else
7406 insn = gen_tls_tls_32 (dest, tmp2, addr);
7407 emit_insn (insn);
7408 }
7409 }
7410
7411 return dest;
7412 }
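/* Editorial sketch of the sequences generated above for 64-bit ELF; the
   assembler syntax is illustrative, not authoritative:
     local-exec, tls_size 16:  addi  DEST,r13,x@tprel
     local-exec, tls_size 32:  addis T,r13,x@tprel@ha
                               addi  DEST,T,x@tprel@l
     initial-exec:             ld    T,x@got@tprel(r2)
                               add   DEST,T,x@tls
     global-dynamic:           addi  r3,r2,x@got@tlsgd
                               bl    __tls_get_addr(x@tlsgd)  */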
7413
7414 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7415
7416 static bool
7417 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7418 {
7419 if (GET_CODE (x) == HIGH
7420 && GET_CODE (XEXP (x, 0)) == UNSPEC)
7421 return true;
7422
7423 /* A TLS symbol in the TOC cannot contain a sum. */
7424 if (GET_CODE (x) == CONST
7425 && GET_CODE (XEXP (x, 0)) == PLUS
7426 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7427 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
7428 return true;
7429
7430 /* Do not place an ELF TLS symbol in the constant pool. */
7431 return TARGET_ELF && tls_referenced_p (x);
7432 }
7433
7434 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
7435 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
7436 can be addressed relative to the toc pointer. */
7437
7438 static bool
7439 use_toc_relative_ref (rtx sym)
7440 {
7441 return ((constant_pool_expr_p (sym)
7442 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
7443 get_pool_mode (sym)))
7444 || (TARGET_CMODEL == CMODEL_MEDIUM
7445 && SYMBOL_REF_LOCAL_P (sym)));
7446 }
7447
7448 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
7449 replace the input X, or the original X if no replacement is called for.
7450 The output parameter *WIN is 1 if the calling macro should goto WIN,
7451 0 if it should not.
7452
7453 For RS/6000, we wish to handle large displacements off a base
7454 register by splitting the addend across an addis and the mem insn.
7455 This cuts the number of extra insns needed from 3 to 1.
7456
7457 On Darwin, we use this to generate code for floating point constants.
7458 A movsf_low is generated so we wind up with 2 instructions rather than 3.
7459 The Darwin code is inside #if TARGET_MACHO because only then are the
7460 machopic_* functions defined. */
7461 static rtx
7462 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
7463 int opnum, int type,
7464 int ind_levels ATTRIBUTE_UNUSED, int *win)
7465 {
7466 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7467
7468 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
7469 DFmode/DImode MEM. */
7470 if (reg_offset_p
7471 && opnum == 1
7472 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
7473 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
7474 reg_offset_p = false;
7475
7476 /* We must recognize output that we have already generated ourselves. */
7477 if (GET_CODE (x) == PLUS
7478 && GET_CODE (XEXP (x, 0)) == PLUS
7479 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7480 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7481 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7482 {
7483 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7484 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7485 opnum, (enum reload_type) type);
7486 *win = 1;
7487 return x;
7488 }
7489
7490 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
7491 if (GET_CODE (x) == LO_SUM
7492 && GET_CODE (XEXP (x, 0)) == HIGH)
7493 {
7494 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7495 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7496 opnum, (enum reload_type) type);
7497 *win = 1;
7498 return x;
7499 }
7500
7501 #if TARGET_MACHO
7502 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
7503 && GET_CODE (x) == LO_SUM
7504 && GET_CODE (XEXP (x, 0)) == PLUS
7505 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
7506 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
7507 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
7508 && machopic_operand_p (XEXP (x, 1)))
7509 {
7510 /* Result of previous invocation of this function on Darwin
7511 floating point constant. */
7512 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7513 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7514 opnum, (enum reload_type) type);
7515 *win = 1;
7516 return x;
7517 }
7518 #endif
7519
7520 if (TARGET_CMODEL != CMODEL_SMALL
7521 && reg_offset_p
7522 && small_toc_ref (x, VOIDmode))
7523 {
7524 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
7525 x = gen_rtx_LO_SUM (Pmode, hi, x);
7526 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7527 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7528 opnum, (enum reload_type) type);
7529 *win = 1;
7530 return x;
7531 }
7532
7533 if (GET_CODE (x) == PLUS
7534 && GET_CODE (XEXP (x, 0)) == REG
7535 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
7536 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
7537 && GET_CODE (XEXP (x, 1)) == CONST_INT
7538 && reg_offset_p
7539 && !SPE_VECTOR_MODE (mode)
7540 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7541 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
7542 {
7543 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
7544 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
7545 HOST_WIDE_INT high
7546 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
7547
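      /* Editorial example: val = 0x7fffffff yields low = -1 and
         high = -0x80000000, so high + low != val and the check below
         refuses the split; val = 0x12345 yields low = 0x2345 and
         high = 0x10000, which passes.  */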
7548 /* Check for 32-bit overflow. */
7549 if (high + low != val)
7550 {
7551 *win = 0;
7552 return x;
7553 }
7554
7555 /* Reload the high part into a base reg; leave the low part
7556 in the mem directly. */
7557
7558 x = gen_rtx_PLUS (GET_MODE (x),
7559 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
7560 GEN_INT (high)),
7561 GEN_INT (low));
7562
7563 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7564 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7565 opnum, (enum reload_type) type);
7566 *win = 1;
7567 return x;
7568 }
7569
7570 if (GET_CODE (x) == SYMBOL_REF
7571 && reg_offset_p
7572 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
7573 && !SPE_VECTOR_MODE (mode)
7574 #if TARGET_MACHO
7575 && DEFAULT_ABI == ABI_DARWIN
7576 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
7577 && machopic_symbol_defined_p (x)
7578 #else
7579 && DEFAULT_ABI == ABI_V4
7580 && !flag_pic
7581 #endif
7582 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
7583 The same goes for DImode without 64-bit gprs and DFmode and DDmode
7584 without fprs.
7585 ??? Assume floating point reg based on mode? This assumption is
7586 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
7587 where reload ends up doing a DFmode load of a constant from
7588 mem using two gprs. Unfortunately, at this point reload
7589 hasn't yet selected regs so poking around in reload data
7590 won't help and even if we could figure out the regs reliably,
7591 we'd still want to allow this transformation when the mem is
7592 naturally aligned. Since we say the address is good here, we
7593 can't disable offsets from LO_SUMs in mem_operand_gpr.
7594 FIXME: Allow offset from lo_sum for other modes too, when
7595 mem is sufficiently aligned. */
7596 && mode != TFmode
7597 && mode != TDmode
7598 && (mode != TImode || !TARGET_VSX_TIMODE)
7599 && mode != PTImode
7600 && (mode != DImode || TARGET_POWERPC64)
7601 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
7602 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
7603 {
7604 #if TARGET_MACHO
7605 if (flag_pic)
7606 {
7607 rtx offset = machopic_gen_offset (x);
7608 x = gen_rtx_LO_SUM (GET_MODE (x),
7609 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
7610 gen_rtx_HIGH (Pmode, offset)), offset);
7611 }
7612 else
7613 #endif
7614 x = gen_rtx_LO_SUM (GET_MODE (x),
7615 gen_rtx_HIGH (Pmode, x), x);
7616
7617 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7618 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7619 opnum, (enum reload_type) type);
7620 *win = 1;
7621 return x;
7622 }
7623
7624 /* Reload an offset address wrapped by an AND that represents the
7625 masking of the lower bits. Strip the outer AND and let reload
7626 convert the offset address into an indirect address. For VSX,
7627 force reload to create the address with an AND in a separate
7628 register, because we can't guarantee an altivec register will
7629 be used. */
7630 if (VECTOR_MEM_ALTIVEC_P (mode)
7631 && GET_CODE (x) == AND
7632 && GET_CODE (XEXP (x, 0)) == PLUS
7633 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7634 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7635 && GET_CODE (XEXP (x, 1)) == CONST_INT
7636 && INTVAL (XEXP (x, 1)) == -16)
7637 {
7638 x = XEXP (x, 0);
7639 *win = 1;
7640 return x;
7641 }
7642
7643 if (TARGET_TOC
7644 && reg_offset_p
7645 && GET_CODE (x) == SYMBOL_REF
7646 && use_toc_relative_ref (x))
7647 {
7648 x = create_TOC_reference (x, NULL_RTX);
7649 if (TARGET_CMODEL != CMODEL_SMALL)
7650 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7651 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7652 opnum, (enum reload_type) type);
7653 *win = 1;
7654 return x;
7655 }
7656 *win = 0;
7657 return x;
7658 }
7659
7660 /* Debug version of rs6000_legitimize_reload_address. */
7661 static rtx
7662 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
7663 int opnum, int type,
7664 int ind_levels, int *win)
7665 {
7666 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
7667 ind_levels, win);
7668 fprintf (stderr,
7669 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
7670 "type = %d, ind_levels = %d, win = %d, original addr:\n",
7671 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
7672 debug_rtx (x);
7673
7674 if (x == ret)
7675 fprintf (stderr, "Same address returned\n");
7676 else if (!ret)
7677 fprintf (stderr, "NULL returned\n");
7678 else
7679 {
7680 fprintf (stderr, "New address:\n");
7681 debug_rtx (ret);
7682 }
7683
7684 return ret;
7685 }
7686
7687 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
7688 that is a valid memory address for an instruction.
7689 The MODE argument is the machine mode for the MEM expression
7690 that wants to use this address.
7691
7692 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
7693 refers to a constant pool entry of an address (or the sum of it
7694 plus a constant), a short (16-bit signed) constant plus a register,
7695 the sum of two registers, or a register indirect, possibly with an
7696 auto-increment. For DFmode, DDmode and DImode with a constant plus
7697 register, we must ensure that both words are addressable, or on
7698 PowerPC64 that the offset is word aligned.
7699
7700 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
7701 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
7702 because adjacent memory cells are accessed by adding word-sized offsets
7703 during assembly output. */
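/* Editorial examples of the accepted forms, in RTL (illustrative):
     (reg 9)                              register indirect
     (plus (reg 9) (const_int 16))        reg plus 16-bit signed offset
     (plus (reg 9) (reg 10))              indexed, reg plus reg
     (lo_sum (reg 9) (symbol_ref "x"))    lo_sum reference
     (pre_inc (reg 9))                    auto-increment, TARGET_UPDATE  */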
7704 static bool
7705 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
7706 {
7707 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7708
7709 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
7710 if (VECTOR_MEM_ALTIVEC_P (mode)
7711 && GET_CODE (x) == AND
7712 && GET_CODE (XEXP (x, 1)) == CONST_INT
7713 && INTVAL (XEXP (x, 1)) == -16)
7714 x = XEXP (x, 0);
7715
7716 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
7717 return 0;
7718 if (legitimate_indirect_address_p (x, reg_ok_strict))
7719 return 1;
7720 if (TARGET_UPDATE
7721 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
7722 && mode_supports_pre_incdec_p (mode)
7723 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
7724 return 1;
7725 if (virtual_stack_registers_memory_p (x))
7726 return 1;
7727 if (reg_offset_p && legitimate_small_data_p (mode, x))
7728 return 1;
7729 if (reg_offset_p
7730 && legitimate_constant_pool_address_p (x, mode,
7731 reg_ok_strict || lra_in_progress))
7732 return 1;
7733 /* For TImode, if we have load/store quad and TImode in VSX registers, only
7734 allow register indirect addresses. This will allow the values to go in
7735 either GPRs or VSX registers without reloading. The vector types would
7736 tend to go into VSX registers, so we allow REG+REG, while TImode seems
7737 somewhat split, in that some uses are GPR based, and some VSX based. */
7738 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
7739 return 0;
7740 /* If not REG_OK_STRICT (i.e. before reload), let any stack offset pass. */
7741 if (! reg_ok_strict
7742 && reg_offset_p
7743 && GET_CODE (x) == PLUS
7744 && GET_CODE (XEXP (x, 0)) == REG
7745 && (XEXP (x, 0) == virtual_stack_vars_rtx
7746 || XEXP (x, 0) == arg_pointer_rtx)
7747 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7748 return 1;
7749 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
7750 return 1;
7751 if (mode != TFmode
7752 && mode != TDmode
7753 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7754 || TARGET_POWERPC64
7755 || (mode != DFmode && mode != DDmode)
7756 || (TARGET_E500_DOUBLE && mode != DDmode))
7757 && (TARGET_POWERPC64 || mode != DImode)
7758 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
7759 && mode != PTImode
7760 && !avoiding_indexed_address_p (mode)
7761 && legitimate_indexed_address_p (x, reg_ok_strict))
7762 return 1;
7763 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
7764 && mode_supports_pre_modify_p (mode)
7765 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
7766 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
7767 reg_ok_strict, false)
7768 || (!avoiding_indexed_address_p (mode)
7769 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
7770 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7771 return 1;
7772 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
7773 return 1;
7774 return 0;
7775 }
7776
7777 /* Debug version of rs6000_legitimate_address_p. */
7778 static bool
7779 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
7780 bool reg_ok_strict)
7781 {
7782 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
7783 fprintf (stderr,
7784 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
7785 "strict = %d, reload = %s, code = %s\n",
7786 ret ? "true" : "false",
7787 GET_MODE_NAME (mode),
7788 reg_ok_strict,
7789 (reload_completed
7790 ? "after"
7791 : (reload_in_progress ? "progress" : "before")),
7792 GET_RTX_NAME (GET_CODE (x)));
7793 debug_rtx (x);
7794
7795 return ret;
7796 }
7797
7798 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
7799
7800 static bool
7801 rs6000_mode_dependent_address_p (const_rtx addr,
7802 addr_space_t as ATTRIBUTE_UNUSED)
7803 {
7804 return rs6000_mode_dependent_address_ptr (addr);
7805 }
7806
7807 /* Return true if ADDR (a legitimate address expression)
7808 has an effect that depends on the machine mode it is used for.
7809
7810 On the RS/6000 this is true of all integral offsets (since AltiVec
7811 and VSX modes don't allow them) and of pre-increment or pre-decrement addresses.
7812
7813 ??? Except that due to conceptual problems in offsettable_address_p
7814 we can't really report the problems of integral offsets. So leave
7815 this assuming that the adjustable offset must be valid for the
7816 sub-words of a TFmode operand, which is what we had before. */
7817
7818 static bool
7819 rs6000_mode_dependent_address (const_rtx addr)
7820 {
7821 switch (GET_CODE (addr))
7822 {
7823 case PLUS:
7824 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
7825 is considered a legitimate address before reload, so there
7826 are no offset restrictions in that case. Note that this
7827 condition is safe in strict mode because any address involving
7828 virtual_stack_vars_rtx or arg_pointer_rtx would already have
7829 been rejected as illegitimate. */
7830 if (XEXP (addr, 0) != virtual_stack_vars_rtx
7831 && XEXP (addr, 0) != arg_pointer_rtx
7832 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
7833 {
7834 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
7835 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
7836 }
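      /* Editorial note: the unsigned compare above folds both
         out-of-range directions into one test.  On 32-bit, offsets in
         [-0x8000, 0x7ff3] are mode-independent: 12 bytes of headroom
         keep the last word of a multi-word (e.g. TFmode) access within
         a 16-bit displacement.  */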
7837 break;
7838
7839 case LO_SUM:
7840 /* Anything in the constant pool is sufficiently aligned that
7841 all bytes have the same high part address. */
7842 return !legitimate_constant_pool_address_p (addr, QImode, false);
7843
7844 /* Auto-increment cases are now treated generically in recog.c. */
7845 case PRE_MODIFY:
7846 return TARGET_UPDATE;
7847
7848 /* AND is only allowed in Altivec loads. */
7849 case AND:
7850 return true;
7851
7852 default:
7853 break;
7854 }
7855
7856 return false;
7857 }
7858
7859 /* Debug version of rs6000_mode_dependent_address. */
7860 static bool
7861 rs6000_debug_mode_dependent_address (const_rtx addr)
7862 {
7863 bool ret = rs6000_mode_dependent_address (addr);
7864
7865 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
7866 ret ? "true" : "false");
7867 debug_rtx (addr);
7868
7869 return ret;
7870 }
7871
7872 /* Implement FIND_BASE_TERM. */
7873
7874 rtx
7875 rs6000_find_base_term (rtx op)
7876 {
7877 rtx base;
7878
7879 base = op;
7880 if (GET_CODE (base) == CONST)
7881 base = XEXP (base, 0);
7882 if (GET_CODE (base) == PLUS)
7883 base = XEXP (base, 0);
7884 if (GET_CODE (base) == UNSPEC)
7885 switch (XINT (base, 1))
7886 {
7887 case UNSPEC_TOCREL:
7888 case UNSPEC_MACHOPIC_OFFSET:
7889 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
7890 for aliasing purposes. */
7891 return XVECEXP (base, 0, 0);
7892 }
7893
7894 return op;
7895 }
7896
7897 /* More elaborate version of recog's offsettable_memref_p predicate
7898 that works around the ??? note of rs6000_mode_dependent_address.
7899 In particular it accepts
7900
7901 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
7902
7903 in 32-bit mode, which the recog predicate rejects.
7904
7905 static bool
7906 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
7907 {
7908 bool worst_case;
7909
7910 if (!MEM_P (op))
7911 return false;
7912
7913 /* First mimic offsettable_memref_p. */
7914 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
7915 return true;
7916
7917 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
7918 the latter predicate knows nothing about the mode of the memory
7919 reference and, therefore, assumes that it is the largest supported
7920 mode (TFmode). As a consequence, legitimate offsettable memory
7921 references are rejected. rs6000_legitimate_offset_address_p contains
7922 the correct logic for the PLUS case of rs6000_mode_dependent_address,
7923 at least with a little bit of help here given that we know the
7924 actual registers used. */
7925 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
7926 || GET_MODE_SIZE (reg_mode) == 4);
7927 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
7928 true, worst_case);
7929 }
7930
7931 /* Change register usage conditional on target flags. */
7932 static void
7933 rs6000_conditional_register_usage (void)
7934 {
7935 int i;
7936
7937 if (TARGET_DEBUG_TARGET)
7938 fprintf (stderr, "rs6000_conditional_register_usage called\n");
7939
7940 /* Set MQ register fixed (already call_used) so that it will not be
7941 allocated. */
7942 fixed_regs[64] = 1;
7943
7944 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
7945 if (TARGET_64BIT)
7946 fixed_regs[13] = call_used_regs[13]
7947 = call_really_used_regs[13] = 1;
7948
7949 /* Conditionally disable FPRs. */
7950 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
7951 for (i = 32; i < 64; i++)
7952 fixed_regs[i] = call_used_regs[i]
7953 = call_really_used_regs[i] = 1;
7954
7955 /* The TOC register is not killed across calls in a way that is
7956 visible to the compiler. */
7957 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7958 call_really_used_regs[2] = 0;
7959
7960 if (DEFAULT_ABI == ABI_V4
7961 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7962 && flag_pic == 2)
7963 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7964
7965 if (DEFAULT_ABI == ABI_V4
7966 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7967 && flag_pic == 1)
7968 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7969 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7970 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7971
7972 if (DEFAULT_ABI == ABI_DARWIN
7973 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
7974 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7975 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7976 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7977
7978 if (TARGET_TOC && TARGET_MINIMAL_TOC)
7979 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7980 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7981
7982 if (TARGET_SPE)
7983 {
7984 global_regs[SPEFSCR_REGNO] = 1;
7985 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
7986 registers in prologues and epilogues. We no longer use r14
7987 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
7988 pool for link-compatibility with older versions of GCC. Once
7989 "old" code has died out, we can return r14 to the allocation
7990 pool. */
7991 fixed_regs[14]
7992 = call_used_regs[14]
7993 = call_really_used_regs[14] = 1;
7994 }
7995
7996 if (!TARGET_ALTIVEC && !TARGET_VSX)
7997 {
7998 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
7999 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8000 call_really_used_regs[VRSAVE_REGNO] = 1;
8001 }
8002
8003 if (TARGET_ALTIVEC || TARGET_VSX)
8004 global_regs[VSCR_REGNO] = 1;
8005
8006 if (TARGET_ALTIVEC_ABI)
8007 {
8008 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
8009 call_used_regs[i] = call_really_used_regs[i] = 1;
8010
8011 /* AIX reserves VR20:31 in non-extended ABI mode. */
8012 if (TARGET_XCOFF)
8013 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8014 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8015 }
8016 }
8017
8018 \f
8019 /* Output insns to set DEST equal to the constant SOURCE as a series of
8020 lis, ori and sldi instructions and return TRUE. */
8021
8022 bool
8023 rs6000_emit_set_const (rtx dest, rtx source)
8024 {
8025 machine_mode mode = GET_MODE (dest);
8026 rtx temp, set;
8027 rtx_insn *insn;
8028 HOST_WIDE_INT c;
8029
8030 gcc_checking_assert (CONST_INT_P (source));
8031 c = INTVAL (source);
8032 switch (mode)
8033 {
8034 case QImode:
8035 case HImode:
8036 emit_insn (gen_rtx_SET (VOIDmode, dest, source));
8037 return true;
8038
8039 case SImode:
8040 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8041
8042 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (temp),
8043 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8044 emit_insn (gen_rtx_SET (VOIDmode, dest,
8045 gen_rtx_IOR (SImode, copy_rtx (temp),
8046 GEN_INT (c & 0xffff))));
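      /* Editorial example: for c = 0x12345678 the two SETs above give
         TEMP = 0x12340000 (lis) and DEST = TEMP | 0x5678 (ori).  */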
8047 break;
8048
8049 case DImode:
8050 if (!TARGET_POWERPC64)
8051 {
8052 rtx hi, lo;
8053
8054 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8055 DImode);
8056 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8057 DImode);
8058 emit_move_insn (hi, GEN_INT (c >> 32));
8059 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8060 emit_move_insn (lo, GEN_INT (c));
8061 }
8062 else
8063 rs6000_emit_set_long_const (dest, c);
8064 break;
8065
8066 default:
8067 gcc_unreachable ();
8068 }
8069
8070 insn = get_last_insn ();
8071 set = single_set (insn);
8072 if (! CONSTANT_P (SET_SRC (set)))
8073 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8074
8075 return true;
8076 }
8077
8078 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8079 Output insns to set DEST equal to the constant C as a series of
8080 lis, ori and sldi instructions. */
8081
8082 static void
8083 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8084 {
8085 rtx temp;
8086 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8087
8088 ud1 = c & 0xffff;
8089 c = c >> 16;
8090 ud2 = c & 0xffff;
8091 c = c >> 16;
8092 ud3 = c & 0xffff;
8093 c = c >> 16;
8094 ud4 = c & 0xffff;
8095
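  /* Editorial example: c = 0x123456789abcdef0 decomposes as ud4 0x1234,
     ud3 0x5678, ud2 0x9abc, ud1 0xdef0, and the general case at the end
     emits roughly
       lis T,0x1234; ori T,T,0x5678; sldi T,T,32; oris T,T,0x9abc;
       ori DEST,T,0xdef0
     i.e. five insns for an arbitrary 64-bit constant; the earlier cases
     shorten this when the upper halfwords are all zeros or all ones.  */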
8096 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8097 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8098 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8099
8100 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8101 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8102 {
8103 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8104
8105 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8106 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8107 if (ud1 != 0)
8108 emit_move_insn (dest,
8109 gen_rtx_IOR (DImode, copy_rtx (temp),
8110 GEN_INT (ud1)));
8111 }
8112 else if (ud3 == 0 && ud4 == 0)
8113 {
8114 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8115
8116 gcc_assert (ud2 & 0x8000);
8117 emit_move_insn (copy_rtx (temp),
8118 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8119 if (ud1 != 0)
8120 emit_move_insn (copy_rtx (temp),
8121 gen_rtx_IOR (DImode, copy_rtx (temp),
8122 GEN_INT (ud1)));
8123 emit_move_insn (dest,
8124 gen_rtx_ZERO_EXTEND (DImode,
8125 gen_lowpart (SImode,
8126 copy_rtx (temp))));
8127 }
8128 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8129 || (ud4 == 0 && ! (ud3 & 0x8000)))
8130 {
8131 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8132
8133 emit_move_insn (copy_rtx (temp),
8134 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8135 if (ud2 != 0)
8136 emit_move_insn (copy_rtx (temp),
8137 gen_rtx_IOR (DImode, copy_rtx (temp),
8138 GEN_INT (ud2)));
8139 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8140 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8141 GEN_INT (16)));
8142 if (ud1 != 0)
8143 emit_move_insn (dest,
8144 gen_rtx_IOR (DImode, copy_rtx (temp),
8145 GEN_INT (ud1)));
8146 }
8147 else
8148 {
8149 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8150
8151 emit_move_insn (copy_rtx (temp),
8152 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8153 if (ud3 != 0)
8154 emit_move_insn (copy_rtx (temp),
8155 gen_rtx_IOR (DImode, copy_rtx (temp),
8156 GEN_INT (ud3)));
8157
8158 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8159 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8160 GEN_INT (32)));
8161 if (ud2 != 0)
8162 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8163 gen_rtx_IOR (DImode, copy_rtx (temp),
8164 GEN_INT (ud2 << 16)));
8165 if (ud1 != 0)
8166 emit_move_insn (dest,
8167 gen_rtx_IOR (DImode, copy_rtx (temp),
8168 GEN_INT (ud1)));
8169 }
8170 }
8171
8172 /* Helper for rs6000_emit_move below. Get rid of [r+r] memory refs
8173 in cases where they won't work (TImode, TFmode, TDmode, PTImode). */
8174
8175 static void
8176 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8177 {
8178 if (reload_in_progress)
8179 return;
8180
8181 if (GET_CODE (operands[0]) == MEM
8182 && GET_CODE (XEXP (operands[0], 0)) != REG
8183 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8184 GET_MODE (operands[0]), false))
8185 operands[0]
8186 = replace_equiv_address (operands[0],
8187 copy_addr_to_reg (XEXP (operands[0], 0)));
8188
8189 if (GET_CODE (operands[1]) == MEM
8190 && GET_CODE (XEXP (operands[1], 0)) != REG
8191 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8192 GET_MODE (operands[1]), false))
8193 operands[1]
8194 = replace_equiv_address (operands[1],
8195 copy_addr_to_reg (XEXP (operands[1], 0)));
8196 }
8197
8198 /* Generate a vector of constants to permute MODE for a little-endian
8199 storage operation by swapping the two halves of a vector. */
8200 static rtvec
8201 rs6000_const_vec (machine_mode mode)
8202 {
8203 int i, subparts;
8204 rtvec v;
8205
8206 switch (mode)
8207 {
8208 case V1TImode:
8209 subparts = 1;
8210 break;
8211 case V2DFmode:
8212 case V2DImode:
8213 subparts = 2;
8214 break;
8215 case V4SFmode:
8216 case V4SImode:
8217 subparts = 4;
8218 break;
8219 case V8HImode:
8220 subparts = 8;
8221 break;
8222 case V16QImode:
8223 subparts = 16;
8224 break;
8225 default:
8226 gcc_unreachable();
8227 }
8228
8229 v = rtvec_alloc (subparts);
8230
8231 for (i = 0; i < subparts / 2; ++i)
8232 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8233 for (i = subparts / 2; i < subparts; ++i)
8234 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8235
8236 return v;
8237 }
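/* Editorial example: for V4SImode the vector built above is {2, 3, 0, 1};
   a VEC_SELECT with that parallel swaps the two 64-bit halves, matching
   the element order lxvd2x/stxvd2x produce on little-endian.  */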
8238
8239 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8240 for a VSX load or store operation. */
8241 rtx
8242 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8243 {
8244 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8245 return gen_rtx_VEC_SELECT (mode, source, par);
8246 }
8247
8248 /* Emit a little-endian load from vector memory location SOURCE to VSX
8249 register DEST in mode MODE. The load is done with two permuting
8250 insn's that represent an lxvd2x and xxpermdi. */
8251 void
8252 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8253 {
8254 rtx tmp, permute_mem, permute_reg;
8255
8256 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8257 V1TImode). */
8258 if (mode == TImode || mode == V1TImode)
8259 {
8260 mode = V2DImode;
8261 dest = gen_lowpart (V2DImode, dest);
8262 source = adjust_address (source, V2DImode, 0);
8263 }
8264
8265 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8266 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8267 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8268 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
8269 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
8270 }
8271
8272 /* Emit a little-endian store to vector memory location DEST from VSX
8273 register SOURCE in mode MODE. The store is done with two permuting
8274 insn's that represent an xxpermdi and an stxvd2x. */
8275 void
8276 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8277 {
8278 rtx tmp, permute_src, permute_tmp;
8279
8280 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8281 V1TImode). */
8282 if (mode == TImode || mode == V1TImode)
8283 {
8284 mode = V2DImode;
8285 dest = adjust_address (dest, V2DImode, 0);
8286 source = gen_lowpart (V2DImode, source);
8287 }
8288
8289 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8290 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8291 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8292 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
8293 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
8294 }
8295
8296 /* Emit a sequence representing a little-endian VSX load or store,
8297 moving data from SOURCE to DEST in mode MODE. This is done
8298 separately from rs6000_emit_move to ensure it is called only
8299 during expand. LE VSX loads and stores introduced later are
8300 handled with a split. The expand-time RTL generation allows
8301 us to optimize away redundant pairs of register-permutes. */
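/* Editorial note: because each little-endian access is expressed as two
   explicit permutes at expand time, a value loaded and then stored back
   produces adjacent inverse permute pairs that later RTL passes can
   cancel; a post-reload split would hide that opportunity.  */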
8302 void
8303 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
8304 {
8305 gcc_assert (!BYTES_BIG_ENDIAN
8306 && VECTOR_MEM_VSX_P (mode)
8307 && !gpr_or_gpr_p (dest, source)
8308 && (MEM_P (source) ^ MEM_P (dest)));
8309
8310 if (MEM_P (source))
8311 {
8312 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8313 rs6000_emit_le_vsx_load (dest, source, mode);
8314 }
8315 else
8316 {
8317 if (!REG_P (source))
8318 source = force_reg (mode, source);
8319 rs6000_emit_le_vsx_store (dest, source, mode);
8320 }
8321 }
8322
8323 /* Emit a move from SOURCE to DEST in mode MODE. */
8324 void
8325 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
8326 {
8327 rtx operands[2];
8328 operands[0] = dest;
8329 operands[1] = source;
8330
8331 if (TARGET_DEBUG_ADDR)
8332 {
8333 fprintf (stderr,
8334 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8335 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8336 GET_MODE_NAME (mode),
8337 reload_in_progress,
8338 reload_completed,
8339 can_create_pseudo_p ());
8340 debug_rtx (dest);
8341 fprintf (stderr, "source:\n");
8342 debug_rtx (source);
8343 }
8344
8345 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
8346 if (CONST_WIDE_INT_P (operands[1])
8347 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8348 {
8349 /* This should be fixed with the introduction of CONST_WIDE_INT. */
8350 gcc_unreachable ();
8351 }
8352
8353 /* Check if GCC is setting up a block move that will end up using FP
8354 registers as temporaries. We must make sure this is acceptable. */
8355 if (GET_CODE (operands[0]) == MEM
8356 && GET_CODE (operands[1]) == MEM
8357 && mode == DImode
8358 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8359 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8360 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8361 ? 32 : MEM_ALIGN (operands[0])))
8362 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8363 ? 32
8364 : MEM_ALIGN (operands[1]))))
8365 && ! MEM_VOLATILE_P (operands [0])
8366 && ! MEM_VOLATILE_P (operands [1]))
8367 {
8368 emit_move_insn (adjust_address (operands[0], SImode, 0),
8369 adjust_address (operands[1], SImode, 0));
8370 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8371 adjust_address (copy_rtx (operands[1]), SImode, 4));
8372 return;
8373 }
8374
8375 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8376 && !gpc_reg_operand (operands[1], mode))
8377 operands[1] = force_reg (mode, operands[1]);
8378
8379 /* Recognize the case where operand[1] is a reference to thread-local
8380 data and load its address to a register. */
8381 if (tls_referenced_p (operands[1]))
8382 {
8383 enum tls_model model;
8384 rtx tmp = operands[1];
8385 rtx addend = NULL;
8386
8387 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8388 {
8389 addend = XEXP (XEXP (tmp, 0), 1);
8390 tmp = XEXP (XEXP (tmp, 0), 0);
8391 }
8392
8393 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8394 model = SYMBOL_REF_TLS_MODEL (tmp);
8395 gcc_assert (model != 0);
8396
8397 tmp = rs6000_legitimize_tls_address (tmp, model);
8398 if (addend)
8399 {
8400 tmp = gen_rtx_PLUS (mode, tmp, addend);
8401 tmp = force_operand (tmp, operands[0]);
8402 }
8403 operands[1] = tmp;
8404 }
8405
8406 /* Handle the case where reload calls us with an invalid address. */
8407 if (reload_in_progress && mode == Pmode
8408 && (! general_operand (operands[1], mode)
8409 || ! nonimmediate_operand (operands[0], mode)))
8410 goto emit_set;
8411
8412 /* 128-bit constant floating-point values on Darwin should really be
8413 loaded as two parts. */
8414 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
8415 && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
8416 {
8417 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
8418 simplify_gen_subreg (DFmode, operands[1], mode, 0),
8419 DFmode);
8420 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
8421 GET_MODE_SIZE (DFmode)),
8422 simplify_gen_subreg (DFmode, operands[1], mode,
8423 GET_MODE_SIZE (DFmode)),
8424 DFmode);
8425 return;
8426 }
8427
8428 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
8429 cfun->machine->sdmode_stack_slot =
8430 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
8431
8432
8433 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
8434 p1:SD) if p1 is not of floating point class and p0 is spilled, as
8435 we have no analogous movsd_store for this case. */
8436 if (lra_in_progress && mode == DDmode
8437 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8438 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8439 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
8440 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
8441 {
8442 enum reg_class cl;
8443 int regno = REGNO (SUBREG_REG (operands[1]));
8444
8445 if (regno >= FIRST_PSEUDO_REGISTER)
8446 {
8447 cl = reg_preferred_class (regno);
8448 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
8449 }
8450 if (regno >= 0 && ! FP_REGNO_P (regno))
8451 {
8452 mode = SDmode;
8453 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
8454 operands[1] = SUBREG_REG (operands[1]);
8455 }
8456 }
8457 if (lra_in_progress
8458 && mode == SDmode
8459 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8460 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8461 && (REG_P (operands[1])
8462 || (GET_CODE (operands[1]) == SUBREG
8463 && REG_P (SUBREG_REG (operands[1])))))
8464 {
8465 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
8466 ? SUBREG_REG (operands[1]) : operands[1]);
8467 enum reg_class cl;
8468
8469 if (regno >= FIRST_PSEUDO_REGISTER)
8470 {
8471 cl = reg_preferred_class (regno);
8472 gcc_assert (cl != NO_REGS);
8473 regno = ira_class_hard_regs[cl][0];
8474 }
8475 if (FP_REGNO_P (regno))
8476 {
8477 if (GET_MODE (operands[0]) != DDmode)
8478 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
8479 emit_insn (gen_movsd_store (operands[0], operands[1]));
8480 }
8481 else if (INT_REGNO_P (regno))
8482 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8483 else
8484 gcc_unreachable();
8485 return;
8486 }
8487 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
8488 p1:DD)) if p0 is not of floating point class and p1 is spilled, as
8489 we have no analogous movsd_load for this case. */
8490 if (lra_in_progress && mode == DDmode
8491 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
8492 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
8493 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8494 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8495 {
8496 enum reg_class cl;
8497 int regno = REGNO (SUBREG_REG (operands[0]));
8498
8499 if (regno >= FIRST_PSEUDO_REGISTER)
8500 {
8501 cl = reg_preferred_class (regno);
8502 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
8503 }
8504 if (regno >= 0 && ! FP_REGNO_P (regno))
8505 {
8506 mode = SDmode;
8507 operands[0] = SUBREG_REG (operands[0]);
8508 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
8509 }
8510 }
8511 if (lra_in_progress
8512 && mode == SDmode
8513 && (REG_P (operands[0])
8514 || (GET_CODE (operands[0]) == SUBREG
8515 && REG_P (SUBREG_REG (operands[0]))))
8516 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8517 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8518 {
8519 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
8520 ? SUBREG_REG (operands[0]) : operands[0]);
8521 enum reg_class cl;
8522
8523 if (regno >= FIRST_PSEUDO_REGISTER)
8524 {
8525 cl = reg_preferred_class (regno);
8526 gcc_assert (cl != NO_REGS);
8527 regno = ira_class_hard_regs[cl][0];
8528 }
8529 if (FP_REGNO_P (regno))
8530 {
8531 if (GET_MODE (operands[1]) != DDmode)
8532 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
8533 emit_insn (gen_movsd_load (operands[0], operands[1]));
8534 }
8535 else if (INT_REGNO_P (regno))
8536 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8537 else
8538 gcc_unreachable();
8539 return;
8540 }
8541
8542 if (reload_in_progress
8543 && mode == SDmode
8544 && cfun->machine->sdmode_stack_slot != NULL_RTX
8545 && MEM_P (operands[0])
8546 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
8547 && REG_P (operands[1]))
8548 {
8549 if (FP_REGNO_P (REGNO (operands[1])))
8550 {
8551 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
8552 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8553 emit_insn (gen_movsd_store (mem, operands[1]));
8554 }
8555 else if (INT_REGNO_P (REGNO (operands[1])))
8556 {
8557 rtx mem = operands[0];
8558 if (BYTES_BIG_ENDIAN)
8559 mem = adjust_address_nv (mem, mode, 4);
8560 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8561 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
8562 }
8563 else
8564 gcc_unreachable();
8565 return;
8566 }
8567 if (reload_in_progress
8568 && mode == SDmode
8569 && REG_P (operands[0])
8570 && MEM_P (operands[1])
8571 && cfun->machine->sdmode_stack_slot != NULL_RTX
8572 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
8573 {
8574 if (FP_REGNO_P (REGNO (operands[0])))
8575 {
8576 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
8577 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8578 emit_insn (gen_movsd_load (operands[0], mem));
8579 }
8580 else if (INT_REGNO_P (REGNO (operands[0])))
8581 {
8582 rtx mem = operands[1];
8583 if (BYTES_BIG_ENDIAN)
8584 mem = adjust_address_nv (mem, mode, 4);
8585 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8586 emit_insn (gen_movsd_hardfloat (operands[0], mem));
8587 }
8588 else
8589 gcc_unreachable();
8590 return;
8591 }
8592
8593 /* FIXME: In the long term, this switch statement should go away
8594 and be replaced by a sequence of tests based on things like
8595 mode == Pmode. */
8596 switch (mode)
8597 {
8598 case HImode:
8599 case QImode:
8600 if (CONSTANT_P (operands[1])
8601 && GET_CODE (operands[1]) != CONST_INT)
8602 operands[1] = force_const_mem (mode, operands[1]);
8603 break;
8604
8605 case TFmode:
8606 case TDmode:
8607 rs6000_eliminate_indexed_memrefs (operands);
8608 /* fall through */
8609
8610 case DFmode:
8611 case DDmode:
8612 case SFmode:
8613 case SDmode:
8614 if (CONSTANT_P (operands[1])
8615 && ! easy_fp_constant (operands[1], mode))
8616 operands[1] = force_const_mem (mode, operands[1]);
8617 break;
8618
8619 case V16QImode:
8620 case V8HImode:
8621 case V4SFmode:
8622 case V4SImode:
8623 case V4HImode:
8624 case V2SFmode:
8625 case V2SImode:
8626 case V1DImode:
8627 case V2DFmode:
8628 case V2DImode:
8629 case V1TImode:
8630 if (CONSTANT_P (operands[1])
8631 && !easy_vector_constant (operands[1], mode))
8632 operands[1] = force_const_mem (mode, operands[1]);
8633 break;
8634
8635 case SImode:
8636 case DImode:
8637 /* Use the default pattern for the address of ELF small data. */
8638 if (TARGET_ELF
8639 && mode == Pmode
8640 && DEFAULT_ABI == ABI_V4
8641 && (GET_CODE (operands[1]) == SYMBOL_REF
8642 || GET_CODE (operands[1]) == CONST)
8643 && small_data_operand (operands[1], mode))
8644 {
8645 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8646 return;
8647 }
8648
8649 if (DEFAULT_ABI == ABI_V4
8650 && mode == Pmode && mode == SImode
8651 && flag_pic == 1 && got_operand (operands[1], mode))
8652 {
8653 emit_insn (gen_movsi_got (operands[0], operands[1]));
8654 return;
8655 }
8656
8657 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
8658 && TARGET_NO_TOC
8659 && ! flag_pic
8660 && mode == Pmode
8661 && CONSTANT_P (operands[1])
8662 && GET_CODE (operands[1]) != HIGH
8663 && GET_CODE (operands[1]) != CONST_INT)
8664 {
8665 rtx target = (!can_create_pseudo_p ()
8666 ? operands[0]
8667 : gen_reg_rtx (mode));
8668
8669 /* If this is a function address on -mcall-aixdesc,
8670 convert it to the address of the descriptor. */
8671 if (DEFAULT_ABI == ABI_AIX
8672 && GET_CODE (operands[1]) == SYMBOL_REF
8673 && XSTR (operands[1], 0)[0] == '.')
8674 {
8675 const char *name = XSTR (operands[1], 0);
8676 rtx new_ref;
8677 while (*name == '.')
8678 name++;
8679 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
8680 CONSTANT_POOL_ADDRESS_P (new_ref)
8681 = CONSTANT_POOL_ADDRESS_P (operands[1]);
8682 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
8683 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
8684 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
8685 operands[1] = new_ref;
8686 }
8687
8688 if (DEFAULT_ABI == ABI_DARWIN)
8689 {
8690 #if TARGET_MACHO
8691 if (MACHO_DYNAMIC_NO_PIC_P)
8692 {
8693 /* Take care of any required data indirection. */
8694 operands[1] = rs6000_machopic_legitimize_pic_address (
8695 operands[1], mode, operands[0]);
8696 if (operands[0] != operands[1])
8697 emit_insn (gen_rtx_SET (VOIDmode,
8698 operands[0], operands[1]));
8699 return;
8700 }
8701 #endif
8702 emit_insn (gen_macho_high (target, operands[1]));
8703 emit_insn (gen_macho_low (operands[0], target, operands[1]));
8704 return;
8705 }
8706
8707 emit_insn (gen_elf_high (target, operands[1]));
8708 emit_insn (gen_elf_low (operands[0], target, operands[1]));
8709 return;
8710 }
8711
8712 /* If this is a SYMBOL_REF that refers to a constant pool entry,
8713 and we have put it in the TOC, we just need to make a TOC-relative
8714 reference to it. */
8715 if (TARGET_TOC
8716 && GET_CODE (operands[1]) == SYMBOL_REF
8717 && use_toc_relative_ref (operands[1]))
8718 operands[1] = create_TOC_reference (operands[1], operands[0]);
8719 else if (mode == Pmode
8720 && CONSTANT_P (operands[1])
8721 && GET_CODE (operands[1]) != HIGH
8722 && ((GET_CODE (operands[1]) != CONST_INT
8723 && ! easy_fp_constant (operands[1], mode))
8724 || (GET_CODE (operands[1]) == CONST_INT
8725 && (num_insns_constant (operands[1], mode)
8726 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
8727 || (GET_CODE (operands[0]) == REG
8728 && FP_REGNO_P (REGNO (operands[0]))))
8729 && !toc_relative_expr_p (operands[1], false)
8730 && (TARGET_CMODEL == CMODEL_SMALL
8731 || can_create_pseudo_p ()
8732 || (REG_P (operands[0])
8733 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
8734 {
8735
8736 #if TARGET_MACHO
8737 /* Darwin uses a special PIC legitimizer. */
8738 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
8739 {
8740 operands[1] =
8741 rs6000_machopic_legitimize_pic_address (operands[1], mode,
8742 operands[0]);
8743 if (operands[0] != operands[1])
8744 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8745 return;
8746 }
8747 #endif
8748
8749 /* If we are to limit the number of things we put in the TOC and
8750 this is a symbol plus a constant we can add in one insn,
8751 just put the symbol in the TOC and add the constant. Don't do
8752 this if reload is in progress. */
8753 if (GET_CODE (operands[1]) == CONST
8754 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
8755 && GET_CODE (XEXP (operands[1], 0)) == PLUS
8756 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
8757 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
8758 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
8759 && ! side_effects_p (operands[0]))
8760 {
8761 rtx sym =
8762 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
8763 rtx other = XEXP (XEXP (operands[1], 0), 1);
8764
8765 sym = force_reg (mode, sym);
8766 emit_insn (gen_add3_insn (operands[0], sym, other));
8767 return;
8768 }
8769
8770 operands[1] = force_const_mem (mode, operands[1]);
8771
8772 if (TARGET_TOC
8773 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8774 && constant_pool_expr_p (XEXP (operands[1], 0))
8775 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
8776 get_pool_constant (XEXP (operands[1], 0)),
8777 get_pool_mode (XEXP (operands[1], 0))))
8778 {
8779 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
8780 operands[0]);
8781 operands[1] = gen_const_mem (mode, tocref);
8782 set_mem_alias_set (operands[1], get_TOC_alias_set ());
8783 }
8784 }
8785 break;
8786
8787 case TImode:
8788 if (!VECTOR_MEM_VSX_P (TImode))
8789 rs6000_eliminate_indexed_memrefs (operands);
8790 break;
8791
8792 case PTImode:
8793 rs6000_eliminate_indexed_memrefs (operands);
8794 break;
8795
8796 default:
8797 fatal_insn ("bad move", gen_rtx_SET (VOIDmode, dest, source));
8798 }
8799
8800 /* Above, we may have called force_const_mem which may have returned
8801 an invalid address. If we can, fix this up; otherwise, reload will
8802 have to deal with it. */
8803 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
8804 operands[1] = validize_mem (operands[1]);
8805
8806 emit_set:
8807 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8808 }
8809
8810 /* Return true if a structure, union or array containing FIELD should be
8811 accessed using `BLKMODE'.
8812
8813 For the SPE, simd types are V2SI, and gcc can be tempted to put the
8814 entire thing in a DI and use subregs to access the internals.
8815 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
8816 back-end. Because a single GPR can hold a V2SI, but not a DI, the
8817 best thing to do is set structs to BLKmode and avoid Severe Tire
8818 Damage.
8819
8820 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
8821 fit into 1, whereas DI still needs two. */
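/* E.g. (illustrative): on SPE, a struct whose only member is a
   V2SImode vector is forced to BLKmode here, so the middle end
   never tries to widen it into a single DImode register. */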
8822
8823 static bool
8824 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
8825 {
8826 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
8827 || (TARGET_E500_DOUBLE && mode == DFmode));
8828 }
8829 \f
8830 /* Nonzero if we can use a floating-point register to pass this arg. */
8831 #define USE_FP_FOR_ARG_P(CUM,MODE) \
8832 (SCALAR_FLOAT_MODE_P (MODE) \
8833 && (CUM)->fregno <= FP_ARG_MAX_REG \
8834 && TARGET_HARD_FLOAT && TARGET_FPRS)
8835
8836 /* Nonzero if we can use an AltiVec register to pass this arg. */
8837 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
8838 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
8839 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
8840 && TARGET_ALTIVEC_ABI \
8841 && (NAMED))
8842
8843 /* Walk down the type tree of TYPE counting consecutive base elements.
8844 If *MODEP is VOIDmode, then set it to the first valid floating point
8845 or vector type. If a non-floating point or vector type is found, or
8846 if a floating point or vector type that doesn't match a non-VOIDmode
8847 *MODEP is found, then return -1, otherwise return the count in the
8848 sub-tree. */
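/* A worked example (illustrative, not from the surrounding code):
   for
     struct { float a[2]; float b; };
   every leaf is SFmode, so *MODEP becomes SFmode and the walk
   returns 3; adding a member of any other mode, say "int c;",
   would make it return -1 instead. */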
8849
8850 static int
8851 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
8852 {
8853 machine_mode mode;
8854 HOST_WIDE_INT size;
8855
8856 switch (TREE_CODE (type))
8857 {
8858 case REAL_TYPE:
8859 mode = TYPE_MODE (type);
8860 if (!SCALAR_FLOAT_MODE_P (mode))
8861 return -1;
8862
8863 if (*modep == VOIDmode)
8864 *modep = mode;
8865
8866 if (*modep == mode)
8867 return 1;
8868
8869 break;
8870
8871 case COMPLEX_TYPE:
8872 mode = TYPE_MODE (TREE_TYPE (type));
8873 if (!SCALAR_FLOAT_MODE_P (mode))
8874 return -1;
8875
8876 if (*modep == VOIDmode)
8877 *modep = mode;
8878
8879 if (*modep == mode)
8880 return 2;
8881
8882 break;
8883
8884 case VECTOR_TYPE:
8885 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
8886 return -1;
8887
8888 /* Use V4SImode as representative of all 128-bit vector types. */
8889 size = int_size_in_bytes (type);
8890 switch (size)
8891 {
8892 case 16:
8893 mode = V4SImode;
8894 break;
8895 default:
8896 return -1;
8897 }
8898
8899 if (*modep == VOIDmode)
8900 *modep = mode;
8901
8902 /* Vector modes are considered to be opaque: two vectors are
8903 equivalent for the purposes of being homogeneous aggregates
8904 if they are the same size. */
8905 if (*modep == mode)
8906 return 1;
8907
8908 break;
8909
8910 case ARRAY_TYPE:
8911 {
8912 int count;
8913 tree index = TYPE_DOMAIN (type);
8914
8915 /* Can't handle incomplete types nor sizes that are not
8916 fixed. */
8917 if (!COMPLETE_TYPE_P (type)
8918 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8919 return -1;
8920
8921 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
8922 if (count == -1
8923 || !index
8924 || !TYPE_MAX_VALUE (index)
8925 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
8926 || !TYPE_MIN_VALUE (index)
8927 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
8928 || count < 0)
8929 return -1;
8930
8931 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
8932 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
8933
8934 /* There must be no padding. */
8935 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8936 return -1;
8937
8938 return count;
8939 }
8940
8941 case RECORD_TYPE:
8942 {
8943 int count = 0;
8944 int sub_count;
8945 tree field;
8946
8947 /* Can't handle incomplete types nor sizes that are not
8948 fixed. */
8949 if (!COMPLETE_TYPE_P (type)
8950 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8951 return -1;
8952
8953 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8954 {
8955 if (TREE_CODE (field) != FIELD_DECL)
8956 continue;
8957
8958 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
8959 if (sub_count < 0)
8960 return -1;
8961 count += sub_count;
8962 }
8963
8964 /* There must be no padding. */
8965 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8966 return -1;
8967
8968 return count;
8969 }
8970
8971 case UNION_TYPE:
8972 case QUAL_UNION_TYPE:
8973 {
8974 /* These aren't very interesting except in a degenerate case. */
8975 int count = 0;
8976 int sub_count;
8977 tree field;
8978
8979 /* Can't handle incomplete types nor sizes that are not
8980 fixed. */
8981 if (!COMPLETE_TYPE_P (type)
8982 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8983 return -1;
8984
8985 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8986 {
8987 if (TREE_CODE (field) != FIELD_DECL)
8988 continue;
8989
8990 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
8991 if (sub_count < 0)
8992 return -1;
8993 count = count > sub_count ? count : sub_count;
8994 }
8995
8996 /* There must be no padding. */
8997 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8998 return -1;
8999
9000 return count;
9001 }
9002
9003 default:
9004 break;
9005 }
9006
9007 return -1;
9008 }
9009
9010 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
9011 float or vector aggregate that shall be passed in FP/vector registers
9012 according to the ELFv2 ABI, return the homogeneous element mode in
9013 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9014
9015 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
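/* A worked example (illustrative): under ELFv2, a parameter of type
   struct { double x; double y; } yields *ELT_MODE == DFmode and
   *N_ELTS == 2; each DFmode element needs one FPR, so the whole
   aggregate fits comfortably under AGGR_ARG_NUM_REG. */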
9016
9017 static bool
9018 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9019 machine_mode *elt_mode,
9020 int *n_elts)
9021 {
9022 /* Note that we do not accept complex types at the top level as
9023 homogeneous aggregates; these types are handled via the
9024 targetm.calls.split_complex_arg mechanism. Complex types
9025 can be elements of homogeneous aggregates, however. */
9026 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9027 {
9028 machine_mode field_mode = VOIDmode;
9029 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9030
9031 if (field_count > 0)
9032 {
9033 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode)
9034 ? (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9035
9036 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9037 up to AGGR_ARG_NUM_REG registers. */
9038 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9039 {
9040 if (elt_mode)
9041 *elt_mode = field_mode;
9042 if (n_elts)
9043 *n_elts = field_count;
9044 return true;
9045 }
9046 }
9047 }
9048
9049 if (elt_mode)
9050 *elt_mode = mode;
9051 if (n_elts)
9052 *n_elts = 1;
9053 return false;
9054 }
9055
9056 /* Return a nonzero value if the function value should be returned in
9057 memory, just as large structures always are. TYPE will be
9058 the data type of the value, and FNTYPE will be the type of the
9059 function doing the returning, or @code{NULL} for libcalls.
9060
9061 The AIX ABI for the RS/6000 specifies that all structures are
9062 returned in memory. The Darwin ABI does the same.
9063
9064 For the Darwin 64 Bit ABI, a function result can be returned in
9065 registers or in memory, depending on the size of the return data
9066 type. If it is returned in registers, the value occupies the same
9067 registers as it would if it were the first and only function
9068 argument. Otherwise, the function places its result in memory at
9069 the location pointed to by GPR3.
9070
9071 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9072 but a draft put them in memory, and GCC used to implement the draft
9073 instead of the final standard. Therefore, aix_struct_return
9074 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9075 compatibility can change DRAFT_V4_STRUCT_RET to override the
9076 default, and -m switches get the final word. See
9077 rs6000_option_override_internal for more details.
9078
9079 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9080 long double support is enabled. These values are returned in memory.
9081
9082 int_size_in_bytes returns -1 for variable size objects, which go in
9083 memory always. The cast to unsigned makes -1 > 8. */
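/* Two illustrative data points: under ELFv2 a 16-byte
   struct { long a; long b; } is returned in registers via the
   16-byte test below, while under the AIX rules the same struct
   goes to memory because aix_struct_return is set. */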
9084
9085 static bool
9086 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9087 {
9088 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9089 if (TARGET_MACHO
9090 && rs6000_darwin64_abi
9091 && TREE_CODE (type) == RECORD_TYPE
9092 && int_size_in_bytes (type) > 0)
9093 {
9094 CUMULATIVE_ARGS valcum;
9095 rtx valret;
9096
9097 valcum.words = 0;
9098 valcum.fregno = FP_ARG_MIN_REG;
9099 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9100 /* Do a trial code generation as if this were going to be passed
9101 as an argument; if any part goes in memory, we return NULL. */
9102 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9103 if (valret)
9104 return false;
9105 /* Otherwise fall through to more conventional ABI rules. */
9106 }
9107
9108 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers.  */
9109 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9110 NULL, NULL))
9111 return false;
9112
9113 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers.  */
9114 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9115 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9116 return false;
9117
9118 if (AGGREGATE_TYPE_P (type)
9119 && (aix_struct_return
9120 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9121 return true;
9122
9123 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9124 modes only exist for GCC vector types if -maltivec. */
9125 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9126 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9127 return false;
9128
9129 /* Return synthetic vectors in memory. */
9130 if (TREE_CODE (type) == VECTOR_TYPE
9131 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9132 {
9133 static bool warned_for_return_big_vectors = false;
9134 if (!warned_for_return_big_vectors)
9135 {
9136 warning (0, "GCC vector returned by reference: "
9137 "non-standard ABI extension with no compatibility guarantee");
9138 warned_for_return_big_vectors = true;
9139 }
9140 return true;
9141 }
9142
9143 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && TYPE_MODE (type) == TFmode)
9144 return true;
9145
9146 return false;
9147 }
9148
9149 /* Specify whether values returned in registers should be at the most
9150 significant end of a register. We want aggregates returned by
9151 value to match the way aggregates are passed to functions. */
9152
9153 static bool
9154 rs6000_return_in_msb (const_tree valtype)
9155 {
9156 return (DEFAULT_ABI == ABI_ELFv2
9157 && BYTES_BIG_ENDIAN
9158 && AGGREGATE_TYPE_P (valtype)
9159 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9160 }
9161
9162 #ifdef HAVE_AS_GNU_ATTRIBUTE
9163 /* Return TRUE if a call to function FNDECL may be one that
9164 potentially affects the function calling ABI of the object file. */
9165
9166 static bool
9167 call_ABI_of_interest (tree fndecl)
9168 {
9169 if (symtab->state == EXPANSION)
9170 {
9171 struct cgraph_node *c_node;
9172
9173 /* Libcalls are always interesting. */
9174 if (fndecl == NULL_TREE)
9175 return true;
9176
9177 /* Any call to an external function is interesting. */
9178 if (DECL_EXTERNAL (fndecl))
9179 return true;
9180
9181 /* Interesting functions that we are emitting in this object file. */
9182 c_node = cgraph_node::get (fndecl);
9183 c_node = c_node->ultimate_alias_target ();
9184 return !c_node->only_called_directly_p ();
9185 }
9186 return false;
9187 }
9188 #endif
9189
9190 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9191 for a call to a function whose data type is FNTYPE.
9192 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
9193
9194 For incoming args we set the number of arguments in the prototype large
9195 so we never return a PARALLEL. */
9196
9197 void
9198 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9199 rtx libname ATTRIBUTE_UNUSED, int incoming,
9200 int libcall, int n_named_args,
9201 tree fndecl ATTRIBUTE_UNUSED,
9202 machine_mode return_mode ATTRIBUTE_UNUSED)
9203 {
9204 static CUMULATIVE_ARGS zero_cumulative;
9205
9206 *cum = zero_cumulative;
9207 cum->words = 0;
9208 cum->fregno = FP_ARG_MIN_REG;
9209 cum->vregno = ALTIVEC_ARG_MIN_REG;
9210 cum->prototype = (fntype && prototype_p (fntype));
9211 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9212 ? CALL_LIBCALL : CALL_NORMAL);
9213 cum->sysv_gregno = GP_ARG_MIN_REG;
9214 cum->stdarg = stdarg_p (fntype);
9215
9216 cum->nargs_prototype = 0;
9217 if (incoming || cum->prototype)
9218 cum->nargs_prototype = n_named_args;
9219
9220 /* Check for a longcall attribute. */
9221 if ((!fntype && rs6000_default_long_calls)
9222 || (fntype
9223 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9224 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9225 cum->call_cookie |= CALL_LONG;
9226
9227 if (TARGET_DEBUG_ARG)
9228 {
9229 fprintf (stderr, "\ninit_cumulative_args:");
9230 if (fntype)
9231 {
9232 tree ret_type = TREE_TYPE (fntype);
9233 fprintf (stderr, " ret code = %s,",
9234 get_tree_code_name (TREE_CODE (ret_type)));
9235 }
9236
9237 if (cum->call_cookie & CALL_LONG)
9238 fprintf (stderr, " longcall,");
9239
9240 fprintf (stderr, " proto = %d, nargs = %d\n",
9241 cum->prototype, cum->nargs_prototype);
9242 }
9243
9244 #ifdef HAVE_AS_GNU_ATTRIBUTE
9245 if (DEFAULT_ABI == ABI_V4)
9246 {
9247 cum->escapes = call_ABI_of_interest (fndecl);
9248 if (cum->escapes)
9249 {
9250 tree return_type;
9251
9252 if (fntype)
9253 {
9254 return_type = TREE_TYPE (fntype);
9255 return_mode = TYPE_MODE (return_type);
9256 }
9257 else
9258 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9259
9260 if (return_type != NULL)
9261 {
9262 if (TREE_CODE (return_type) == RECORD_TYPE
9263 && TYPE_TRANSPARENT_AGGR (return_type))
9264 {
9265 return_type = TREE_TYPE (first_field (return_type));
9266 return_mode = TYPE_MODE (return_type);
9267 }
9268 if (AGGREGATE_TYPE_P (return_type)
9269 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9270 <= 8))
9271 rs6000_returns_struct = true;
9272 }
9273 if (SCALAR_FLOAT_MODE_P (return_mode))
9274 rs6000_passes_float = true;
9275 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9276 || SPE_VECTOR_MODE (return_mode))
9277 rs6000_passes_vector = true;
9278 }
9279 }
9280 #endif
9281
9282 if (fntype
9283 && !TARGET_ALTIVEC
9284 && TARGET_ALTIVEC_ABI
9285 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9286 {
9287 error ("cannot return value in vector register because"
9288 " altivec instructions are disabled, use -maltivec"
9289 " to enable them");
9290 }
9291 }
9292 \f
9293 /* Return true if TYPE must be passed on the stack and not in registers. */
9294
9295 static bool
9296 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
9297 {
9298 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9299 return must_pass_in_stack_var_size (mode, type);
9300 else
9301 return must_pass_in_stack_var_size_or_pad (mode, type);
9302 }
9303
9304 /* If defined, a C expression which determines whether, and in which
9305 direction, to pad out an argument with extra space. The value
9306 should be of type `enum direction': either `upward' to pad above
9307 the argument, `downward' to pad below, or `none' to inhibit
9308 padding.
9309
9310 For the AIX ABI structs are always stored left shifted in their
9311 argument slot. */
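/* E.g. (illustrative): on a big-endian target a 2-byte struct is
   padded downward, like a short, while a 3-byte struct is padded
   upward, i.e. left-justified in its slot. */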
9312
9313 enum direction
9314 function_arg_padding (machine_mode mode, const_tree type)
9315 {
9316 #ifndef AGGREGATE_PADDING_FIXED
9317 #define AGGREGATE_PADDING_FIXED 0
9318 #endif
9319 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9320 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9321 #endif
9322
9323 if (!AGGREGATE_PADDING_FIXED)
9324 {
9325 /* GCC used to pass structures of the same size as integer types as
9326 if they were in fact integers, ignoring FUNCTION_ARG_PADDING;
9327 i.e. structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9328 passed padded downward, except that -mstrict-align further
9329 muddied the water in that multi-component structures of 2 and 4
9330 bytes in size were passed padded upward.
9331
9332 The following arranges for best compatibility with previous
9333 versions of gcc, but removes the -mstrict-align dependency. */
9334 if (BYTES_BIG_ENDIAN)
9335 {
9336 HOST_WIDE_INT size = 0;
9337
9338 if (mode == BLKmode)
9339 {
9340 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
9341 size = int_size_in_bytes (type);
9342 }
9343 else
9344 size = GET_MODE_SIZE (mode);
9345
9346 if (size == 1 || size == 2 || size == 4)
9347 return downward;
9348 }
9349 return upward;
9350 }
9351
9352 if (AGGREGATES_PAD_UPWARD_ALWAYS)
9353 {
9354 if (type != 0 && AGGREGATE_TYPE_P (type))
9355 return upward;
9356 }
9357
9358 /* Fall back to the default. */
9359 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
9360 }
9361
9362 /* If defined, a C expression that gives the alignment boundary, in bits,
9363 of an argument with the specified mode and type. If it is not defined,
9364 PARM_BOUNDARY is used for all arguments.
9365
9366 V.4 wants long longs and doubles to be double word aligned. Just
9367 testing the mode size is a boneheaded way to do this as it means
9368 that other types such as complex int are also double word aligned.
9369 However, we're stuck with this because changing the ABI might break
9370 existing library interfaces.
9371
9372 Doubleword align SPE vectors.
9373 Quadword align Altivec/VSX vectors.
9374 Quadword align large synthetic vector types. */
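/* Concretely (illustrative): on V.4 a "long long" or "double"
   argument lands on a 64-bit boundary, a V4SI AltiVec argument on
   a 128-bit boundary, and everything else falls back to
   PARM_BOUNDARY below. */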
9375
9376 static unsigned int
9377 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
9378 {
9379 machine_mode elt_mode;
9380 int n_elts;
9381
9382 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9383
9384 if (DEFAULT_ABI == ABI_V4
9385 && (GET_MODE_SIZE (mode) == 8
9386 || (TARGET_HARD_FLOAT
9387 && TARGET_FPRS
9388 && (mode == TFmode || mode == TDmode))))
9389 return 64;
9390 else if (SPE_VECTOR_MODE (mode)
9391 || (type && TREE_CODE (type) == VECTOR_TYPE
9392 && int_size_in_bytes (type) >= 8
9393 && int_size_in_bytes (type) < 16))
9394 return 64;
9395 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9396 || (type && TREE_CODE (type) == VECTOR_TYPE
9397 && int_size_in_bytes (type) >= 16))
9398 return 128;
9399
9400 /* Aggregate types that need > 8 byte alignment are quadword-aligned
9401 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
9402 -mcompat-align-parm is used. */
9403 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
9404 || DEFAULT_ABI == ABI_ELFv2)
9405 && type && TYPE_ALIGN (type) > 64)
9406 {
9407 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
9408 or homogeneous float/vector aggregates here. We already handled
9409 vector aggregates above, but still need to check for float here. */
9410 bool aggregate_p = (AGGREGATE_TYPE_P (type)
9411 && !SCALAR_FLOAT_MODE_P (elt_mode));
9412
9413 /* We used to check for BLKmode instead of the above aggregate type
9414 check. Warn when this results in any difference to the ABI. */
9415 if (aggregate_p != (mode == BLKmode))
9416 {
9417 static bool warned;
9418 if (!warned && warn_psabi)
9419 {
9420 warned = true;
9421 inform (input_location,
9422 "the ABI of passing aggregates with %d-byte alignment"
9423 " has changed in GCC 5",
9424 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
9425 }
9426 }
9427
9428 if (aggregate_p)
9429 return 128;
9430 }
9431
9432 /* Similar for the Darwin64 ABI. Note that for historical reasons we
9433 implement the "aggregate type" check as a BLKmode check here; this
9434 means certain aggregate types are in fact not aligned. */
9435 if (TARGET_MACHO && rs6000_darwin64_abi
9436 && mode == BLKmode
9437 && type && TYPE_ALIGN (type) > 64)
9438 return 128;
9439
9440 return PARM_BOUNDARY;
9441 }
9442
9443 /* The offset in words to the start of the parameter save area. */
9444
9445 static unsigned int
9446 rs6000_parm_offset (void)
9447 {
9448 return (DEFAULT_ABI == ABI_V4 ? 2
9449 : DEFAULT_ABI == ABI_ELFv2 ? 4
9450 : 6);
9451 }
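/* The constants above are the size of the fixed stack-frame header
   each ABI places before the parameter save area: back chain plus
   LR save word for V.4 (2 words); those plus CR and TOC save words
   for ELFv2 (4 words); and additionally the reserved compiler and
   linker words for AIX/ELFv1 (6 words). */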
9452
9453 /* For a function parm of MODE and TYPE, return the starting word in
9454 the parameter area. NWORDS of the parameter area are already used. */
9455
9456 static unsigned int
9457 rs6000_parm_start (machine_mode mode, const_tree type,
9458 unsigned int nwords)
9459 {
9460 unsigned int align;
9461
9462 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
9463 return nwords + (-(rs6000_parm_offset () + nwords) & align);
9464 }
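/* A worked example (illustrative): under ELFv2 (parm offset 4), a
   quadword-aligned argument with NWORDS == 1 already used gives
   ALIGN == 1 and -(4 + 1) & 1 == 1, so the argument starts at
   word 2 -- an even word, as 16-byte alignment requires. */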
9465
9466 /* Compute the size (in words) of a function argument. */
9467
9468 static unsigned long
9469 rs6000_arg_size (machine_mode mode, const_tree type)
9470 {
9471 unsigned long size;
9472
9473 if (mode != BLKmode)
9474 size = GET_MODE_SIZE (mode);
9475 else
9476 size = int_size_in_bytes (type);
9477
9478 if (TARGET_32BIT)
9479 return (size + 3) >> 2;
9480 else
9481 return (size + 7) >> 3;
9482 }
9483 \f
9484 /* Use this to flush pending int fields. */
9485
9486 static void
9487 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
9488 HOST_WIDE_INT bitpos, int final)
9489 {
9490 unsigned int startbit, endbit;
9491 int intregs, intoffset;
9492 machine_mode mode;
9493
9494 /* Handle the situations where a float is taking up the first half
9495 of the GPR, and the other half is empty (typically due to
9496 alignment restrictions). We can detect this by an 8-byte-aligned
9497 int field, or by seeing that this is the final flush for this
9498 argument. Count the word and continue on. */
9499 if (cum->floats_in_gpr == 1
9500 && (cum->intoffset % 64 == 0
9501 || (cum->intoffset == -1 && final)))
9502 {
9503 cum->words++;
9504 cum->floats_in_gpr = 0;
9505 }
9506
9507 if (cum->intoffset == -1)
9508 return;
9509
9510 intoffset = cum->intoffset;
9511 cum->intoffset = -1;
9512 cum->floats_in_gpr = 0;
9513
9514 if (intoffset % BITS_PER_WORD != 0)
9515 {
9516 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9517 MODE_INT, 0);
9518 if (mode == BLKmode)
9519 {
9520 /* We couldn't find an appropriate mode, which happens,
9521 e.g., in packed structs when there are 3 bytes to load.
9522 Move intoffset back to the beginning of the word in this
9523 case. */
9524 intoffset = intoffset & -BITS_PER_WORD;
9525 }
9526 }
9527
9528 startbit = intoffset & -BITS_PER_WORD;
9529 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9530 intregs = (endbit - startbit) / BITS_PER_WORD;
9531 cum->words += intregs;
9532 /* words should be unsigned. */
9533 if ((unsigned) cum->words < (endbit / BITS_PER_WORD))
9534 {
9535 int pad = (endbit / BITS_PER_WORD) - cum->words;
9536 cum->words += pad;
9537 }
9538 }
9539
9540 /* The darwin64 ABI calls for us to recurse down through structs,
9541 looking for elements passed in registers. Unfortunately, we have
9542 to track int register count here also because of misalignments
9543 in powerpc alignment mode. */
9544
9545 static void
9546 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
9547 const_tree type,
9548 HOST_WIDE_INT startbitpos)
9549 {
9550 tree f;
9551
9552 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9553 if (TREE_CODE (f) == FIELD_DECL)
9554 {
9555 HOST_WIDE_INT bitpos = startbitpos;
9556 tree ftype = TREE_TYPE (f);
9557 machine_mode mode;
9558 if (ftype == error_mark_node)
9559 continue;
9560 mode = TYPE_MODE (ftype);
9561
9562 if (DECL_SIZE (f) != 0
9563 && tree_fits_uhwi_p (bit_position (f)))
9564 bitpos += int_bit_position (f);
9565
9566 /* ??? FIXME: else assume zero offset. */
9567
9568 if (TREE_CODE (ftype) == RECORD_TYPE)
9569 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
9570 else if (USE_FP_FOR_ARG_P (cum, mode))
9571 {
9572 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
9573 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9574 cum->fregno += n_fpregs;
9575 /* Single-precision floats present a special problem for
9576 us, because they are smaller than an 8-byte GPR, and so
9577 the structure-packing rules combined with the standard
9578 varargs behavior mean that we want to pack float/float
9579 and float/int combinations into a single register's
9580 space. This is complicated by the arg advance flushing,
9581 which works on arbitrarily large groups of int-type
9582 fields. */
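/* Illustrative: in struct { float f; float g; } both fields share
   one GPR, so only the second float ticks cum->words; in
   struct { float f; int i; } the int flush in
   rs6000_darwin64_record_arg_advance_flush accounts for the
   shared word instead. */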
9583 if (mode == SFmode)
9584 {
9585 if (cum->floats_in_gpr == 1)
9586 {
9587 /* Two floats in a word; count the word and reset
9588 the float count. */
9589 cum->words++;
9590 cum->floats_in_gpr = 0;
9591 }
9592 else if (bitpos % 64 == 0)
9593 {
9594 /* A float at the beginning of an 8-byte word;
9595 count it and put off adjusting cum->words until
9596 we see if an arg advance flush is going to do it
9597 for us. */
9598 cum->floats_in_gpr++;
9599 }
9600 else
9601 {
9602 /* The float is at the end of a word, preceded
9603 by integer fields, so the arg advance flush
9604 just above has already set cum->words and
9605 everything is taken care of. */
9606 }
9607 }
9608 else
9609 cum->words += n_fpregs;
9610 }
9611 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9612 {
9613 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9614 cum->vregno++;
9615 cum->words += 2;
9616 }
9617 else if (cum->intoffset == -1)
9618 cum->intoffset = bitpos;
9619 }
9620 }
9621
9622 /* Check for an item that needs to be considered specially under the darwin 64
9623 bit ABI. These are record types where the mode is BLKmode or the structure is
9624 8 bytes in size. */
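/* E.g. (illustrative): struct { int a; int b; } is exactly 8 bytes,
   so it gets the special handling even though its mode may be a
   scalar DImode rather than BLKmode. */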
9625 static int
9626 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
9627 {
9628 return rs6000_darwin64_abi
9629 && ((mode == BLKmode
9630 && TREE_CODE (type) == RECORD_TYPE
9631 && int_size_in_bytes (type) > 0)
9632 || (type && TREE_CODE (type) == RECORD_TYPE
9633 && int_size_in_bytes (type) == 8)) ? 1 : 0;
9634 }
9635
9636 /* Update the data in CUM to advance over an argument
9637 of mode MODE and data type TYPE.
9638 (TYPE is null for libcalls where that information may not be available.)
9639
9640 Note that for args passed by reference, function_arg will be called
9641 with MODE and TYPE set to that of the pointer to the arg, not the arg
9642 itself. */
9643
9644 static void
9645 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
9646 const_tree type, bool named, int depth)
9647 {
9648 machine_mode elt_mode;
9649 int n_elts;
9650
9651 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9652
9653 /* Only tick off an argument if we're not recursing. */
9654 if (depth == 0)
9655 cum->nargs_prototype--;
9656
9657 #ifdef HAVE_AS_GNU_ATTRIBUTE
9658 if (DEFAULT_ABI == ABI_V4
9659 && cum->escapes)
9660 {
9661 if (SCALAR_FLOAT_MODE_P (mode))
9662 rs6000_passes_float = true;
9663 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
9664 rs6000_passes_vector = true;
9665 else if (SPE_VECTOR_MODE (mode)
9666 && !cum->stdarg
9667 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9668 rs6000_passes_vector = true;
9669 }
9670 #endif
9671
9672 if (TARGET_ALTIVEC_ABI
9673 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9674 || (type && TREE_CODE (type) == VECTOR_TYPE
9675 && int_size_in_bytes (type) == 16)))
9676 {
9677 bool stack = false;
9678
9679 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
9680 {
9681 cum->vregno += n_elts;
9682
9683 if (!TARGET_ALTIVEC)
9684 error ("cannot pass argument in vector register because"
9685 " altivec instructions are disabled, use -maltivec"
9686 " to enable them");
9687
9688 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
9689 even if it is going to be passed in a vector register.
9690 Darwin does the same for variable-argument functions. */
9691 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9692 && TARGET_64BIT)
9693 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
9694 stack = true;
9695 }
9696 else
9697 stack = true;
9698
9699 if (stack)
9700 {
9701 int align;
9702
9703 /* Vector parameters must be 16-byte aligned. In 32-bit
9704 mode this means we need to take into account the offset
9705 to the parameter save area. In 64-bit mode, they just
9706 have to start on an even word, since the parameter save
9707 area is 16-byte aligned. */
9708 if (TARGET_32BIT)
9709 align = -(rs6000_parm_offset () + cum->words) & 3;
9710 else
9711 align = cum->words & 1;
9712 cum->words += align + rs6000_arg_size (mode, type);
9713
9714 if (TARGET_DEBUG_ARG)
9715 {
9716 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
9717 cum->words, align);
9718 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
9719 cum->nargs_prototype, cum->prototype,
9720 GET_MODE_NAME (mode));
9721 }
9722 }
9723 }
9724 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
9725 && !cum->stdarg
9726 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9727 cum->sysv_gregno++;
9728
9729 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9730 {
9731 int size = int_size_in_bytes (type);
9732 /* Variable sized types have size == -1 and are
9733 treated as if consisting entirely of ints.
9734 Pad to 16 byte boundary if needed. */
9735 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9736 && (cum->words % 2) != 0)
9737 cum->words++;
9738 /* For varargs, we can just go up by the size of the struct. */
9739 if (!named)
9740 cum->words += (size + 7) / 8;
9741 else
9742 {
9743 /* It is tempting to say int register count just goes up by
9744 sizeof(type)/8, but this is wrong in a case such as
9745 { int; double; int; } [powerpc alignment]. We have to
9746 grovel through the fields for these too. */
9747 cum->intoffset = 0;
9748 cum->floats_in_gpr = 0;
9749 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
9750 rs6000_darwin64_record_arg_advance_flush (cum,
9751 size * BITS_PER_UNIT, 1);
9752 }
9753 if (TARGET_DEBUG_ARG)
9754 {
9755 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
9756 cum->words, TYPE_ALIGN (type), size);
9757 fprintf (stderr,
9758 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
9759 cum->nargs_prototype, cum->prototype,
9760 GET_MODE_NAME (mode));
9761 }
9762 }
9763 else if (DEFAULT_ABI == ABI_V4)
9764 {
9765 if (TARGET_HARD_FLOAT && TARGET_FPRS
9766 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
9767 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
9768 || (mode == TFmode && !TARGET_IEEEQUAD)
9769 || mode == SDmode || mode == DDmode || mode == TDmode))
9770 {
9771 /* _Decimal128 must use an even/odd register pair. This assumes
9772 that the register number is odd when fregno is odd. */
9773 if (mode == TDmode && (cum->fregno % 2) == 1)
9774 cum->fregno++;
9775
9776 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
9777 <= FP_ARG_V4_MAX_REG)
9778 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
9779 else
9780 {
9781 cum->fregno = FP_ARG_V4_MAX_REG + 1;
9782 if (mode == DFmode || mode == TFmode
9783 || mode == DDmode || mode == TDmode)
9784 cum->words += cum->words & 1;
9785 cum->words += rs6000_arg_size (mode, type);
9786 }
9787 }
9788 else
9789 {
9790 int n_words = rs6000_arg_size (mode, type);
9791 int gregno = cum->sysv_gregno;
9792
9793 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
9794 (r7,r8) or (r9,r10). As does any other 2 word item such
9795 as complex int due to a historical mistake. */
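/* GP_ARG_MIN_REG is r3, so pairs start on odd register numbers;
   "(1 - gregno) & 1" adds 1 exactly when gregno is even, i.e. it
   rounds gregno up to the next odd register. */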
9796 if (n_words == 2)
9797 gregno += (1 - gregno) & 1;
9798
9799 /* Multi-reg args are not split between registers and stack. */
9800 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9801 {
9802 /* Long long and SPE vectors are aligned on the stack.
9803 So are other 2 word items such as complex int due to
9804 a historical mistake. */
9805 if (n_words == 2)
9806 cum->words += cum->words & 1;
9807 cum->words += n_words;
9808 }
9809
9810 /* Note: gregno keeps accumulating even after arguments have
9811 started spilling to the stack; expand_builtin_saveregs relies
9812 on that overshoot to tell that spilling has begun. */
9813 cum->sysv_gregno = gregno + n_words;
9814 }
9815
9816 if (TARGET_DEBUG_ARG)
9817 {
9818 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9819 cum->words, cum->fregno);
9820 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
9821 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
9822 fprintf (stderr, "mode = %4s, named = %d\n",
9823 GET_MODE_NAME (mode), named);
9824 }
9825 }
9826 else
9827 {
9828 int n_words = rs6000_arg_size (mode, type);
9829 int start_words = cum->words;
9830 int align_words = rs6000_parm_start (mode, type, start_words);
9831
9832 cum->words = align_words + n_words;
9833
9834 if (SCALAR_FLOAT_MODE_P (elt_mode)
9835 && TARGET_HARD_FLOAT && TARGET_FPRS)
9836 {
9837 /* _Decimal128 must be passed in an even/odd float register pair.
9838 This assumes that the register number is odd when fregno is
9839 odd. */
9840 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
9841 cum->fregno++;
9842 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
9843 }
9844
9845 if (TARGET_DEBUG_ARG)
9846 {
9847 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9848 cum->words, cum->fregno);
9849 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
9850 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
9851 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
9852 named, align_words - start_words, depth);
9853 }
9854 }
9855 }
9856
9857 static void
9858 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
9859 const_tree type, bool named)
9860 {
9861 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
9862 0);
9863 }
9864
9865 static rtx
9866 spe_build_register_parallel (machine_mode mode, int gregno)
9867 {
9868 rtx r1, r3, r5, r7;
9869
9870 switch (mode)
9871 {
9872 case DFmode:
9873 r1 = gen_rtx_REG (DImode, gregno);
9874 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9875 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
9876
9877 case DCmode:
9878 case TFmode:
9879 r1 = gen_rtx_REG (DImode, gregno);
9880 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9881 r3 = gen_rtx_REG (DImode, gregno + 2);
9882 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9883 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
9884
9885 case TCmode:
9886 r1 = gen_rtx_REG (DImode, gregno);
9887 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9888 r3 = gen_rtx_REG (DImode, gregno + 2);
9889 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9890 r5 = gen_rtx_REG (DImode, gregno + 4);
9891 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
9892 r7 = gen_rtx_REG (DImode, gregno + 6);
9893 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
9894 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
9895
9896 default:
9897 gcc_unreachable ();
9898 }
9899 }
9900
9901 /* Determine where to put a SIMD argument on the SPE. */
9902 static rtx
9903 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
9904 const_tree type)
9905 {
9906 int gregno = cum->sysv_gregno;
9907
9908 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
9909 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
9910 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
9911 || mode == DCmode || mode == TCmode))
9912 {
9913 int n_words = rs6000_arg_size (mode, type);
9914
9915 /* Doubles go in an odd/even register pair (r5/r6, etc). */
9916 if (mode == DFmode)
9917 gregno += (1 - gregno) & 1;
9918
9919 /* Multi-reg args are not split between registers and stack. */
9920 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9921 return NULL_RTX;
9922
9923 return spe_build_register_parallel (mode, gregno);
9924 }
9925 if (cum->stdarg)
9926 {
9927 int n_words = rs6000_arg_size (mode, type);
9928
9929 /* SPE vectors are put in odd registers. */
9930 if (n_words == 2 && (gregno & 1) == 0)
9931 gregno += 1;
9932
9933 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
9934 {
9935 rtx r1, r2;
9936 machine_mode m = SImode;
9937
9938 r1 = gen_rtx_REG (m, gregno);
9939 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
9940 r2 = gen_rtx_REG (m, gregno + 1);
9941 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
9942 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
9943 }
9944 else
9945 return NULL_RTX;
9946 }
9947 else
9948 {
9949 if (gregno <= GP_ARG_MAX_REG)
9950 return gen_rtx_REG (mode, gregno);
9951 else
9952 return NULL_RTX;
9953 }
9954 }
9955
9956 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
9957 structure between cum->intoffset and bitpos to integer registers. */
9958
9959 static void
9960 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
9961 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
9962 {
9963 machine_mode mode;
9964 unsigned int regno;
9965 unsigned int startbit, endbit;
9966 int this_regno, intregs, intoffset;
9967 rtx reg;
9968
9969 if (cum->intoffset == -1)
9970 return;
9971
9972 intoffset = cum->intoffset;
9973 cum->intoffset = -1;
9974
9975 /* If this is the trailing part of a word, try to only load that
9976 much into the register. Otherwise load the whole register. Note
9977 that in the latter case we may pick up unwanted bits. It's not a
9978 problem at the moment, but we may wish to revisit this. */
9979
9980 if (intoffset % BITS_PER_WORD != 0)
9981 {
9982 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9983 MODE_INT, 0);
9984 if (mode == BLKmode)
9985 {
9986 /* We couldn't find an appropriate mode, which happens,
9987 e.g., in packed structs when there are 3 bytes to load.
9988 Move intoffset back to the beginning of the word in this
9989 case. */
9990 intoffset = intoffset & -BITS_PER_WORD;
9991 mode = word_mode;
9992 }
9993 }
9994 else
9995 mode = word_mode;
9996
9997 startbit = intoffset & -BITS_PER_WORD;
9998 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9999 intregs = (endbit - startbit) / BITS_PER_WORD;
10000 this_regno = cum->words + intoffset / BITS_PER_WORD;
10001
10002 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
10003 cum->use_stack = 1;
10004
10005 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
10006 if (intregs <= 0)
10007 return;
10008
10009 intoffset /= BITS_PER_UNIT;
10010 do
10011 {
10012 regno = GP_ARG_MIN_REG + this_regno;
10013 reg = gen_rtx_REG (mode, regno);
10014 rvec[(*k)++] =
10015 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10016
10017 this_regno += 1;
10018 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
10019 mode = word_mode;
10020 intregs -= 1;
10021 }
10022 while (intregs > 0);
10023 }
10024
10025 /* Recursive workhorse for the following. */
10026
10027 static void
10028 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10029 HOST_WIDE_INT startbitpos, rtx rvec[],
10030 int *k)
10031 {
10032 tree f;
10033
10034 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10035 if (TREE_CODE (f) == FIELD_DECL)
10036 {
10037 HOST_WIDE_INT bitpos = startbitpos;
10038 tree ftype = TREE_TYPE (f);
10039 machine_mode mode;
10040 if (ftype == error_mark_node)
10041 continue;
10042 mode = TYPE_MODE (ftype);
10043
10044 if (DECL_SIZE (f) != 0
10045 && tree_fits_uhwi_p (bit_position (f)))
10046 bitpos += int_bit_position (f);
10047
10048 /* ??? FIXME: else assume zero offset. */
10049
10050 if (TREE_CODE (ftype) == RECORD_TYPE)
10051 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10052 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10053 {
10054 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10055 #if 0
10056 switch (mode)
10057 {
10058 case SCmode: mode = SFmode; break;
10059 case DCmode: mode = DFmode; break;
10060 case TCmode: mode = TFmode; break;
10061 default: break;
10062 }
10063 #endif
10064 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10065 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10066 {
10067 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10068 && (mode == TFmode || mode == TDmode));
10069 /* Long double or _Decimal128 split over regs and memory. */
10070 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10071 cum->use_stack = 1;
10072 }
10073 rvec[(*k)++]
10074 = gen_rtx_EXPR_LIST (VOIDmode,
10075 gen_rtx_REG (mode, cum->fregno++),
10076 GEN_INT (bitpos / BITS_PER_UNIT));
10077 if (mode == TFmode || mode == TDmode)
10078 cum->fregno++;
10079 }
10080 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10081 {
10082 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10083 rvec[(*k)++]
10084 = gen_rtx_EXPR_LIST (VOIDmode,
10085 gen_rtx_REG (mode, cum->vregno++),
10086 GEN_INT (bitpos / BITS_PER_UNIT));
10087 }
10088 else if (cum->intoffset == -1)
10089 cum->intoffset = bitpos;
10090 }
10091 }
10092
10093 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10094 the register(s) to be used for each field and subfield of a struct
10095 being passed by value, along with the offset of where the
10096 register's value may be found in the block. FP fields go in FP
10097 register, vector fields go in vector registers, and everything
10098 else goes in int registers, packed as in memory.
10099
10100 This code is also used for function return values. RETVAL indicates
10101 whether this is the case.
10102
10103 Much of this is taken from the SPARC V9 port, which has a similar
10104 calling convention. */
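/* Sketch of the result (illustrative): for a by-value
   struct { double d; int i; }
   rvec pairs an FP register at offset 0 for "d" with a word-mode
   GPR at offset 8 for "i", all wrapped in one BLKmode PARALLEL. */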
10105
10106 static rtx
10107 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10108 bool named, bool retval)
10109 {
10110 rtx rvec[FIRST_PSEUDO_REGISTER];
10111 int k = 1, kbase = 1;
10112 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10113 /* This is a copy; modifications are not visible to our caller. */
10114 CUMULATIVE_ARGS copy_cum = *orig_cum;
10115 CUMULATIVE_ARGS *cum = &copy_cum;
10116
10117 /* Pad to 16 byte boundary if needed. */
10118 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10119 && (cum->words % 2) != 0)
10120 cum->words++;
10121
10122 cum->intoffset = 0;
10123 cum->use_stack = 0;
10124 cum->named = named;
10125
10126 /* Put entries into rvec[] for individual FP and vector fields, and
10127 for the chunks of memory that go in int regs. Note we start at
10128 element 1; 0 is reserved for an indication of using memory, and
10129 may or may not be filled in below. */
10130 rs6000_darwin64_record_arg_recurse (cum, type, /* startbitpos= */ 0, rvec, &k);
10131 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10132
10133 /* If any part of the struct went on the stack put all of it there.
10134 This hack is because the generic code for
10135 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10136 parts of the struct are not at the beginning. */
10137 if (cum->use_stack)
10138 {
10139 if (retval)
10140 return NULL_RTX; /* doesn't go in registers at all */
10141 kbase = 0;
10142 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10143 }
10144 if (k > 1 || cum->use_stack)
10145 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10146 else
10147 return NULL_RTX;
10148 }
10149
10150 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10151
10152 static rtx
10153 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10154 int align_words)
10155 {
10156 int n_units;
10157 int i, k;
10158 rtx rvec[GP_ARG_NUM_REG + 1];
10159
10160 if (align_words >= GP_ARG_NUM_REG)
10161 return NULL_RTX;
10162
10163 n_units = rs6000_arg_size (mode, type);
10164
10165 /* Optimize the simple case where the arg fits in one gpr, except in
10166 the case of BLKmode due to assign_parms assuming that registers are
10167 BITS_PER_WORD wide. */
10168 if (n_units == 0
10169 || (n_units == 1 && mode != BLKmode))
10170 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10171
10172 k = 0;
10173 if (align_words + n_units > GP_ARG_NUM_REG)
10174 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10175 using a magic NULL_RTX component.
10176 This is not strictly correct. Only some of the arg belongs in
10177 memory, not all of it. However, the normal scheme using
10178 function_arg_partial_nregs can result in unusual subregs, eg.
10179 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10180 store the whole arg to memory is often more efficient than code
10181 to store pieces, and we know that space is available in the right
10182 place for the whole arg. */
10183 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10184
10185 i = 0;
10186 do
10187 {
10188 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10189 rtx off = GEN_INT (i++ * 4);
10190 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10191 }
10192 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10193
10194 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10195 }
10196
10197 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10198 but must also be copied into the parameter save area starting at
10199 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10200 to the GPRs and/or memory. Return the number of elements used. */
10201
10202 static int
10203 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10204 int align_words, rtx *rvec)
10205 {
10206 int k = 0;
10207
10208 if (align_words < GP_ARG_NUM_REG)
10209 {
10210 int n_words = rs6000_arg_size (mode, type);
10211
10212 if (align_words + n_words > GP_ARG_NUM_REG
10213 || mode == BLKmode
10214 || (TARGET_32BIT && TARGET_POWERPC64))
10215 {
10216 /* If this is partially on the stack, then we only
10217 include the portion actually in registers here. */
10218 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10219 int i = 0;
10220
10221 if (align_words + n_words > GP_ARG_NUM_REG)
10222 {
10223 /* Not all of the arg fits in gprs. Say that it goes in memory
10224 too, using a magic NULL_RTX component. Also see comment in
10225 rs6000_mixed_function_arg for why the normal
10226 function_arg_partial_nregs scheme doesn't work in this case. */
10227 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10228 }
10229
10230 do
10231 {
10232 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10233 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10234 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10235 }
10236 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10237 }
10238 else
10239 {
10240 /* The whole arg fits in gprs. */
10241 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10242 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10243 }
10244 }
10245 else
10246 {
10247 /* It's entirely in memory. */
10248 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10249 }
10250
10251 return k;
10252 }
10253
10254 /* RVEC is a vector of K components of an argument of mode MODE.
10255 Construct the final function_arg return value from it. */
10256
10257 static rtx
10258 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
10259 {
10260 gcc_assert (k >= 1);
10261
10262 /* Avoid returning a PARALLEL in the trivial cases. */
10263 if (k == 1)
10264 {
10265 if (XEXP (rvec[0], 0) == NULL_RTX)
10266 return NULL_RTX;
10267
10268 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10269 return XEXP (rvec[0], 0);
10270 }
10271
10272 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10273 }
10274
10275 /* Determine where to put an argument to a function.
10276 Value is zero to push the argument on the stack,
10277 or a hard register in which to store the argument.
10278
10279 MODE is the argument's machine mode.
10280 TYPE is the data type of the argument (as a tree).
10281 This is null for libcalls where that information may
10282 not be available.
10283 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10284 the preceding args and about the function being called. It is
10285 not modified in this routine.
10286 NAMED is nonzero if this argument is a named parameter
10287 (otherwise it is an extra parameter matching an ellipsis).
10288
10289 On RS/6000 the first eight words of non-FP are normally in registers
10290 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10291 Under V.4, the first 8 FP args are in registers.
10292
10293 If this is floating-point and no prototype is specified, we use
10294 both an FP and integer register (or possibly FP reg and stack). Library
10295 functions (when CALL_LIBCALL is set) always have the proper types for args,
10296 so we can pass the FP value just in one register. emit_library_function
10297 doesn't support PARALLEL anyway.
10298
10299 Note that for args passed by reference, function_arg will be called
10300 with MODE and TYPE set to that of the pointer to the arg, not the arg
10301 itself. */
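/* An illustrative consequence of the no-prototype rule above: a
   double passed to an unprototyped AIX function comes back from
   this hook as a PARALLEL naming both an FPR and the corresponding
   GPR/stack slots, so the caller materializes the value in both
   places. */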
10302
10303 static rtx
10304 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
10305 const_tree type, bool named)
10306 {
10307 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10308 enum rs6000_abi abi = DEFAULT_ABI;
10309 machine_mode elt_mode;
10310 int n_elts;
10311
10312 /* Return a marker to indicate whether CR1 needs to set or clear the
10313 bit that V.4 uses to say fp args were passed in registers.
10314 Assume that we don't need the marker for software floating point,
10315 or compiler generated library calls. */
10316 if (mode == VOIDmode)
10317 {
10318 if (abi == ABI_V4
10319 && (cum->call_cookie & CALL_LIBCALL) == 0
10320 && (cum->stdarg
10321 || (cum->nargs_prototype < 0
10322 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10323 {
10324 /* For the SPE, we need to crxor CR6 always. */
10325 if (TARGET_SPE_ABI)
10326 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10327 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10328 return GEN_INT (cum->call_cookie
10329 | ((cum->fregno == FP_ARG_MIN_REG)
10330 ? CALL_V4_SET_FP_ARGS
10331 : CALL_V4_CLEAR_FP_ARGS));
10332 }
10333
10334 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
10335 }
10336
10337 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10338
10339 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10340 {
10341 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
10342 if (rslt != NULL_RTX)
10343 return rslt;
10344 /* Else fall through to usual handling. */
10345 }
10346
10347 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10348 {
10349 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10350 rtx r, off;
10351 int i, k = 0;
10352
10353 /* Do we also need to pass this argument in the parameter
10354 save area? */
10355 if (TARGET_64BIT && ! cum->prototype)
10356 {
10357 int align_words = (cum->words + 1) & ~1;
10358 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10359 }
10360
10361 /* Describe where this argument goes in the vector registers. */
10362 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
10363 {
10364 r = gen_rtx_REG (elt_mode, cum->vregno + i);
10365 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10366 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10367 }
10368
10369 return rs6000_finish_function_arg (mode, rvec, k);
10370 }
10371 else if (TARGET_ALTIVEC_ABI
10372 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
10373 || (type && TREE_CODE (type) == VECTOR_TYPE
10374 && int_size_in_bytes (type) == 16)))
10375 {
10376 if (named || abi == ABI_V4)
10377 return NULL_RTX;
10378 else
10379 {
10380 /* Vector parameters to varargs functions under AIX or Darwin
10381 get passed in memory and possibly also in GPRs. */
10382 int align, align_words, n_words;
10383 machine_mode part_mode;
10384
10385 /* Vector parameters must be 16-byte aligned. In 32-bit
10386 mode this means we need to take into account the offset
10387 to the parameter save area. In 64-bit mode, they just
10388 have to start on an even word, since the parameter save
10389 area is 16-byte aligned. */
10390 if (TARGET_32BIT)
10391 align = -(rs6000_parm_offset () + cum->words) & 3;
10392 else
10393 align = cum->words & 1;
10394 align_words = cum->words + align;
10395
10396 /* Out of registers? Memory, then. */
10397 if (align_words >= GP_ARG_NUM_REG)
10398 return NULL_RTX;
10399
10400 if (TARGET_32BIT && TARGET_POWERPC64)
10401 return rs6000_mixed_function_arg (mode, type, align_words);
10402
10403 /* The vector value goes in GPRs. Only the part of the
10404 value in GPRs is reported here. */
10405 part_mode = mode;
10406 n_words = rs6000_arg_size (mode, type);
10407 if (align_words + n_words > GP_ARG_NUM_REG)
10408 /* Fortunately, there are only two possibilities, the value
10409 is either wholly in GPRs or half in GPRs and half not. */
10410 part_mode = DImode;
10411
10412 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
10413 }
10414 }
10415 else if (TARGET_SPE_ABI && TARGET_SPE
10416 && (SPE_VECTOR_MODE (mode)
10417 || (TARGET_E500_DOUBLE && (mode == DFmode
10418 || mode == DCmode
10419 || mode == TFmode
10420 || mode == TCmode))))
10421 return rs6000_spe_function_arg (cum, mode, type);
10422
10423 else if (abi == ABI_V4)
10424 {
10425 if (TARGET_HARD_FLOAT && TARGET_FPRS
10426 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10427 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10428 || (mode == TFmode && !TARGET_IEEEQUAD)
10429 || mode == SDmode || mode == DDmode || mode == TDmode))
10430 {
10431 /* _Decimal128 must use an even/odd register pair. This assumes
10432 that the register number is odd when fregno is odd. */
10433 if (mode == TDmode && (cum->fregno % 2) == 1)
10434 cum->fregno++;
10435
10436 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
10437 <= FP_ARG_V4_MAX_REG)
10438 return gen_rtx_REG (mode, cum->fregno);
10439 else
10440 return NULL_RTX;
10441 }
10442 else
10443 {
10444 int n_words = rs6000_arg_size (mode, type);
10445 int gregno = cum->sysv_gregno;
10446
10447 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10448 (r7,r8) or (r9,r10), as is any other 2-word item such
10449 as complex int, due to a historical mistake. */
10450 if (n_words == 2)
10451 gregno += (1 - gregno) & 1;
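/* Illustrative note (not in the original source): the expression above
rounds an even GREGNO up to the next odd register number, since the
pairs start at r3, r5, r7 and r9. E.g. with gregno == 4,
(1 - 4) & 1 == 1, so the pair starts at r5; an odd gregno is unchanged. */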
10452
10453 /* Multi-reg args are not split between registers and stack. */
10454 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10455 return NULL_RTX;
10456
10457 if (TARGET_32BIT && TARGET_POWERPC64)
10458 return rs6000_mixed_function_arg (mode, type,
10459 gregno - GP_ARG_MIN_REG);
10460 return gen_rtx_REG (mode, gregno);
10461 }
10462 }
10463 else
10464 {
10465 int align_words = rs6000_parm_start (mode, type, cum->words);
10466
10467 /* _Decimal128 must be passed in an even/odd float register pair.
10468 This assumes that the register number is odd when fregno is odd. */
10469 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10470 cum->fregno++;
10471
10472 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10473 {
10474 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10475 rtx r, off;
10476 int i, k = 0;
10477 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10478 int fpr_words;
10479
10480 /* Do we also need to pass this argument in the parameter
10481 save area? */
10482 if (type && (cum->nargs_prototype <= 0
10483 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10484 && TARGET_XL_COMPAT
10485 && align_words >= GP_ARG_NUM_REG)))
10486 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10487
10488 /* Describe where this argument goes in the fprs. */
10489 for (i = 0; i < n_elts
10490 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
10491 {
10492 /* Check if the argument is split over registers and memory.
10493 This can only ever happen for long double or _Decimal128;
10494 complex types are handled via split_complex_arg. */
10495 machine_mode fmode = elt_mode;
10496 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
10497 {
10498 gcc_assert (fmode == TFmode || fmode == TDmode);
10499 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
10500 }
10501
10502 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
10503 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10504 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10505 }
10506
10507 /* If there were not enough FPRs to hold the argument, the rest
10508 usually goes into memory. However, if the current position
10509 is still within the register parameter area, a portion may
10510 actually have to go into GPRs.
10511
10512 Note that it may happen that the portion of the argument
10513 passed in the first "half" of the first GPR was already
10514 passed in the last FPR as well.
10515
10516 For unnamed arguments, we already set up GPRs to cover the
10517 whole argument in rs6000_psave_function_arg, so there is
10518 nothing further to do at this point. */
10519 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
10520 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
10521 && cum->nargs_prototype > 0)
10522 {
10523 static bool warned;
10524
10525 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10526 int n_words = rs6000_arg_size (mode, type);
10527
10528 align_words += fpr_words;
10529 n_words -= fpr_words;
10530
10531 do
10532 {
10533 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10534 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
10535 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10536 }
10537 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10538
10539 if (!warned && warn_psabi)
10540 {
10541 warned = true;
10542 inform (input_location,
10543 "the ABI of passing homogeneous float aggregates"
10544 " has changed in GCC 5");
10545 }
10546 }
10547
10548 return rs6000_finish_function_arg (mode, rvec, k);
10549 }
10550 else if (align_words < GP_ARG_NUM_REG)
10551 {
10552 if (TARGET_32BIT && TARGET_POWERPC64)
10553 return rs6000_mixed_function_arg (mode, type, align_words);
10554
10555 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10556 }
10557 else
10558 return NULL_RTX;
10559 }
10560 }
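/* A hedged illustration, not part of the original source: under the
64-bit ELFv2 conventions handled above, a homogeneous aggregate such as

struct hfa { double a, b, c; };

passed as the first named argument comes back from this function as a
PARALLEL placing the three doubles in f1..f3; the parameter save area
is brought in only when the FPRs run out or the call is unprototyped. */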
10561 \f
10562 /* For an arg passed partly in registers and partly in memory, this is
10563 the number of bytes passed in registers. For args passed entirely in
10564 registers or entirely in memory, zero. When an arg is described by a
10565 PARALLEL, perhaps using more than one register type, this function
10566 returns the number of bytes used by the first element of the PARALLEL. */
10567
10568 static int
10569 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
10570 tree type, bool named)
10571 {
10572 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10573 bool passed_in_gprs = true;
10574 int ret = 0;
10575 int align_words;
10576 machine_mode elt_mode;
10577 int n_elts;
10578
10579 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10580
10581 if (DEFAULT_ABI == ABI_V4)
10582 return 0;
10583
10584 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10585 {
10586 /* If we are passing this arg in the fixed parameter save area
10587 (gprs or memory) as well as VRs, we do not use the partial
10588 bytes mechanism; instead, rs6000_function_arg will return a
10589 PARALLEL including a memory element as necessary. */
10590 if (TARGET_64BIT && ! cum->prototype)
10591 return 0;
10592
10593 /* Otherwise, we pass in VRs only. Check for partial copies. */
10594 passed_in_gprs = false;
10595 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
10596 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
10597 }
10598
10599 /* In this complicated case we just disable the partial_nregs code. */
10600 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10601 return 0;
10602
10603 align_words = rs6000_parm_start (mode, type, cum->words);
10604
10605 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10606 {
10607 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10608
10609 /* If we are passing this arg in the fixed parameter save area
10610 (gprs or memory) as well as FPRs, we do not use the partial
10611 bytes mechanism; instead, rs6000_function_arg will return a
10612 PARALLEL including a memory element as necessary. */
10613 if (type
10614 && (cum->nargs_prototype <= 0
10615 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10616 && TARGET_XL_COMPAT
10617 && align_words >= GP_ARG_NUM_REG)))
10618 return 0;
10619
10620 /* Otherwise, we pass in FPRs only. Check for partial copies. */
10621 passed_in_gprs = false;
10622 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
10623 {
10624 /* Compute number of bytes / words passed in FPRs. If there
10625 is still space available in the register parameter area
10626 *after* that amount, a part of the argument will be passed
10627 in GPRs. In that case, the total amount passed in any
10628 registers is equal to the amount that would have been passed
10629 in GPRs if everything were passed there, so we fall back to
10630 the GPR code below to compute the appropriate value. */
10631 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
10632 * MIN (8, GET_MODE_SIZE (elt_mode)));
10633 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
10634
10635 if (align_words + fpr_words < GP_ARG_NUM_REG)
10636 passed_in_gprs = true;
10637 else
10638 ret = fpr;
10639 }
10640 }
10641
10642 if (passed_in_gprs
10643 && align_words < GP_ARG_NUM_REG
10644 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
10645 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
10646
10647 if (ret != 0 && TARGET_DEBUG_ARG)
10648 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
10649
10650 return ret;
10651 }
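/* Worked example (illustrative only): on a 64-bit target with
GP_ARG_NUM_REG == 8, a 32-byte non-floating aggregate whose first word
lands at align_words == 6 has its first two doublewords in r9/r10 and
the rest in memory, so this function returns (8 - 6) * 8 == 16. */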
10652 \f
10653 /* A C expression that indicates when an argument must be passed by
10654 reference. If nonzero for an argument, a copy of that argument is
10655 made in memory and a pointer to the argument is passed instead of
10656 the argument itself. The pointer is passed in whatever way is
10657 appropriate for passing a pointer to that type.
10658
10659 Under V.4, aggregates and long double are passed by reference.
10660
10661 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
10662 reference unless the AltiVec vector extension ABI is in force.
10663
10664 As an extension to all ABIs, variable sized types are passed by
10665 reference. */
10666
10667 static bool
10668 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
10669 machine_mode mode, const_tree type,
10670 bool named ATTRIBUTE_UNUSED)
10671 {
10672 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
10673 {
10674 if (TARGET_DEBUG_ARG)
10675 fprintf (stderr, "function_arg_pass_by_reference: V4 long double\n");
10676 return 1;
10677 }
10678
10679 if (!type)
10680 return 0;
10681
10682 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
10683 {
10684 if (TARGET_DEBUG_ARG)
10685 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
10686 return 1;
10687 }
10688
10689 if (int_size_in_bytes (type) < 0)
10690 {
10691 if (TARGET_DEBUG_ARG)
10692 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
10693 return 1;
10694 }
10695
10696 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10697 modes only exist for GCC vector types if -maltivec. */
10698 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
10699 {
10700 if (TARGET_DEBUG_ARG)
10701 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
10702 return 1;
10703 }
10704
10705 /* Pass synthetic vectors in memory. */
10706 if (TREE_CODE (type) == VECTOR_TYPE
10707 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10708 {
10709 static bool warned_for_pass_big_vectors = false;
10710 if (TARGET_DEBUG_ARG)
10711 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
10712 if (!warned_for_pass_big_vectors)
10713 {
10714 warning (0, "GCC vector passed by reference: "
10715 "non-standard ABI extension with no compatibility guarantee");
10716 warned_for_pass_big_vectors = true;
10717 }
10718 return 1;
10719 }
10720
10721 return 0;
10722 }
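/* Hedged illustration (not from the original source): under V.4,

struct s { char buf[32]; };
void f (struct s x);

passes X by reference; the caller makes a copy in memory and passes a
pointer to it in the usual argument slot. */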
10723
10724 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
10725 already processed. Return true if the parameter must be passed
10726 (fully or partially) on the stack. */
10727
10728 static bool
10729 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
10730 {
10731 machine_mode mode;
10732 int unsignedp;
10733 rtx entry_parm;
10734
10735 /* Catch errors. */
10736 if (type == NULL || type == error_mark_node)
10737 return true;
10738
10739 /* Handle types with no storage requirement. */
10740 if (TYPE_MODE (type) == VOIDmode)
10741 return false;
10742
10743 /* Handle complex types: check the component type once per component. */
10744 if (TREE_CODE (type) == COMPLEX_TYPE)
10745 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
10746 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
10747
10748 /* Handle transparent aggregates. */
10749 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
10750 && TYPE_TRANSPARENT_AGGR (type))
10751 type = TREE_TYPE (first_field (type));
10752
10753 /* See if this arg was passed by invisible reference. */
10754 if (pass_by_reference (get_cumulative_args (args_so_far),
10755 TYPE_MODE (type), type, true))
10756 type = build_pointer_type (type);
10757
10758 /* Find mode as it is passed by the ABI. */
10759 unsignedp = TYPE_UNSIGNED (type);
10760 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
10761
10762 /* If we must pass in stack, we need a stack. */
10763 if (rs6000_must_pass_in_stack (mode, type))
10764 return true;
10765
10766 /* If there is no incoming register, we need a stack. */
10767 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
10768 if (entry_parm == NULL)
10769 return true;
10770
10771 /* Likewise if we need to pass both in registers and on the stack. */
10772 if (GET_CODE (entry_parm) == PARALLEL
10773 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
10774 return true;
10775
10776 /* Also true if we're partially in registers and partially not. */
10777 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
10778 return true;
10779
10780 /* Update info on where next arg arrives in registers. */
10781 rs6000_function_arg_advance (args_so_far, mode, type, true);
10782 return false;
10783 }
10784
10785 /* Return true if FUN has no prototype, has a variable argument
10786 list, or passes any parameter in memory. */
10787
10788 static bool
10789 rs6000_function_parms_need_stack (tree fun, bool incoming)
10790 {
10791 tree fntype, result;
10792 CUMULATIVE_ARGS args_so_far_v;
10793 cumulative_args_t args_so_far;
10794
10795 if (!fun)
10796 /* Must be a libcall, all of which only use reg parms. */
10797 return false;
10798
10799 fntype = fun;
10800 if (!TYPE_P (fun))
10801 fntype = TREE_TYPE (fun);
10802
10803 /* Varargs functions need the parameter save area. */
10804 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
10805 return true;
10806
10807 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
10808 args_so_far = pack_cumulative_args (&args_so_far_v);
10809
10810 /* When incoming, we will have been passed the function decl.
10811 It is necessary to use the decl to handle K&R style functions,
10812 where TYPE_ARG_TYPES may not be available. */
10813 if (incoming)
10814 {
10815 gcc_assert (DECL_P (fun));
10816 result = DECL_RESULT (fun);
10817 }
10818 else
10819 result = TREE_TYPE (fntype);
10820
10821 if (result && aggregate_value_p (result, fntype))
10822 {
10823 if (!TYPE_P (result))
10824 result = TREE_TYPE (result);
10825 result = build_pointer_type (result);
10826 rs6000_parm_needs_stack (args_so_far, result);
10827 }
10828
10829 if (incoming)
10830 {
10831 tree parm;
10832
10833 for (parm = DECL_ARGUMENTS (fun);
10834 parm && parm != void_list_node;
10835 parm = TREE_CHAIN (parm))
10836 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
10837 return true;
10838 }
10839 else
10840 {
10841 function_args_iterator args_iter;
10842 tree arg_type;
10843
10844 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
10845 if (rs6000_parm_needs_stack (args_so_far, arg_type))
10846 return true;
10847 }
10848
10849 return false;
10850 }
10851
10852 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
10853 usually a constant depending on the ABI. However, in the ELFv2 ABI
10854 the register parameter area is optional when calling a function that
10855 has a prototype in scope, has no variable argument list, and passes
10856 all parameters in registers. */
10857
10858 int
10859 rs6000_reg_parm_stack_space (tree fun, bool incoming)
10860 {
10861 int reg_parm_stack_space;
10862
10863 switch (DEFAULT_ABI)
10864 {
10865 default:
10866 reg_parm_stack_space = 0;
10867 break;
10868
10869 case ABI_AIX:
10870 case ABI_DARWIN:
10871 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10872 break;
10873
10874 case ABI_ELFv2:
10875 /* ??? Recomputing this every time is a bit expensive. Is there
10876 a place to cache this information? */
10877 if (rs6000_function_parms_need_stack (fun, incoming))
10878 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10879 else
10880 reg_parm_stack_space = 0;
10881 break;
10882 }
10883
10884 return reg_parm_stack_space;
10885 }
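/* Illustrative example (assumed prototypes): under ELFv2, a call to

int add (int a, int b);   -- prototyped; both parms fit in GPRs

needs no register parameter save area, so 0 is returned, while a
varargs callee such as printf always gets the full 64-byte area. */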
10886
10887 static void
10888 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
10889 {
10890 int i;
10891 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
10892
10893 if (nregs == 0)
10894 return;
10895
10896 for (i = 0; i < nregs; i++)
10897 {
10898 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
10899 if (reload_completed)
10900 {
10901 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
10902 tem = NULL_RTX;
10903 else
10904 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
10905 i * GET_MODE_SIZE (reg_mode));
10906 }
10907 else
10908 tem = replace_equiv_address (tem, XEXP (tem, 0));
10909
10910 gcc_assert (tem);
10911
10912 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
10913 }
10914 }
10915 \f
10916 /* Perform any actions needed for a function that is receiving a
10917 variable number of arguments.
10918
10919 CUM is as above.
10920
10921 MODE and TYPE are the mode and type of the current parameter.
10922
10923 PRETEND_SIZE is a variable that should be set to the amount of stack
10924 that must be pushed by the prolog to pretend that our caller pushed
10925 it.
10926
10927 Normally, this macro will push all remaining incoming registers on the
10928 stack and set PRETEND_SIZE to the length of the registers pushed. */
10929
10930 static void
10931 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
10932 tree type, int *pretend_size ATTRIBUTE_UNUSED,
10933 int no_rtl)
10934 {
10935 CUMULATIVE_ARGS next_cum;
10936 int reg_size = TARGET_32BIT ? 4 : 8;
10937 rtx save_area = NULL_RTX, mem;
10938 int first_reg_offset;
10939 alias_set_type set;
10940
10941 /* Skip the last named argument. */
10942 next_cum = *get_cumulative_args (cum);
10943 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
10944
10945 if (DEFAULT_ABI == ABI_V4)
10946 {
10947 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
10948
10949 if (! no_rtl)
10950 {
10951 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
10952 HOST_WIDE_INT offset = 0;
10953
10954 /* Try to optimize the size of the varargs save area.
10955 The ABI requires that ap.reg_save_area is doubleword
10956 aligned, but we don't need to allocate space for all
10957 the bytes, only those to which we actually will save
10958 anything. */
10959 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
10960 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
10961 if (TARGET_HARD_FLOAT && TARGET_FPRS
10962 && next_cum.fregno <= FP_ARG_V4_MAX_REG
10963 && cfun->va_list_fpr_size)
10964 {
10965 if (gpr_reg_num)
10966 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
10967 * UNITS_PER_FP_WORD;
10968 if (cfun->va_list_fpr_size
10969 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10970 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
10971 else
10972 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10973 * UNITS_PER_FP_WORD;
10974 }
10975 if (gpr_reg_num)
10976 {
10977 offset = -((first_reg_offset * reg_size) & ~7);
10978 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
10979 {
10980 gpr_reg_num = cfun->va_list_gpr_size;
10981 if (reg_size == 4 && (first_reg_offset & 1))
10982 gpr_reg_num++;
10983 }
10984 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
10985 }
10986 else if (fpr_size)
10987 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
10988 * UNITS_PER_FP_WORD
10989 - (int) (GP_ARG_NUM_REG * reg_size);
10990
10991 if (gpr_size + fpr_size)
10992 {
10993 rtx reg_save_area
10994 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
10995 gcc_assert (GET_CODE (reg_save_area) == MEM);
10996 reg_save_area = XEXP (reg_save_area, 0);
10997 if (GET_CODE (reg_save_area) == PLUS)
10998 {
10999 gcc_assert (XEXP (reg_save_area, 0)
11000 == virtual_stack_vars_rtx);
11001 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
11002 offset += INTVAL (XEXP (reg_save_area, 1));
11003 }
11004 else
11005 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
11006 }
11007
11008 cfun->machine->varargs_save_offset = offset;
11009 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
11010 }
11011 }
11012 else
11013 {
11014 first_reg_offset = next_cum.words;
11015 save_area = virtual_incoming_args_rtx;
11016
11017 if (targetm.calls.must_pass_in_stack (mode, type))
11018 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11019 }
11020
11021 set = get_varargs_alias_set ();
11022 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11023 && cfun->va_list_gpr_size)
11024 {
11025 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11026
11027 if (va_list_gpr_counter_field)
11028 /* V4 va_list_gpr_size counts number of registers needed. */
11029 n_gpr = cfun->va_list_gpr_size;
11030 else
11031 /* char * va_list instead counts number of bytes needed. */
11032 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11033
11034 if (nregs > n_gpr)
11035 nregs = n_gpr;
11036
11037 mem = gen_rtx_MEM (BLKmode,
11038 plus_constant (Pmode, save_area,
11039 first_reg_offset * reg_size));
11040 MEM_NOTRAP_P (mem) = 1;
11041 set_mem_alias_set (mem, set);
11042 set_mem_align (mem, BITS_PER_WORD);
11043
11044 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11045 nregs);
11046 }
11047
11048 /* Save FP registers if needed. */
11049 if (DEFAULT_ABI == ABI_V4
11050 && TARGET_HARD_FLOAT && TARGET_FPRS
11051 && ! no_rtl
11052 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11053 && cfun->va_list_fpr_size)
11054 {
11055 int fregno = next_cum.fregno, nregs;
11056 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11057 rtx lab = gen_label_rtx ();
11058 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11059 * UNITS_PER_FP_WORD);
11060
11061 emit_jump_insn
11062 (gen_rtx_SET (VOIDmode,
11063 pc_rtx,
11064 gen_rtx_IF_THEN_ELSE (VOIDmode,
11065 gen_rtx_NE (VOIDmode, cr1,
11066 const0_rtx),
11067 gen_rtx_LABEL_REF (VOIDmode, lab),
11068 pc_rtx)));
11069
11070 for (nregs = 0;
11071 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11072 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11073 {
11074 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11075 ? DFmode : SFmode,
11076 plus_constant (Pmode, save_area, off));
11077 MEM_NOTRAP_P (mem) = 1;
11078 set_mem_alias_set (mem, set);
11079 set_mem_align (mem, GET_MODE_ALIGNMENT (
11080 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11081 ? DFmode : SFmode));
11082 emit_move_insn (mem, gen_rtx_REG (
11083 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11084 ? DFmode : SFmode, fregno));
11085 }
11086
11087 emit_label (lab);
11088 }
11089 }
11090
11091 /* Create the va_list data type. */
11092
11093 static tree
11094 rs6000_build_builtin_va_list (void)
11095 {
11096 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11097
11098 /* For AIX, prefer 'char *' because that's what the system
11099 header files like. */
11100 if (DEFAULT_ABI != ABI_V4)
11101 return build_pointer_type (char_type_node);
11102
11103 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11104 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11105 get_identifier ("__va_list_tag"), record);
11106
11107 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11108 unsigned_char_type_node);
11109 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11110 unsigned_char_type_node);
11111 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11112 every user file. */
11113 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11114 get_identifier ("reserved"), short_unsigned_type_node);
11115 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11116 get_identifier ("overflow_arg_area"),
11117 ptr_type_node);
11118 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11119 get_identifier ("reg_save_area"),
11120 ptr_type_node);
11121
11122 va_list_gpr_counter_field = f_gpr;
11123 va_list_fpr_counter_field = f_fpr;
11124
11125 DECL_FIELD_CONTEXT (f_gpr) = record;
11126 DECL_FIELD_CONTEXT (f_fpr) = record;
11127 DECL_FIELD_CONTEXT (f_res) = record;
11128 DECL_FIELD_CONTEXT (f_ovf) = record;
11129 DECL_FIELD_CONTEXT (f_sav) = record;
11130
11131 TYPE_STUB_DECL (record) = type_decl;
11132 TYPE_NAME (record) = type_decl;
11133 TYPE_FIELDS (record) = f_gpr;
11134 DECL_CHAIN (f_gpr) = f_fpr;
11135 DECL_CHAIN (f_fpr) = f_res;
11136 DECL_CHAIN (f_res) = f_ovf;
11137 DECL_CHAIN (f_ovf) = f_sav;
11138
11139 layout_type (record);
11140
11141 /* The correct type is an array type of one element. */
11142 return build_array_type (record, build_index_type (size_zero_node));
11143 }
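/* For reference, the record built above corresponds to the familiar
V.4 declaration (a paraphrase, not code emitted here):

typedef struct __va_list_tag {
unsigned char gpr;            (GPRs consumed so far)
unsigned char fpr;            (FPRs consumed so far)
unsigned short reserved;      (the named padding)
void *overflow_arg_area;      (arguments passed on the stack)
void *reg_save_area;          (saved r3..r10 followed by f1..f8)
} va_list[1]; */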
11144
11145 /* Implement va_start. */
11146
11147 static void
11148 rs6000_va_start (tree valist, rtx nextarg)
11149 {
11150 HOST_WIDE_INT words, n_gpr, n_fpr;
11151 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11152 tree gpr, fpr, ovf, sav, t;
11153
11154 /* Only SVR4 needs something special. */
11155 if (DEFAULT_ABI != ABI_V4)
11156 {
11157 std_expand_builtin_va_start (valist, nextarg);
11158 return;
11159 }
11160
11161 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11162 f_fpr = DECL_CHAIN (f_gpr);
11163 f_res = DECL_CHAIN (f_fpr);
11164 f_ovf = DECL_CHAIN (f_res);
11165 f_sav = DECL_CHAIN (f_ovf);
11166
11167 valist = build_simple_mem_ref (valist);
11168 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11169 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11170 f_fpr, NULL_TREE);
11171 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11172 f_ovf, NULL_TREE);
11173 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11174 f_sav, NULL_TREE);
11175
11176 /* Count number of gp and fp argument registers used. */
11177 words = crtl->args.info.words;
11178 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11179 GP_ARG_NUM_REG);
11180 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11181 FP_ARG_NUM_REG);
11182
11183 if (TARGET_DEBUG_ARG)
11184 fprintf (stderr, "va_start: words = "HOST_WIDE_INT_PRINT_DEC", n_gpr = "
11185 HOST_WIDE_INT_PRINT_DEC", n_fpr = "HOST_WIDE_INT_PRINT_DEC"\n",
11186 words, n_gpr, n_fpr);
11187
11188 if (cfun->va_list_gpr_size)
11189 {
11190 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11191 build_int_cst (NULL_TREE, n_gpr));
11192 TREE_SIDE_EFFECTS (t) = 1;
11193 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11194 }
11195
11196 if (cfun->va_list_fpr_size)
11197 {
11198 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11199 build_int_cst (NULL_TREE, n_fpr));
11200 TREE_SIDE_EFFECTS (t) = 1;
11201 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11202
11203 #ifdef HAVE_AS_GNU_ATTRIBUTE
11204 if (call_ABI_of_interest (cfun->decl))
11205 rs6000_passes_float = true;
11206 #endif
11207 }
11208
11209 /* Find the overflow area. */
11210 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11211 if (words != 0)
11212 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
11213 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11214 TREE_SIDE_EFFECTS (t) = 1;
11215 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11216
11217 /* If there were no va_arg invocations, don't set up the register
11218 save area. */
11219 if (!cfun->va_list_gpr_size
11220 && !cfun->va_list_fpr_size
11221 && n_gpr < GP_ARG_NUM_REG
11222 && n_fpr < FP_ARG_V4_MAX_REG)
11223 return;
11224
11225 /* Find the register save area. */
11226 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11227 if (cfun->machine->varargs_save_offset)
11228 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11229 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11230 TREE_SIDE_EFFECTS (t) = 1;
11231 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11232 }
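/* Worked example (illustrative, V.4): for

void f (int a, double b, ...);

the named arguments consume r3 and f1, so va_start records n_gpr == 1
and n_fpr == 1; a later va_arg starts scanning at r4 in the GPR save
area and at f2 in the FPR save area. */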
11233
11234 /* Implement va_arg. */
11235
11236 static tree
11237 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11238 gimple_seq *post_p)
11239 {
11240 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11241 tree gpr, fpr, ovf, sav, reg, t, u;
11242 int size, rsize, n_reg, sav_ofs, sav_scale;
11243 tree lab_false, lab_over, addr;
11244 int align;
11245 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11246 int regalign = 0;
11247 gimple stmt;
11248
11249 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11250 {
11251 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11252 return build_va_arg_indirect_ref (t);
11253 }
11254
11255 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11256 earlier version of gcc, with the property that it always applied alignment
11257 adjustments to the va-args (even for zero-sized types). The cheapest way
11258 to deal with this is to replicate the effect of the part of
11259 std_gimplify_va_arg_expr that carries out the alignment adjustment, for
11260 the relevant case.
11261 We don't need to check for pass-by-reference because of the test above.
11262 We can return a simplified answer, since we know there's no offset to add. */
11263
11264 if (((TARGET_MACHO
11265 && rs6000_darwin64_abi)
11266 || DEFAULT_ABI == ABI_ELFv2
11267 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11268 && integer_zerop (TYPE_SIZE (type)))
11269 {
11270 unsigned HOST_WIDE_INT align, boundary;
11271 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11272 align = PARM_BOUNDARY / BITS_PER_UNIT;
11273 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11274 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11275 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11276 boundary /= BITS_PER_UNIT;
11277 if (boundary > align)
11278 {
11279 tree t;
11280 /* This updates arg ptr by the amount that would be necessary
11281 to align the zero-sized (but not zero-alignment) item. */
11282 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11283 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11284 gimplify_and_add (t, pre_p);
11285
11286 t = fold_convert (sizetype, valist_tmp);
11287 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11288 fold_convert (TREE_TYPE (valist),
11289 fold_build2 (BIT_AND_EXPR, sizetype, t,
11290 size_int (-boundary))));
11291 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11292 gimplify_and_add (t, pre_p);
11293 }
11294 /* Since it is zero-sized there's no increment for the item itself. */
11295 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11296 return build_va_arg_indirect_ref (valist_tmp);
11297 }
11298
11299 if (DEFAULT_ABI != ABI_V4)
11300 {
11301 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11302 {
11303 tree elem_type = TREE_TYPE (type);
11304 machine_mode elem_mode = TYPE_MODE (elem_type);
11305 int elem_size = GET_MODE_SIZE (elem_mode);
11306
11307 if (elem_size < UNITS_PER_WORD)
11308 {
11309 tree real_part, imag_part;
11310 gimple_seq post = NULL;
11311
11312 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11313 &post);
11314 /* Copy the value into a temporary, lest the formal temporary
11315 be reused out from under us. */
11316 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11317 gimple_seq_add_seq (pre_p, post);
11318
11319 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11320 post_p);
11321
11322 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11323 }
11324 }
11325
11326 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
11327 }
11328
11329 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11330 f_fpr = DECL_CHAIN (f_gpr);
11331 f_res = DECL_CHAIN (f_fpr);
11332 f_ovf = DECL_CHAIN (f_res);
11333 f_sav = DECL_CHAIN (f_ovf);
11334
11335 valist = build_va_arg_indirect_ref (valist);
11336 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11337 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11338 f_fpr, NULL_TREE);
11339 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11340 f_ovf, NULL_TREE);
11341 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11342 f_sav, NULL_TREE);
11343
11344 size = int_size_in_bytes (type);
11345 rsize = (size + 3) / 4;
11346 align = 1;
11347
11348 if (TARGET_HARD_FLOAT && TARGET_FPRS
11349 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
11350 || (TARGET_DOUBLE_FLOAT
11351 && (TYPE_MODE (type) == DFmode
11352 || TYPE_MODE (type) == TFmode
11353 || TYPE_MODE (type) == SDmode
11354 || TYPE_MODE (type) == DDmode
11355 || TYPE_MODE (type) == TDmode))))
11356 {
11357 /* FP args go in FP registers, if present. */
11358 reg = fpr;
11359 n_reg = (size + 7) / 8;
11360 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
11361 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
11362 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
11363 align = 8;
11364 }
11365 else
11366 {
11367 /* Otherwise into GP registers. */
11368 reg = gpr;
11369 n_reg = rsize;
11370 sav_ofs = 0;
11371 sav_scale = 4;
11372 if (n_reg == 2)
11373 align = 8;
11374 }
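/* Worked example (illustrative): on V.4 with double-float hardware,
sav_ofs == 8 * 4 == 32, i.e. the FPR slots begin right after the eight
4-byte GPR slots of reg_save_area, and sav_scale == 8, so FPR counter
value N maps to reg_save_area + 32 + N * 8. */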
11375
11376 /* Pull the value out of the saved registers.... */
11377
11378 lab_over = NULL;
11379 addr = create_tmp_var (ptr_type_node, "addr");
11380
11381 /* AltiVec vectors never go in registers when -mabi=altivec. */
11382 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11383 align = 16;
11384 else
11385 {
11386 lab_false = create_artificial_label (input_location);
11387 lab_over = create_artificial_label (input_location);
11388
11389 /* Long long and SPE vectors are aligned in the registers,
11390 as is any other 2-gpr item such as complex int, due to a
11391 historical mistake. */
11392 u = reg;
11393 if (n_reg == 2 && reg == gpr)
11394 {
11395 regalign = 1;
11396 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11397 build_int_cst (TREE_TYPE (reg), n_reg - 1));
11398 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
11399 unshare_expr (reg), u);
11400 }
11401 /* _Decimal128 is passed in even/odd fpr pairs; the stored
11402 reg number is 0 for f1, so we want to make it odd. */
11403 else if (reg == fpr && TYPE_MODE (type) == TDmode)
11404 {
11405 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11406 build_int_cst (TREE_TYPE (reg), 1));
11407 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
11408 }
11409
11410 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
11411 t = build2 (GE_EXPR, boolean_type_node, u, t);
11412 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11413 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11414 gimplify_and_add (t, pre_p);
11415
11416 t = sav;
11417 if (sav_ofs)
11418 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11419
11420 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11421 build_int_cst (TREE_TYPE (reg), n_reg));
11422 u = fold_convert (sizetype, u);
11423 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
11424 t = fold_build_pointer_plus (t, u);
11425
11426 /* _Decimal32 varargs are located in the second word of the 64-bit
11427 FP register for 32-bit binaries. */
11428 if (!TARGET_POWERPC64
11429 && TARGET_HARD_FLOAT && TARGET_FPRS
11430 && TYPE_MODE (type) == SDmode)
11431 t = fold_build_pointer_plus_hwi (t, size);
11432
11433 gimplify_assign (addr, t, pre_p);
11434
11435 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11436
11437 stmt = gimple_build_label (lab_false);
11438 gimple_seq_add_stmt (pre_p, stmt);
11439
11440 if ((n_reg == 2 && !regalign) || n_reg > 2)
11441 {
11442 /* Ensure that we don't find any more args in regs.
11443 Alignment has already taken care of the special cases. */
11444 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
11445 }
11446 }
11447
11448 /* ... otherwise out of the overflow area. */
11449
11450 /* Care for on-stack alignment if needed. */
11451 t = ovf;
11452 if (align != 1)
11453 {
11454 t = fold_build_pointer_plus_hwi (t, align - 1);
11455 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
11456 build_int_cst (TREE_TYPE (t), -align));
11457 }
11458 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11459
11460 gimplify_assign (unshare_expr (addr), t, pre_p);
11461
11462 t = fold_build_pointer_plus_hwi (t, size);
11463 gimplify_assign (unshare_expr (ovf), t, pre_p);
11464
11465 if (lab_over)
11466 {
11467 stmt = gimple_build_label (lab_over);
11468 gimple_seq_add_stmt (pre_p, stmt);
11469 }
11470
11471 if (STRICT_ALIGNMENT
11472 && (TYPE_ALIGN (type)
11473 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
11474 {
11475 /* The value (of type complex double, for example) may not be
11476 aligned in memory in the saved registers, so copy via a
11477 temporary. (This is the same code as used for SPARC.) */
11478 tree tmp = create_tmp_var (type, "va_arg_tmp");
11479 tree dest_addr = build_fold_addr_expr (tmp);
11480
11481 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
11482 3, dest_addr, addr, size_int (rsize * 4));
11483
11484 gimplify_and_add (copy, pre_p);
11485 addr = dest_addr;
11486 }
11487
11488 addr = fold_convert (ptrtype, addr);
11489 return build_va_arg_indirect_ref (addr);
11490 }
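/* Hedged sketch (an illustration, not the emitted gimple): for va_arg
of a plain int under V.4, the sequence built above behaves like

if (ap->gpr >= 8) goto overflow;
addr = ap->reg_save_area + ap->gpr++ * 4;
goto done;
overflow:
addr = ap->overflow_arg_area;
ap->overflow_arg_area += 4;
done:
result = *(int *) addr; */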
11491
11492 /* Builtins. */
11493
11494 static void
11495 def_builtin (const char *name, tree type, enum rs6000_builtins code)
11496 {
11497 tree t;
11498 unsigned classify = rs6000_builtin_info[(int)code].attr;
11499 const char *attr_string = "";
11500
11501 gcc_assert (name != NULL);
11502 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT - 1));
11503
11504 if (rs6000_builtin_decls[(int)code])
11505 fatal_error ("internal error: builtin function %s already processed", name);
11506
11507 rs6000_builtin_decls[(int)code] = t =
11508 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
11509
11510 /* Set any special attributes. */
11511 if ((classify & RS6000_BTC_CONST) != 0)
11512 {
11513 /* const function, function only depends on the inputs. */
11514 TREE_READONLY (t) = 1;
11515 TREE_NOTHROW (t) = 1;
11516 attr_string = ", const";
11517 }
11518 else if ((classify & RS6000_BTC_PURE) != 0)
11519 {
11520 /* pure function, function can read global memory, but does not set any
11521 external state. */
11522 DECL_PURE_P (t) = 1;
11523 TREE_NOTHROW (t) = 1;
11524 attr_string = ", pure";
11525 }
11526 else if ((classify & RS6000_BTC_FP) != 0)
11527 {
11528 /* Function is a math function. If rounding mode is on, then treat the
11529 function as not reading global memory, but it can have arbitrary side
11530 effects. If it is off, then assume the function is a const function.
11531 This mimics the ATTR_MATHFN_FPROUNDING attribute in
11532 builtin-attribute.def that is used for the math functions. */
11533 TREE_NOTHROW (t) = 1;
11534 if (flag_rounding_math)
11535 {
11536 DECL_PURE_P (t) = 1;
11537 DECL_IS_NOVOPS (t) = 1;
11538 attr_string = ", fp, pure";
11539 }
11540 else
11541 {
11542 TREE_READONLY (t) = 1;
11543 attr_string = ", fp, const";
11544 }
11545 }
11546 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
11547 gcc_unreachable ();
11548
11549 if (TARGET_DEBUG_BUILTIN)
11550 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
11551 (int)code, name, attr_string);
11552 }
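/* Illustrative call (the arguments shown are hypothetical): builtins
are registered elsewhere in this file in the form

def_builtin ("__builtin_altivec_vaddubm", fntype, ALTIVEC_BUILTIN_VADDUBM);

where FNTYPE is a previously built function type; the decl is recorded
in rs6000_builtin_decls and picks up the const/pure/fp attributes
encoded in rs6000_builtin_info. */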
11553
11554 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
11555
11556 #undef RS6000_BUILTIN_1
11557 #undef RS6000_BUILTIN_2
11558 #undef RS6000_BUILTIN_3
11559 #undef RS6000_BUILTIN_A
11560 #undef RS6000_BUILTIN_D
11561 #undef RS6000_BUILTIN_E
11562 #undef RS6000_BUILTIN_H
11563 #undef RS6000_BUILTIN_P
11564 #undef RS6000_BUILTIN_Q
11565 #undef RS6000_BUILTIN_S
11566 #undef RS6000_BUILTIN_X
11567
11568 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11569 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11570 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
11571 { MASK, ICODE, NAME, ENUM },
11572
11573 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11574 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11575 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11576 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11577 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11578 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11579 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11580 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11581
11582 static const struct builtin_description bdesc_3arg[] =
11583 {
11584 #include "rs6000-builtin.def"
11585 };
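/* Illustrative expansion (assumed .def entry): with RS6000_BUILTIN_3
defined as above, a three-operand entry in rs6000-builtin.def such as
the AltiVec vmaddfp builtin contributes one initializer row

{ MASK, CODE_FOR_altivec_vmaddfp, "__builtin_altivec_vmaddfp",
ALTIVEC_BUILTIN_VMADDFP },

while every other RS6000_BUILTIN_* class expands to nothing, so each
table in this file selects exactly one class from the same .def. */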
11586
11587 /* DST operations: void foo (void *, const int, const char). */
11588
11589 #undef RS6000_BUILTIN_1
11590 #undef RS6000_BUILTIN_2
11591 #undef RS6000_BUILTIN_3
11592 #undef RS6000_BUILTIN_A
11593 #undef RS6000_BUILTIN_D
11594 #undef RS6000_BUILTIN_E
11595 #undef RS6000_BUILTIN_H
11596 #undef RS6000_BUILTIN_P
11597 #undef RS6000_BUILTIN_Q
11598 #undef RS6000_BUILTIN_S
11599 #undef RS6000_BUILTIN_X
11600
11601 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11602 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11603 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11604 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11605 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
11606 { MASK, ICODE, NAME, ENUM },
11607
11608 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11609 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11610 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11611 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11612 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11613 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11614
11615 static const struct builtin_description bdesc_dst[] =
11616 {
11617 #include "rs6000-builtin.def"
11618 };
11619
11620 /* Simple binary operations: VECc = foo (VECa, VECb). */
11621
11622 #undef RS6000_BUILTIN_1
11623 #undef RS6000_BUILTIN_2
11624 #undef RS6000_BUILTIN_3
11625 #undef RS6000_BUILTIN_A
11626 #undef RS6000_BUILTIN_D
11627 #undef RS6000_BUILTIN_E
11628 #undef RS6000_BUILTIN_H
11629 #undef RS6000_BUILTIN_P
11630 #undef RS6000_BUILTIN_Q
11631 #undef RS6000_BUILTIN_S
11632 #undef RS6000_BUILTIN_X
11633
11634 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11635 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
11636 { MASK, ICODE, NAME, ENUM },
11637
11638 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11639 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11640 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11641 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11642 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11643 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11644 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11645 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11646 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11647
11648 static const struct builtin_description bdesc_2arg[] =
11649 {
11650 #include "rs6000-builtin.def"
11651 };
11652
11653 #undef RS6000_BUILTIN_1
11654 #undef RS6000_BUILTIN_2
11655 #undef RS6000_BUILTIN_3
11656 #undef RS6000_BUILTIN_A
11657 #undef RS6000_BUILTIN_D
11658 #undef RS6000_BUILTIN_E
11659 #undef RS6000_BUILTIN_H
11660 #undef RS6000_BUILTIN_P
11661 #undef RS6000_BUILTIN_Q
11662 #undef RS6000_BUILTIN_S
11663 #undef RS6000_BUILTIN_X
11664
11665 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11666 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11667 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11668 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11669 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11670 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11671 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11672 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
11673 { MASK, ICODE, NAME, ENUM },
11674
11675 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11676 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11677 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11678
11679 /* AltiVec predicates. */
11680
11681 static const struct builtin_description bdesc_altivec_preds[] =
11682 {
11683 #include "rs6000-builtin.def"
11684 };
11685
11686 /* SPE predicates. */
11687 #undef RS6000_BUILTIN_1
11688 #undef RS6000_BUILTIN_2
11689 #undef RS6000_BUILTIN_3
11690 #undef RS6000_BUILTIN_A
11691 #undef RS6000_BUILTIN_D
11692 #undef RS6000_BUILTIN_E
11693 #undef RS6000_BUILTIN_H
11694 #undef RS6000_BUILTIN_P
11695 #undef RS6000_BUILTIN_Q
11696 #undef RS6000_BUILTIN_S
11697 #undef RS6000_BUILTIN_X
11698
11699 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11700 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11701 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11702 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11703 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11704 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11705 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11706 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11707 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11708 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
11709 { MASK, ICODE, NAME, ENUM },
11710
11711 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11712
11713 static const struct builtin_description bdesc_spe_predicates[] =
11714 {
11715 #include "rs6000-builtin.def"
11716 };
11717
11718 /* SPE evsel predicates. */
11719 #undef RS6000_BUILTIN_1
11720 #undef RS6000_BUILTIN_2
11721 #undef RS6000_BUILTIN_3
11722 #undef RS6000_BUILTIN_A
11723 #undef RS6000_BUILTIN_D
11724 #undef RS6000_BUILTIN_E
11725 #undef RS6000_BUILTIN_H
11726 #undef RS6000_BUILTIN_P
11727 #undef RS6000_BUILTIN_Q
11728 #undef RS6000_BUILTIN_S
11729 #undef RS6000_BUILTIN_X
11730
11731 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11732 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11733 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11734 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11735 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11736 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
11737 { MASK, ICODE, NAME, ENUM },
11738
11739 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11740 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11741 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11742 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11743 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11744
11745 static const struct builtin_description bdesc_spe_evsel[] =
11746 {
11747 #include "rs6000-builtin.def"
11748 };
11749
11750 /* PAIRED predicates. */
11751 #undef RS6000_BUILTIN_1
11752 #undef RS6000_BUILTIN_2
11753 #undef RS6000_BUILTIN_3
11754 #undef RS6000_BUILTIN_A
11755 #undef RS6000_BUILTIN_D
11756 #undef RS6000_BUILTIN_E
11757 #undef RS6000_BUILTIN_H
11758 #undef RS6000_BUILTIN_P
11759 #undef RS6000_BUILTIN_Q
11760 #undef RS6000_BUILTIN_S
11761 #undef RS6000_BUILTIN_X
11762
11763 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11764 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11765 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11766 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11767 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11768 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11769 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11770 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11771 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
11772 { MASK, ICODE, NAME, ENUM },
11773
11774 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11775 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11776
11777 static const struct builtin_description bdesc_paired_preds[] =
11778 {
11779 #include "rs6000-builtin.def"
11780 };
11781
11782 /* ABS* operations. */
11783
11784 #undef RS6000_BUILTIN_1
11785 #undef RS6000_BUILTIN_2
11786 #undef RS6000_BUILTIN_3
11787 #undef RS6000_BUILTIN_A
11788 #undef RS6000_BUILTIN_D
11789 #undef RS6000_BUILTIN_E
11790 #undef RS6000_BUILTIN_H
11791 #undef RS6000_BUILTIN_P
11792 #undef RS6000_BUILTIN_Q
11793 #undef RS6000_BUILTIN_S
11794 #undef RS6000_BUILTIN_X
11795
11796 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11797 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11798 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11799 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
11800 { MASK, ICODE, NAME, ENUM },
11801
11802 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11803 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11804 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11805 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11806 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11807 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11808 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11809
11810 static const struct builtin_description bdesc_abs[] =
11811 {
11812 #include "rs6000-builtin.def"
11813 };
11814
11815 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
11816 foo (VECa). */
11817
11818 #undef RS6000_BUILTIN_1
11819 #undef RS6000_BUILTIN_2
11820 #undef RS6000_BUILTIN_3
11821 #undef RS6000_BUILTIN_A
11822 #undef RS6000_BUILTIN_D
11823 #undef RS6000_BUILTIN_E
11824 #undef RS6000_BUILTIN_H
11825 #undef RS6000_BUILTIN_P
11826 #undef RS6000_BUILTIN_Q
11827 #undef RS6000_BUILTIN_S
11828 #undef RS6000_BUILTIN_X
11829
11830 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
11831 { MASK, ICODE, NAME, ENUM },
11832
11833 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11834 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11835 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11836 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11837 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11838 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11839 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11840 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11841 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11842 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11843
11844 static const struct builtin_description bdesc_1arg[] =
11845 {
11846 #include "rs6000-builtin.def"
11847 };
11848
11849 /* HTM builtins. */
11850 #undef RS6000_BUILTIN_1
11851 #undef RS6000_BUILTIN_2
11852 #undef RS6000_BUILTIN_3
11853 #undef RS6000_BUILTIN_A
11854 #undef RS6000_BUILTIN_D
11855 #undef RS6000_BUILTIN_E
11856 #undef RS6000_BUILTIN_H
11857 #undef RS6000_BUILTIN_P
11858 #undef RS6000_BUILTIN_Q
11859 #undef RS6000_BUILTIN_S
11860 #undef RS6000_BUILTIN_X
11861
11862 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11863 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11864 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11865 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11866 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11867 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11868 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
11869 { MASK, ICODE, NAME, ENUM },
11870
11871 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11872 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11873 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11874 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11875
11876 static const struct builtin_description bdesc_htm[] =
11877 {
11878 #include "rs6000-builtin.def"
11879 };
11880
11881 #undef RS6000_BUILTIN_1
11882 #undef RS6000_BUILTIN_2
11883 #undef RS6000_BUILTIN_3
11884 #undef RS6000_BUILTIN_A
11885 #undef RS6000_BUILTIN_D
11886 #undef RS6000_BUILTIN_E
11887 #undef RS6000_BUILTIN_H
11888 #undef RS6000_BUILTIN_P
11889 #undef RS6000_BUILTIN_Q
11890 #undef RS6000_BUILTIN_S
11891
11892 /* Return true if a builtin function is overloaded. */
11893 bool
11894 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
11895 {
11896 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
11897 }
11898
11899 /* Expand a call to a builtin that takes no arguments, using insn ICODE. */
11900 static rtx
11901 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
11902 {
11903 rtx pat;
11904 machine_mode tmode = insn_data[icode].operand[0].mode;
11905
11906 if (icode == CODE_FOR_nothing)
11907 /* Builtin not supported on this processor. */
11908 return 0;
11909
11910 if (target == 0
11911 || GET_MODE (target) != tmode
11912 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11913 target = gen_reg_rtx (tmode);
11914
11915 pat = GEN_FCN (icode) (target);
11916 if (! pat)
11917 return 0;
11918 emit_insn (pat);
11919
11920 return target;
11921 }
11922
11923
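/* Expand a call to the mtfsf builtin; operand 0 must be a constant
8-bit field mask (FM) and operand 1 supplies the value moved into the
selected FPSCR fields. */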
11924 static rtx
11925 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
11926 {
11927 rtx pat;
11928 tree arg0 = CALL_EXPR_ARG (exp, 0);
11929 tree arg1 = CALL_EXPR_ARG (exp, 1);
11930 rtx op0 = expand_normal (arg0);
11931 rtx op1 = expand_normal (arg1);
11932 machine_mode mode0 = insn_data[icode].operand[0].mode;
11933 machine_mode mode1 = insn_data[icode].operand[1].mode;
11934
11935 if (icode == CODE_FOR_nothing)
11936 /* Builtin not supported on this processor. */
11937 return 0;
11938
11939 /* If we got invalid arguments bail out before generating bad rtl. */
11940 if (arg0 == error_mark_node || arg1 == error_mark_node)
11941 return const0_rtx;
11942
11943 if (GET_CODE (op0) != CONST_INT
11944 || INTVAL (op0) > 255
11945 || INTVAL (op0) < 0)
11946 {
11947 error ("argument 1 must be an 8-bit field value");
11948 return const0_rtx;
11949 }
11950
11951 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
11952 op0 = copy_to_mode_reg (mode0, op0);
11953
11954 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11955 op1 = copy_to_mode_reg (mode1, op1);
11956
11957 pat = GEN_FCN (icode) (op0, op1);
11958 if (! pat)
11959 return const0_rtx;
11960 emit_insn (pat);
11961
11962 return NULL_RTX;
11963 }
11964
11965
11966 static rtx
11967 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
11968 {
11969 rtx pat;
11970 tree arg0 = CALL_EXPR_ARG (exp, 0);
11971 rtx op0 = expand_normal (arg0);
11972 machine_mode tmode = insn_data[icode].operand[0].mode;
11973 machine_mode mode0 = insn_data[icode].operand[1].mode;
11974
11975 if (icode == CODE_FOR_nothing)
11976 /* Builtin not supported on this processor. */
11977 return 0;
11978
11979 /* If we got invalid arguments bail out before generating bad rtl. */
11980 if (arg0 == error_mark_node)
11981 return const0_rtx;
11982
11983 if (icode == CODE_FOR_altivec_vspltisb
11984 || icode == CODE_FOR_altivec_vspltish
11985 || icode == CODE_FOR_altivec_vspltisw
11986 || icode == CODE_FOR_spe_evsplatfi
11987 || icode == CODE_FOR_spe_evsplati)
11988 {
11989 /* Only allow 5-bit *signed* literals. */
11990 if (GET_CODE (op0) != CONST_INT
11991 || INTVAL (op0) > 15
11992 || INTVAL (op0) < -16)
11993 {
11994 error ("argument 1 must be a 5-bit signed literal");
11995 return const0_rtx;
11996 }
11997 }
11998
11999 if (target == 0
12000 || GET_MODE (target) != tmode
12001 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12002 target = gen_reg_rtx (tmode);
12003
12004 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12005 op0 = copy_to_mode_reg (mode0, op0);
12006
12007 pat = GEN_FCN (icode) (target, op0);
12008 if (! pat)
12009 return 0;
12010 emit_insn (pat);
12011
12012 return target;
12013 }
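/* Illustrative diagnostic (assumed user code): the 5-bit check above
means that, for example,

vector signed char v = vec_splat_s8 (16);

is rejected with "argument 1 must be a 5-bit signed literal", because
vspltisb and friends only accept immediates in the range -16..15. */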
12014
12015 static rtx
12016 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12017 {
12018 rtx pat, scratch1, scratch2;
12019 tree arg0 = CALL_EXPR_ARG (exp, 0);
12020 rtx op0 = expand_normal (arg0);
12021 machine_mode tmode = insn_data[icode].operand[0].mode;
12022 machine_mode mode0 = insn_data[icode].operand[1].mode;
12023
12024 /* If we have invalid arguments, bail out before generating bad rtl. */
12025 if (arg0 == error_mark_node)
12026 return const0_rtx;
12027
12028 if (target == 0
12029 || GET_MODE (target) != tmode
12030 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12031 target = gen_reg_rtx (tmode);
12032
12033 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12034 op0 = copy_to_mode_reg (mode0, op0);
12035
12036 scratch1 = gen_reg_rtx (mode0);
12037 scratch2 = gen_reg_rtx (mode0);
12038
12039 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12040 if (! pat)
12041 return 0;
12042 emit_insn (pat);
12043
12044 return target;
12045 }
12046
12047 static rtx
12048 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12049 {
12050 rtx pat;
12051 tree arg0 = CALL_EXPR_ARG (exp, 0);
12052 tree arg1 = CALL_EXPR_ARG (exp, 1);
12053 rtx op0 = expand_normal (arg0);
12054 rtx op1 = expand_normal (arg1);
12055 machine_mode tmode = insn_data[icode].operand[0].mode;
12056 machine_mode mode0 = insn_data[icode].operand[1].mode;
12057 machine_mode mode1 = insn_data[icode].operand[2].mode;
12058
12059 if (icode == CODE_FOR_nothing)
12060 /* Builtin not supported on this processor. */
12061 return 0;
12062
12063 /* If we got invalid arguments bail out before generating bad rtl. */
12064 if (arg0 == error_mark_node || arg1 == error_mark_node)
12065 return const0_rtx;
12066
12067 if (icode == CODE_FOR_altivec_vcfux
12068 || icode == CODE_FOR_altivec_vcfsx
12069 || icode == CODE_FOR_altivec_vctsxs
12070 || icode == CODE_FOR_altivec_vctuxs
12071 || icode == CODE_FOR_altivec_vspltb
12072 || icode == CODE_FOR_altivec_vsplth
12073 || icode == CODE_FOR_altivec_vspltw
12074 || icode == CODE_FOR_spe_evaddiw
12075 || icode == CODE_FOR_spe_evldd
12076 || icode == CODE_FOR_spe_evldh
12077 || icode == CODE_FOR_spe_evldw
12078 || icode == CODE_FOR_spe_evlhhesplat
12079 || icode == CODE_FOR_spe_evlhhossplat
12080 || icode == CODE_FOR_spe_evlhhousplat
12081 || icode == CODE_FOR_spe_evlwhe
12082 || icode == CODE_FOR_spe_evlwhos
12083 || icode == CODE_FOR_spe_evlwhou
12084 || icode == CODE_FOR_spe_evlwhsplat
12085 || icode == CODE_FOR_spe_evlwwsplat
12086 || icode == CODE_FOR_spe_evrlwi
12087 || icode == CODE_FOR_spe_evslwi
12088 || icode == CODE_FOR_spe_evsrwis
12089 || icode == CODE_FOR_spe_evsubifw
12090 || icode == CODE_FOR_spe_evsrwiu)
12091 {
12092 /* Only allow 5-bit unsigned literals. */
12093 STRIP_NOPS (arg1);
12094 if (TREE_CODE (arg1) != INTEGER_CST
12095 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12096 {
12097 error ("argument 2 must be a 5-bit unsigned literal");
12098 return const0_rtx;
12099 }
12100 }
12101
12102 if (target == 0
12103 || GET_MODE (target) != tmode
12104 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12105 target = gen_reg_rtx (tmode);
12106
12107 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12108 op0 = copy_to_mode_reg (mode0, op0);
12109 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12110 op1 = copy_to_mode_reg (mode1, op1);
12111
12112 pat = GEN_FCN (icode) (target, op0, op1);
12113 if (! pat)
12114 return 0;
12115 emit_insn (pat);
12116
12117 return target;
12118 }
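
/* Illustrative sketch, not part of the original code: the
   "TREE_INT_CST_LOW (x) & ~0x1f" tests above are the usual mask idiom
   for range-checking an unsigned literal.  A hypothetical stand-alone
   version of the check, for 0 < BITS < HOST_BITS_PER_WIDE_INT:  */
static inline bool
fits_in_unsigned_bits (unsigned HOST_WIDE_INT val, int bits)
{
  /* Any bit set outside the low BITS bits means VAL is out of range;
     e.g. fits_in_unsigned_bits (val, 5) accepts exactly 0..31.  */
  return (val & ~((((unsigned HOST_WIDE_INT) 1) << bits) - 1)) == 0;
}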
12119
12120 static rtx
12121 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12122 {
12123 rtx pat, scratch;
12124 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12125 tree arg0 = CALL_EXPR_ARG (exp, 1);
12126 tree arg1 = CALL_EXPR_ARG (exp, 2);
12127 rtx op0 = expand_normal (arg0);
12128 rtx op1 = expand_normal (arg1);
12129 machine_mode tmode = SImode;
12130 machine_mode mode0 = insn_data[icode].operand[1].mode;
12131 machine_mode mode1 = insn_data[icode].operand[2].mode;
12132 int cr6_form_int;
12133
12134 if (TREE_CODE (cr6_form) != INTEGER_CST)
12135 {
12136 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12137 return const0_rtx;
12138 }
12139 else
12140 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12141
12142 gcc_assert (mode0 == mode1);
12143
12144 /* If we have invalid arguments, bail out before generating bad rtl. */
12145 if (arg0 == error_mark_node || arg1 == error_mark_node)
12146 return const0_rtx;
12147
12148 if (target == 0
12149 || GET_MODE (target) != tmode
12150 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12151 target = gen_reg_rtx (tmode);
12152
12153 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12154 op0 = copy_to_mode_reg (mode0, op0);
12155 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12156 op1 = copy_to_mode_reg (mode1, op1);
12157
12158 scratch = gen_reg_rtx (mode0);
12159
12160 pat = GEN_FCN (icode) (scratch, op0, op1);
12161 if (! pat)
12162 return 0;
12163 emit_insn (pat);
12164
12165 /* The vec_any* and vec_all* predicates use the same opcodes for two
12166 different operations, but the bits in CR6 will be different
12167 depending on what information we want. So we have to play tricks
12168 with CR6 to get the right bits out.
12169
12170 If you think this is disgusting, look at the specs for the
12171 AltiVec predicates. */
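/* For example (assuming the usual altivec.h encodings, where __CR6_EQ
   is 0, __CR6_EQ_REV is 1, __CR6_LT is 2 and __CR6_LT_REV is 3):
   vec_all_eq (a, b) resolves to a vcmpeq*_p call whose first argument
   is __CR6_LT, so cr6_form_int is 2 and the LT bit is tested below,
   while vec_any_eq passes __CR6_EQ_REV and tests the reverse of the
   EQ bit.  */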
12172
12173 switch (cr6_form_int)
12174 {
12175 case 0:
12176 emit_insn (gen_cr6_test_for_zero (target));
12177 break;
12178 case 1:
12179 emit_insn (gen_cr6_test_for_zero_reverse (target));
12180 break;
12181 case 2:
12182 emit_insn (gen_cr6_test_for_lt (target));
12183 break;
12184 case 3:
12185 emit_insn (gen_cr6_test_for_lt_reverse (target));
12186 break;
12187 default:
12188 error ("argument 1 of __builtin_altivec_predicate is out of range");
12189 break;
12190 }
12191
12192 return target;
12193 }
12194
12195 static rtx
12196 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12197 {
12198 rtx pat, addr;
12199 tree arg0 = CALL_EXPR_ARG (exp, 0);
12200 tree arg1 = CALL_EXPR_ARG (exp, 1);
12201 machine_mode tmode = insn_data[icode].operand[0].mode;
12202 machine_mode mode0 = Pmode;
12203 machine_mode mode1 = Pmode;
12204 rtx op0 = expand_normal (arg0);
12205 rtx op1 = expand_normal (arg1);
12206
12207 if (icode == CODE_FOR_nothing)
12208 /* Builtin not supported on this processor. */
12209 return 0;
12210
12211 /* If we got invalid arguments bail out before generating bad rtl. */
12212 if (arg0 == error_mark_node || arg1 == error_mark_node)
12213 return const0_rtx;
12214
12215 if (target == 0
12216 || GET_MODE (target) != tmode
12217 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12218 target = gen_reg_rtx (tmode);
12219
12220 op1 = copy_to_mode_reg (mode1, op1);
12221
12222 if (op0 == const0_rtx)
12223 {
12224 addr = gen_rtx_MEM (tmode, op1);
12225 }
12226 else
12227 {
12228 op0 = copy_to_mode_reg (mode0, op0);
12229 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12230 }
12231
12232 pat = GEN_FCN (icode) (target, addr);
12233
12234 if (! pat)
12235 return 0;
12236 emit_insn (pat);
12237
12238 return target;
12239 }
12240
12241 /* Return a constant vector for use as a little-endian permute control vector
12242 to reverse the order of elements of the given vector mode. */
12243 static rtx
12244 swap_selector_for_mode (machine_mode mode)
12245 {
12246 /* These are little endian vectors, so their elements are reversed
12247 from what you would normally expect for a permute control vector. */
12248 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12249 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12250 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12251 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12252 unsigned int *swaparray, i;
12253 rtx perm[16];
12254
12255 switch (mode)
12256 {
12257 case V2DFmode:
12258 case V2DImode:
12259 swaparray = swap2;
12260 break;
12261 case V4SFmode:
12262 case V4SImode:
12263 swaparray = swap4;
12264 break;
12265 case V8HImode:
12266 swaparray = swap8;
12267 break;
12268 case V16QImode:
12269 swaparray = swap16;
12270 break;
12271 default:
12272 gcc_unreachable ();
12273 }
12274
12275 for (i = 0; i < 16; ++i)
12276 perm[i] = GEN_INT (swaparray[i]);
12277
12278 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
12279 }
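
/* A sketch for illustration only: under big-endian vperm numbering,
   reversing the four words of a V4SI vector would use the control
   vector {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}.  Since the
   selector built here is itself a little-endian vector, its sixteen
   bytes are stored in the opposite order, which is the swap4 table
   above.  The hypothetical helper below just checks that relation on
   the host.  */
static int
swap4_is_reversed_be_selector (void)
{
  static const unsigned char be_sel[16]
    = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3};
  static const unsigned char swap4[16]
    = {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12};
  for (int i = 0; i < 16; ++i)
    if (swap4[i] != be_sel[15 - i])
      return 0;
  return 1;	/* Holds: swap4 is be_sel reversed byte for byte.  */
}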
12280
12281 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12282 with -maltivec=be specified. Issue the load followed by an element-reversing
12283 permute. */
12284 void
12285 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12286 {
12287 rtx tmp = gen_reg_rtx (mode);
12288 rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
12289 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12290 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12291 rtx sel = swap_selector_for_mode (mode);
12292 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12293
12294 gcc_assert (REG_P (op0));
12295 emit_insn (par);
12296 emit_insn (gen_rtx_SET (VOIDmode, op0, vperm));
12297 }
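
/* Shape of what is emitted above, sketched for illustration:

     (parallel [(set tmp (mem ...))		;; the raw load
		(unspec [(const_int 0)] unspec)])
     (set op0 (unspec [tmp tmp sel] UNSPEC_VPERM))

   i.e. the raw load lands in TMP and OP0 receives the element-reversed
   copy produced by the vperm.  */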
12298
12299 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12300 with -maltivec=be specified. Issue the store preceded by an element-reversing
12301 permute. */
12302 void
12303 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12304 {
12305 rtx tmp = gen_reg_rtx (mode);
12306 rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
12307 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12308 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12309 rtx sel = swap_selector_for_mode (mode);
12310 rtx vperm;
12311
12312 gcc_assert (REG_P (op1));
12313 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12314 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12315 emit_insn (par);
12316 }
12317
12318 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12319 specified. Issue the store preceded by an element-reversing permute. */
12320 void
12321 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12322 {
12323 machine_mode inner_mode = GET_MODE_INNER (mode);
12324 rtx tmp = gen_reg_rtx (mode);
12325 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12326 rtx sel = swap_selector_for_mode (mode);
12327 rtx vperm;
12328
12329 gcc_assert (REG_P (op1));
12330 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12331 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12332 emit_insn (gen_rtx_SET (VOIDmode, op0, stvx));
12333 }
12334
12335 static rtx
12336 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
12337 {
12338 rtx pat, addr;
12339 tree arg0 = CALL_EXPR_ARG (exp, 0);
12340 tree arg1 = CALL_EXPR_ARG (exp, 1);
12341 machine_mode tmode = insn_data[icode].operand[0].mode;
12342 machine_mode mode0 = Pmode;
12343 machine_mode mode1 = Pmode;
12344 rtx op0 = expand_normal (arg0);
12345 rtx op1 = expand_normal (arg1);
12346
12347 if (icode == CODE_FOR_nothing)
12348 /* Builtin not supported on this processor. */
12349 return 0;
12350
12351 /* If we got invalid arguments bail out before generating bad rtl. */
12352 if (arg0 == error_mark_node || arg1 == error_mark_node)
12353 return const0_rtx;
12354
12355 if (target == 0
12356 || GET_MODE (target) != tmode
12357 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12358 target = gen_reg_rtx (tmode);
12359
12360 op1 = copy_to_mode_reg (mode1, op1);
12361
12362 if (op0 == const0_rtx)
12363 {
12364 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
12365 }
12366 else
12367 {
12368 op0 = copy_to_mode_reg (mode0, op0);
12369 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
12370 }
12371
12372 pat = GEN_FCN (icode) (target, addr);
12373
12374 if (! pat)
12375 return 0;
12376 emit_insn (pat);
12377
12378 return target;
12379 }
12380
12381 static rtx
12382 spe_expand_stv_builtin (enum insn_code icode, tree exp)
12383 {
12384 tree arg0 = CALL_EXPR_ARG (exp, 0);
12385 tree arg1 = CALL_EXPR_ARG (exp, 1);
12386 tree arg2 = CALL_EXPR_ARG (exp, 2);
12387 rtx op0 = expand_normal (arg0);
12388 rtx op1 = expand_normal (arg1);
12389 rtx op2 = expand_normal (arg2);
12390 rtx pat;
12391 machine_mode mode0 = insn_data[icode].operand[0].mode;
12392 machine_mode mode1 = insn_data[icode].operand[1].mode;
12393 machine_mode mode2 = insn_data[icode].operand[2].mode;
12394
12395 /* Invalid arguments. Bail before doing anything stoopid! */
12396 if (arg0 == error_mark_node
12397 || arg1 == error_mark_node
12398 || arg2 == error_mark_node)
12399 return const0_rtx;
12400
12401 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
12402 op0 = copy_to_mode_reg (mode2, op0);
12403 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
12404 op1 = copy_to_mode_reg (mode0, op1);
12405 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
12406 op2 = copy_to_mode_reg (mode1, op2);
12407
12408 pat = GEN_FCN (icode) (op1, op2, op0);
12409 if (pat)
12410 emit_insn (pat);
12411 return NULL_RTX;
12412 }
12413
12414 static rtx
12415 paired_expand_stv_builtin (enum insn_code icode, tree exp)
12416 {
12417 tree arg0 = CALL_EXPR_ARG (exp, 0);
12418 tree arg1 = CALL_EXPR_ARG (exp, 1);
12419 tree arg2 = CALL_EXPR_ARG (exp, 2);
12420 rtx op0 = expand_normal (arg0);
12421 rtx op1 = expand_normal (arg1);
12422 rtx op2 = expand_normal (arg2);
12423 rtx pat, addr;
12424 machine_mode tmode = insn_data[icode].operand[0].mode;
12425 machine_mode mode1 = Pmode;
12426 machine_mode mode2 = Pmode;
12427
12428 /* Invalid arguments. Bail before doing anything stoopid! */
12429 if (arg0 == error_mark_node
12430 || arg1 == error_mark_node
12431 || arg2 == error_mark_node)
12432 return const0_rtx;
12433
12434 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
12435 op0 = copy_to_mode_reg (tmode, op0);
12436
12437 op2 = copy_to_mode_reg (mode2, op2);
12438
12439 if (op1 == const0_rtx)
12440 {
12441 addr = gen_rtx_MEM (tmode, op2);
12442 }
12443 else
12444 {
12445 op1 = copy_to_mode_reg (mode1, op1);
12446 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12447 }
12448
12449 pat = GEN_FCN (icode) (addr, op0);
12450 if (pat)
12451 emit_insn (pat);
12452 return NULL_RTX;
12453 }
12454
12455 static rtx
12456 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
12457 {
12458 tree arg0 = CALL_EXPR_ARG (exp, 0);
12459 tree arg1 = CALL_EXPR_ARG (exp, 1);
12460 tree arg2 = CALL_EXPR_ARG (exp, 2);
12461 rtx op0 = expand_normal (arg0);
12462 rtx op1 = expand_normal (arg1);
12463 rtx op2 = expand_normal (arg2);
12464 rtx pat, addr;
12465 machine_mode tmode = insn_data[icode].operand[0].mode;
12466 machine_mode smode = insn_data[icode].operand[1].mode;
12467 machine_mode mode1 = Pmode;
12468 machine_mode mode2 = Pmode;
12469
12470 /* Invalid arguments. Bail before doing anything stoopid! */
12471 if (arg0 == error_mark_node
12472 || arg1 == error_mark_node
12473 || arg2 == error_mark_node)
12474 return const0_rtx;
12475
12476 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
12477 op0 = copy_to_mode_reg (smode, op0);
12478
12479 op2 = copy_to_mode_reg (mode2, op2);
12480
12481 if (op1 == const0_rtx)
12482 {
12483 addr = gen_rtx_MEM (tmode, op2);
12484 }
12485 else
12486 {
12487 op1 = copy_to_mode_reg (mode1, op1);
12488 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12489 }
12490
12491 pat = GEN_FCN (icode) (addr, op0);
12492 if (pat)
12493 emit_insn (pat);
12494 return NULL_RTX;
12495 }
12496
12497 /* Return the appropriate SPR number associated with the given builtin. */
12498 static inline HOST_WIDE_INT
12499 htm_spr_num (enum rs6000_builtins code)
12500 {
12501 if (code == HTM_BUILTIN_GET_TFHAR
12502 || code == HTM_BUILTIN_SET_TFHAR)
12503 return TFHAR_SPR;
12504 else if (code == HTM_BUILTIN_GET_TFIAR
12505 || code == HTM_BUILTIN_SET_TFIAR)
12506 return TFIAR_SPR;
12507 else if (code == HTM_BUILTIN_GET_TEXASR
12508 || code == HTM_BUILTIN_SET_TEXASR)
12509 return TEXASR_SPR;
12510 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
12511 || code == HTM_BUILTIN_SET_TEXASRU);
12512 return TEXASRU_SPR;
12513 }
12514
12515 /* Return the appropriate SPR regno associated with the given builtin. */
12516 static inline HOST_WIDE_INT
12517 htm_spr_regno (enum rs6000_builtins code)
12518 {
12519 if (code == HTM_BUILTIN_GET_TFHAR
12520 || code == HTM_BUILTIN_SET_TFHAR)
12521 return TFHAR_REGNO;
12522 else if (code == HTM_BUILTIN_GET_TFIAR
12523 || code == HTM_BUILTIN_SET_TFIAR)
12524 return TFIAR_REGNO;
12525 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
12526 || code == HTM_BUILTIN_SET_TEXASR
12527 || code == HTM_BUILTIN_GET_TEXASRU
12528 || code == HTM_BUILTIN_SET_TEXASRU);
12529 return TEXASR_REGNO;
12530 }
12531
12532 /* Return the correct ICODE value depending on whether we are
12533 setting or reading the HTM SPRs. */
12534 static inline enum insn_code
12535 rs6000_htm_spr_icode (bool nonvoid)
12536 {
12537 if (nonvoid)
12538 return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
12539 else
12540 return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
12541 }
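
/* For example, a read such as __builtin_get_texasr () reaches
   htm_expand_builtin below with RS6000_BTC_SPR set in its attributes;
   NONVOID is true, so the mfspr pattern chosen here is used together
   with htm_spr_num and htm_spr_regno to supply the TEXASR operands.
   (A sketch of the flow only; see the expander for the details.)  */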
12542
12543 /* Expand the HTM builtin in EXP and store the result in TARGET.
12544 Store true in *EXPANDEDP if we found a builtin to expand. */
12545 static rtx
12546 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
12547 {
12548 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12549 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
12550 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12551 const struct builtin_description *d;
12552 size_t i;
12553
12554 *expandedp = false;
12555
12556 /* Expand the HTM builtins. */
12557 d = bdesc_htm;
12558 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
12559 if (d->code == fcode)
12560 {
12561 rtx op[MAX_HTM_OPERANDS], pat;
12562 int nopnds = 0;
12563 tree arg;
12564 call_expr_arg_iterator iter;
12565 unsigned attr = rs6000_builtin_info[fcode].attr;
12566 enum insn_code icode = d->icode;
12567
12568 if (attr & RS6000_BTC_SPR)
12569 icode = rs6000_htm_spr_icode (nonvoid);
12570
12571 if (nonvoid)
12572 {
12573 machine_mode tmode = insn_data[icode].operand[0].mode;
12574 if (!target
12575 || GET_MODE (target) != tmode
12576 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
12577 target = gen_reg_rtx (tmode);
12578 op[nopnds++] = target;
12579 }
12580
12581 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
12582 {
12583 const struct insn_operand_data *insn_op;
12584
12585 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
12586 return NULL_RTX;
12587
12588 insn_op = &insn_data[icode].operand[nopnds];
12589
12590 op[nopnds] = expand_normal (arg);
12591
12592 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
12593 {
12594 if (!strcmp (insn_op->constraint, "n"))
12595 {
12596 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
12597 if (!CONST_INT_P (op[nopnds]))
12598 error ("argument %d must be an unsigned literal", arg_num);
12599 else
12600 error ("argument %d is an unsigned literal that is "
12601 "out of range", arg_num);
12602 return const0_rtx;
12603 }
12604 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
12605 }
12606
12607 nopnds++;
12608 }
12609
12610 /* Handle the builtins for extended mnemonics. These accept
12611 no arguments, but map to builtins that take arguments. */
12612 switch (fcode)
12613 {
12614 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
12615 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
12616 op[nopnds++] = GEN_INT (1);
12617 #ifdef ENABLE_CHECKING
12618 attr |= RS6000_BTC_UNARY;
12619 #endif
12620 break;
12621 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
12622 op[nopnds++] = GEN_INT (0);
12623 #ifdef ENABLE_CHECKING
12624 attr |= RS6000_BTC_UNARY;
12625 #endif
12626 break;
12627 default:
12628 break;
12629 }
12630
12631 /* If this builtin accesses SPRs, then pass in the appropriate
12632 SPR number and SPR regno as the last two operands. */
12633 if (attr & RS6000_BTC_SPR)
12634 {
12635 op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
12636 op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
12637 }
12638
12639 #ifdef ENABLE_CHECKING
12640 int expected_nopnds = 0;
12641 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
12642 expected_nopnds = 1;
12643 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
12644 expected_nopnds = 2;
12645 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
12646 expected_nopnds = 3;
12647 if (!(attr & RS6000_BTC_VOID))
12648 expected_nopnds += 1;
12649 if (attr & RS6000_BTC_SPR)
12650 expected_nopnds += 2;
12651
12652 gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
12653 #endif
12654
12655 switch (nopnds)
12656 {
12657 case 1:
12658 pat = GEN_FCN (icode) (op[0]);
12659 break;
12660 case 2:
12661 pat = GEN_FCN (icode) (op[0], op[1]);
12662 break;
12663 case 3:
12664 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
12665 break;
12666 case 4:
12667 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
12668 break;
12669 default:
12670 gcc_unreachable ();
12671 }
12672 if (!pat)
12673 return NULL_RTX;
12674 emit_insn (pat);
12675
12676 *expandedp = true;
12677 if (nonvoid)
12678 return target;
12679 return const0_rtx;
12680 }
12681
12682 return NULL_RTX;
12683 }
12684
12685 static rtx
12686 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
12687 {
12688 rtx pat;
12689 tree arg0 = CALL_EXPR_ARG (exp, 0);
12690 tree arg1 = CALL_EXPR_ARG (exp, 1);
12691 tree arg2 = CALL_EXPR_ARG (exp, 2);
12692 rtx op0 = expand_normal (arg0);
12693 rtx op1 = expand_normal (arg1);
12694 rtx op2 = expand_normal (arg2);
12695 machine_mode tmode = insn_data[icode].operand[0].mode;
12696 machine_mode mode0 = insn_data[icode].operand[1].mode;
12697 machine_mode mode1 = insn_data[icode].operand[2].mode;
12698 machine_mode mode2 = insn_data[icode].operand[3].mode;
12699
12700 if (icode == CODE_FOR_nothing)
12701 /* Builtin not supported on this processor. */
12702 return 0;
12703
12704 /* If we got invalid arguments bail out before generating bad rtl. */
12705 if (arg0 == error_mark_node
12706 || arg1 == error_mark_node
12707 || arg2 == error_mark_node)
12708 return const0_rtx;
12709
12710 /* Check and prepare the argument depending on the instruction code.
12711
12712 Note that a switch statement instead of this sequence of tests
12713 would be incorrect, as many of the CODE_FOR values could be
12714 CODE_FOR_nothing; that would yield multiple case labels with
12715 identical values, which is invalid. We never reach here at
12716 runtime in that case anyway. */
12717 if (icode == CODE_FOR_altivec_vsldoi_v4sf
12718 || icode == CODE_FOR_altivec_vsldoi_v4si
12719 || icode == CODE_FOR_altivec_vsldoi_v8hi
12720 || icode == CODE_FOR_altivec_vsldoi_v16qi)
12721 {
12722 /* Only allow 4-bit unsigned literals. */
12723 STRIP_NOPS (arg2);
12724 if (TREE_CODE (arg2) != INTEGER_CST
12725 || TREE_INT_CST_LOW (arg2) & ~0xf)
12726 {
12727 error ("argument 3 must be a 4-bit unsigned literal");
12728 return const0_rtx;
12729 }
12730 }
12731 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
12732 || icode == CODE_FOR_vsx_xxpermdi_v2di
12733 || icode == CODE_FOR_vsx_xxsldwi_v16qi
12734 || icode == CODE_FOR_vsx_xxsldwi_v8hi
12735 || icode == CODE_FOR_vsx_xxsldwi_v4si
12736 || icode == CODE_FOR_vsx_xxsldwi_v4sf
12737 || icode == CODE_FOR_vsx_xxsldwi_v2di
12738 || icode == CODE_FOR_vsx_xxsldwi_v2df)
12739 {
12740 /* Only allow 2-bit unsigned literals. */
12741 STRIP_NOPS (arg2);
12742 if (TREE_CODE (arg2) != INTEGER_CST
12743 || TREE_INT_CST_LOW (arg2) & ~0x3)
12744 {
12745 error ("argument 3 must be a 2-bit unsigned literal");
12746 return const0_rtx;
12747 }
12748 }
12749 else if (icode == CODE_FOR_vsx_set_v2df
12750 || icode == CODE_FOR_vsx_set_v2di
12751 || icode == CODE_FOR_bcdadd
12752 || icode == CODE_FOR_bcdadd_lt
12753 || icode == CODE_FOR_bcdadd_eq
12754 || icode == CODE_FOR_bcdadd_gt
12755 || icode == CODE_FOR_bcdsub
12756 || icode == CODE_FOR_bcdsub_lt
12757 || icode == CODE_FOR_bcdsub_eq
12758 || icode == CODE_FOR_bcdsub_gt)
12759 {
12760 /* Only allow 1-bit unsigned literals. */
12761 STRIP_NOPS (arg2);
12762 if (TREE_CODE (arg2) != INTEGER_CST
12763 || TREE_INT_CST_LOW (arg2) & ~0x1)
12764 {
12765 error ("argument 3 must be a 1-bit unsigned literal");
12766 return const0_rtx;
12767 }
12768 }
12769 else if (icode == CODE_FOR_dfp_ddedpd_dd
12770 || icode == CODE_FOR_dfp_ddedpd_td)
12771 {
12772 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
12773 STRIP_NOPS (arg0);
12774 if (TREE_CODE (arg0) != INTEGER_CST
12775 || TREE_INT_CST_LOW (arg0) & ~0x3)
12776 {
12777 error ("argument 1 must be 0 or 2");
12778 return const0_rtx;
12779 }
12780 }
12781 else if (icode == CODE_FOR_dfp_denbcd_dd
12782 || icode == CODE_FOR_dfp_denbcd_td)
12783 {
12784 /* Only allow 1-bit unsigned literals. */
12785 STRIP_NOPS (arg0);
12786 if (TREE_CODE (arg0) != INTEGER_CST
12787 || TREE_INT_CST_LOW (arg0) & ~0x1)
12788 {
12789 error ("argument 1 must be a 1-bit unsigned literal");
12790 return const0_rtx;
12791 }
12792 }
12793 else if (icode == CODE_FOR_dfp_dscli_dd
12794 || icode == CODE_FOR_dfp_dscli_td
12795 || icode == CODE_FOR_dfp_dscri_dd
12796 || icode == CODE_FOR_dfp_dscri_td)
12797 {
12798 /* Only allow 6-bit unsigned literals. */
12799 STRIP_NOPS (arg1);
12800 if (TREE_CODE (arg1) != INTEGER_CST
12801 || TREE_INT_CST_LOW (arg1) & ~0x3f)
12802 {
12803 error ("argument 2 must be a 6-bit unsigned literal");
12804 return const0_rtx;
12805 }
12806 }
12807 else if (icode == CODE_FOR_crypto_vshasigmaw
12808 || icode == CODE_FOR_crypto_vshasigmad)
12809 {
12810 /* Check whether the 2nd and 3rd arguments are integer constants in
12811 range, and prepare the arguments. */
12812 STRIP_NOPS (arg1);
12813 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
12814 {
12815 error ("argument 2 must be 0 or 1");
12816 return const0_rtx;
12817 }
12818
12819 STRIP_NOPS (arg2);
12820 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
12821 {
12822 error ("argument 3 must be in the range 0..15");
12823 return const0_rtx;
12824 }
12825 }
12826
12827 if (target == 0
12828 || GET_MODE (target) != tmode
12829 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12830 target = gen_reg_rtx (tmode);
12831
12832 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12833 op0 = copy_to_mode_reg (mode0, op0);
12834 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12835 op1 = copy_to_mode_reg (mode1, op1);
12836 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12837 op2 = copy_to_mode_reg (mode2, op2);
12838
12839 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
12840 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
12841 else
12842 pat = GEN_FCN (icode) (target, op0, op1, op2);
12843 if (! pat)
12844 return 0;
12845 emit_insn (pat);
12846
12847 return target;
12848 }
12849
12850 /* Expand the lvx builtins. */
12851 static rtx
12852 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
12853 {
12854 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12855 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12856 tree arg0;
12857 machine_mode tmode, mode0;
12858 rtx pat, op0;
12859 enum insn_code icode;
12860
12861 switch (fcode)
12862 {
12863 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
12864 icode = CODE_FOR_vector_altivec_load_v16qi;
12865 break;
12866 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
12867 icode = CODE_FOR_vector_altivec_load_v8hi;
12868 break;
12869 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
12870 icode = CODE_FOR_vector_altivec_load_v4si;
12871 break;
12872 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
12873 icode = CODE_FOR_vector_altivec_load_v4sf;
12874 break;
12875 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
12876 icode = CODE_FOR_vector_altivec_load_v2df;
12877 break;
12878 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
12879 icode = CODE_FOR_vector_altivec_load_v2di;
break;
12880 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
12881 icode = CODE_FOR_vector_altivec_load_v1ti;
12882 break;
12883 default:
12884 *expandedp = false;
12885 return NULL_RTX;
12886 }
12887
12888 *expandedp = true;
12889
12890 arg0 = CALL_EXPR_ARG (exp, 0);
12891 op0 = expand_normal (arg0);
12892 tmode = insn_data[icode].operand[0].mode;
12893 mode0 = insn_data[icode].operand[1].mode;
12894
12895 if (target == 0
12896 || GET_MODE (target) != tmode
12897 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12898 target = gen_reg_rtx (tmode);
12899
12900 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12901 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12902
12903 pat = GEN_FCN (icode) (target, op0);
12904 if (! pat)
12905 return 0;
12906 emit_insn (pat);
12907 return target;
12908 }
12909
12910 /* Expand the stvx builtins. */
12911 static rtx
12912 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12913 bool *expandedp)
12914 {
12915 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12916 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12917 tree arg0, arg1;
12918 machine_mode mode0, mode1;
12919 rtx pat, op0, op1;
12920 enum insn_code icode;
12921
12922 switch (fcode)
12923 {
12924 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
12925 icode = CODE_FOR_vector_altivec_store_v16qi;
12926 break;
12927 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
12928 icode = CODE_FOR_vector_altivec_store_v8hi;
12929 break;
12930 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
12931 icode = CODE_FOR_vector_altivec_store_v4si;
12932 break;
12933 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
12934 icode = CODE_FOR_vector_altivec_store_v4sf;
12935 break;
12936 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
12937 icode = CODE_FOR_vector_altivec_store_v2df;
12938 break;
12939 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
12940 icode = CODE_FOR_vector_altivec_store_v2di;
break;
12941 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
12942 icode = CODE_FOR_vector_altivec_store_v1ti;
12943 break;
12944 default:
12945 *expandedp = false;
12946 return NULL_RTX;
12947 }
12948
12949 arg0 = CALL_EXPR_ARG (exp, 0);
12950 arg1 = CALL_EXPR_ARG (exp, 1);
12951 op0 = expand_normal (arg0);
12952 op1 = expand_normal (arg1);
12953 mode0 = insn_data[icode].operand[0].mode;
12954 mode1 = insn_data[icode].operand[1].mode;
12955
12956 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12957 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12958 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12959 op1 = copy_to_mode_reg (mode1, op1);
12960
12961 pat = GEN_FCN (icode) (op0, op1);
12962 if (pat)
12963 emit_insn (pat);
12964
12965 *expandedp = true;
12966 return NULL_RTX;
12967 }
12968
12969 /* Expand the dst builtins. */
12970 static rtx
12971 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12972 bool *expandedp)
12973 {
12974 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12975 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12976 tree arg0, arg1, arg2;
12977 machine_mode mode0, mode1;
12978 rtx pat, op0, op1, op2;
12979 const struct builtin_description *d;
12980 size_t i;
12981
12982 *expandedp = false;
12983
12984 /* Handle DST variants. */
12985 d = bdesc_dst;
12986 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
12987 if (d->code == fcode)
12988 {
12989 arg0 = CALL_EXPR_ARG (exp, 0);
12990 arg1 = CALL_EXPR_ARG (exp, 1);
12991 arg2 = CALL_EXPR_ARG (exp, 2);
12992 op0 = expand_normal (arg0);
12993 op1 = expand_normal (arg1);
12994 op2 = expand_normal (arg2);
12995 mode0 = insn_data[d->icode].operand[0].mode;
12996 mode1 = insn_data[d->icode].operand[1].mode;
12997
12998 /* Invalid arguments, bail out before generating bad rtl. */
12999 if (arg0 == error_mark_node
13000 || arg1 == error_mark_node
13001 || arg2 == error_mark_node)
13002 return const0_rtx;
13003
13004 *expandedp = true;
13005 STRIP_NOPS (arg2);
13006 if (TREE_CODE (arg2) != INTEGER_CST
13007 || TREE_INT_CST_LOW (arg2) & ~0x3)
13008 {
13009 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
13010 return const0_rtx;
13011 }
13012
13013 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13014 op0 = copy_to_mode_reg (Pmode, op0);
13015 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13016 op1 = copy_to_mode_reg (mode1, op1);
13017
13018 pat = GEN_FCN (d->icode) (op0, op1, op2);
13019 if (pat != 0)
13020 emit_insn (pat);
13021
13022 return NULL_RTX;
13023 }
13024
13025 return NULL_RTX;
13026 }
13027
13028 /* Expand vec_init builtin. */
13029 static rtx
13030 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
13031 {
13032 machine_mode tmode = TYPE_MODE (type);
13033 machine_mode inner_mode = GET_MODE_INNER (tmode);
13034 int i, n_elt = GET_MODE_NUNITS (tmode);
13035
13036 gcc_assert (VECTOR_MODE_P (tmode));
13037 gcc_assert (n_elt == call_expr_nargs (exp));
13038
13039 if (!target || !register_operand (target, tmode))
13040 target = gen_reg_rtx (tmode);
13041
13042 /* If we have a vector comprised of a single element, such as V1TImode, do
13043 the initialization directly. */
13044 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
13045 {
13046 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
13047 emit_move_insn (target, gen_lowpart (tmode, x));
13048 }
13049 else
13050 {
13051 rtvec v = rtvec_alloc (n_elt);
13052
13053 for (i = 0; i < n_elt; ++i)
13054 {
13055 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
13056 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13057 }
13058
13059 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
13060 }
13061
13062 return target;
13063 }
13064
13065 /* Return the integer constant in ARG. Constrain it to be in the range
13066 of the subparts of VEC_TYPE; issue an error if not. */
13067
13068 static int
13069 get_element_number (tree vec_type, tree arg)
13070 {
13071 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13072
13073 if (!tree_fits_uhwi_p (arg)
13074 || (elt = tree_to_uhwi (arg), elt > max))
13075 {
13076 error ("selector must be an integer constant in the range 0..%wi", max);
13077 return 0;
13078 }
13079
13080 return elt;
13081 }
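
/* E.g. for a vector of 16 QImode elements TYPE_VECTOR_SUBPARTS is 16,
   so selectors 0..15 are accepted; anything else is diagnosed above,
   with 0 returned as a safe fallback.  */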
13082
13083 /* Expand vec_set builtin. */
13084 static rtx
13085 altivec_expand_vec_set_builtin (tree exp)
13086 {
13087 machine_mode tmode, mode1;
13088 tree arg0, arg1, arg2;
13089 int elt;
13090 rtx op0, op1;
13091
13092 arg0 = CALL_EXPR_ARG (exp, 0);
13093 arg1 = CALL_EXPR_ARG (exp, 1);
13094 arg2 = CALL_EXPR_ARG (exp, 2);
13095
13096 tmode = TYPE_MODE (TREE_TYPE (arg0));
13097 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13098 gcc_assert (VECTOR_MODE_P (tmode));
13099
13100 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
13101 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
13102 elt = get_element_number (TREE_TYPE (arg0), arg2);
13103
13104 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
13105 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
13106
13107 op0 = force_reg (tmode, op0);
13108 op1 = force_reg (mode1, op1);
13109
13110 rs6000_expand_vector_set (op0, op1, elt);
13111
13112 return op0;
13113 }
13114
13115 /* Expand vec_ext builtin. */
13116 static rtx
13117 altivec_expand_vec_ext_builtin (tree exp, rtx target)
13118 {
13119 machine_mode tmode, mode0;
13120 tree arg0, arg1;
13121 int elt;
13122 rtx op0;
13123
13124 arg0 = CALL_EXPR_ARG (exp, 0);
13125 arg1 = CALL_EXPR_ARG (exp, 1);
13126
13127 op0 = expand_normal (arg0);
13128 elt = get_element_number (TREE_TYPE (arg0), arg1);
13129
13130 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13131 mode0 = TYPE_MODE (TREE_TYPE (arg0));
13132 gcc_assert (VECTOR_MODE_P (mode0));
13133
13134 op0 = force_reg (mode0, op0);
13135
13136 if (optimize || !target || !register_operand (target, tmode))
13137 target = gen_reg_rtx (tmode);
13138
13139 rs6000_expand_vector_extract (target, op0, elt);
13140
13141 return target;
13142 }
13143
13144 /* Expand the builtin in EXP and store the result in TARGET. Store
13145 true in *EXPANDEDP if we found a builtin to expand. */
13146 static rtx
13147 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
13148 {
13149 const struct builtin_description *d;
13150 size_t i;
13151 enum insn_code icode;
13152 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13153 tree arg0;
13154 rtx op0, pat;
13155 machine_mode tmode, mode0;
13156 enum rs6000_builtins fcode
13157 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13158
13159 if (rs6000_overloaded_builtin_p (fcode))
13160 {
13161 *expandedp = true;
13162 error ("unresolved overload for Altivec builtin %qF", fndecl);
13163
13164 /* Given it is invalid, just generate a normal call. */
13165 return expand_call (exp, target, false);
13166 }
13167
13168 target = altivec_expand_ld_builtin (exp, target, expandedp);
13169 if (*expandedp)
13170 return target;
13171
13172 target = altivec_expand_st_builtin (exp, target, expandedp);
13173 if (*expandedp)
13174 return target;
13175
13176 target = altivec_expand_dst_builtin (exp, target, expandedp);
13177 if (*expandedp)
13178 return target;
13179
13180 *expandedp = true;
13181
13182 switch (fcode)
13183 {
13184 case ALTIVEC_BUILTIN_STVX_V2DF:
13185 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13186 case ALTIVEC_BUILTIN_STVX_V2DI:
13187 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13188 case ALTIVEC_BUILTIN_STVX_V4SF:
13189 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13190 case ALTIVEC_BUILTIN_STVX:
13191 case ALTIVEC_BUILTIN_STVX_V4SI:
13192 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13193 case ALTIVEC_BUILTIN_STVX_V8HI:
13194 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13195 case ALTIVEC_BUILTIN_STVX_V16QI:
13196 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13197 case ALTIVEC_BUILTIN_STVEBX:
13198 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13199 case ALTIVEC_BUILTIN_STVEHX:
13200 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13201 case ALTIVEC_BUILTIN_STVEWX:
13202 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13203 case ALTIVEC_BUILTIN_STVXL_V2DF:
13204 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13205 case ALTIVEC_BUILTIN_STVXL_V2DI:
13206 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13207 case ALTIVEC_BUILTIN_STVXL_V4SF:
13208 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13209 case ALTIVEC_BUILTIN_STVXL:
13210 case ALTIVEC_BUILTIN_STVXL_V4SI:
13211 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13212 case ALTIVEC_BUILTIN_STVXL_V8HI:
13213 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13214 case ALTIVEC_BUILTIN_STVXL_V16QI:
13215 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13216
13217 case ALTIVEC_BUILTIN_STVLX:
13218 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13219 case ALTIVEC_BUILTIN_STVLXL:
13220 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13221 case ALTIVEC_BUILTIN_STVRX:
13222 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13223 case ALTIVEC_BUILTIN_STVRXL:
13224 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13225
13226 case VSX_BUILTIN_STXVD2X_V1TI:
13227 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13228 case VSX_BUILTIN_STXVD2X_V2DF:
13229 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13230 case VSX_BUILTIN_STXVD2X_V2DI:
13231 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13232 case VSX_BUILTIN_STXVW4X_V4SF:
13233 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13234 case VSX_BUILTIN_STXVW4X_V4SI:
13235 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13236 case VSX_BUILTIN_STXVW4X_V8HI:
13237 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13238 case VSX_BUILTIN_STXVW4X_V16QI:
13239 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13240
13241 case ALTIVEC_BUILTIN_MFVSCR:
13242 icode = CODE_FOR_altivec_mfvscr;
13243 tmode = insn_data[icode].operand[0].mode;
13244
13245 if (target == 0
13246 || GET_MODE (target) != tmode
13247 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13248 target = gen_reg_rtx (tmode);
13249
13250 pat = GEN_FCN (icode) (target);
13251 if (! pat)
13252 return 0;
13253 emit_insn (pat);
13254 return target;
13255
13256 case ALTIVEC_BUILTIN_MTVSCR:
13257 icode = CODE_FOR_altivec_mtvscr;
13258 arg0 = CALL_EXPR_ARG (exp, 0);
13259 op0 = expand_normal (arg0);
13260 mode0 = insn_data[icode].operand[0].mode;
13261
13262 /* If we got invalid arguments bail out before generating bad rtl. */
13263 if (arg0 == error_mark_node)
13264 return const0_rtx;
13265
13266 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13267 op0 = copy_to_mode_reg (mode0, op0);
13268
13269 pat = GEN_FCN (icode) (op0);
13270 if (pat)
13271 emit_insn (pat);
13272 return NULL_RTX;
13273
13274 case ALTIVEC_BUILTIN_DSSALL:
13275 emit_insn (gen_altivec_dssall ());
13276 return NULL_RTX;
13277
13278 case ALTIVEC_BUILTIN_DSS:
13279 icode = CODE_FOR_altivec_dss;
13280 arg0 = CALL_EXPR_ARG (exp, 0);
13281 STRIP_NOPS (arg0);
13282 op0 = expand_normal (arg0);
13283 mode0 = insn_data[icode].operand[0].mode;
13284
13285 /* If we got invalid arguments bail out before generating bad rtl. */
13286 if (arg0 == error_mark_node)
13287 return const0_rtx;
13288
13289 if (TREE_CODE (arg0) != INTEGER_CST
13290 || TREE_INT_CST_LOW (arg0) & ~0x3)
13291 {
13292 error ("argument to dss must be a 2-bit unsigned literal");
13293 return const0_rtx;
13294 }
13295
13296 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13297 op0 = copy_to_mode_reg (mode0, op0);
13298
13299 emit_insn (gen_altivec_dss (op0));
13300 return NULL_RTX;
13301
13302 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
13303 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
13304 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
13305 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
13306 case VSX_BUILTIN_VEC_INIT_V2DF:
13307 case VSX_BUILTIN_VEC_INIT_V2DI:
13308 case VSX_BUILTIN_VEC_INIT_V1TI:
13309 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
13310
13311 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
13312 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
13313 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
13314 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
13315 case VSX_BUILTIN_VEC_SET_V2DF:
13316 case VSX_BUILTIN_VEC_SET_V2DI:
13317 case VSX_BUILTIN_VEC_SET_V1TI:
13318 return altivec_expand_vec_set_builtin (exp);
13319
13320 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
13321 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
13322 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
13323 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
13324 case VSX_BUILTIN_VEC_EXT_V2DF:
13325 case VSX_BUILTIN_VEC_EXT_V2DI:
13326 case VSX_BUILTIN_VEC_EXT_V1TI:
13327 return altivec_expand_vec_ext_builtin (exp, target);
13328
13329 default:
13330 break;
13331 /* Fall through. */
13332 }
13333
13334 /* Expand abs* operations. */
13335 d = bdesc_abs;
13336 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
13337 if (d->code == fcode)
13338 return altivec_expand_abs_builtin (d->icode, exp, target);
13339
13340 /* Expand the AltiVec predicates. */
13341 d = bdesc_altivec_preds;
13342 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
13343 if (d->code == fcode)
13344 return altivec_expand_predicate_builtin (d->icode, exp, target);
13345
13346 /* LV* are funky. We initialized them differently. */
13347 switch (fcode)
13348 {
13349 case ALTIVEC_BUILTIN_LVSL:
13350 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
13351 exp, target, false);
13352 case ALTIVEC_BUILTIN_LVSR:
13353 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
13354 exp, target, false);
13355 case ALTIVEC_BUILTIN_LVEBX:
13356 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
13357 exp, target, false);
13358 case ALTIVEC_BUILTIN_LVEHX:
13359 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
13360 exp, target, false);
13361 case ALTIVEC_BUILTIN_LVEWX:
13362 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
13363 exp, target, false);
13364 case ALTIVEC_BUILTIN_LVXL_V2DF:
13365 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
13366 exp, target, false);
13367 case ALTIVEC_BUILTIN_LVXL_V2DI:
13368 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
13369 exp, target, false);
13370 case ALTIVEC_BUILTIN_LVXL_V4SF:
13371 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
13372 exp, target, false);
13373 case ALTIVEC_BUILTIN_LVXL:
13374 case ALTIVEC_BUILTIN_LVXL_V4SI:
13375 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
13376 exp, target, false);
13377 case ALTIVEC_BUILTIN_LVXL_V8HI:
13378 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
13379 exp, target, false);
13380 case ALTIVEC_BUILTIN_LVXL_V16QI:
13381 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
13382 exp, target, false);
13383 case ALTIVEC_BUILTIN_LVX_V2DF:
13384 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
13385 exp, target, false);
13386 case ALTIVEC_BUILTIN_LVX_V2DI:
13387 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
13388 exp, target, false);
13389 case ALTIVEC_BUILTIN_LVX_V4SF:
13390 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
13391 exp, target, false);
13392 case ALTIVEC_BUILTIN_LVX:
13393 case ALTIVEC_BUILTIN_LVX_V4SI:
13394 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
13395 exp, target, false);
13396 case ALTIVEC_BUILTIN_LVX_V8HI:
13397 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
13398 exp, target, false);
13399 case ALTIVEC_BUILTIN_LVX_V16QI:
13400 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
13401 exp, target, false);
13402 case ALTIVEC_BUILTIN_LVLX:
13403 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
13404 exp, target, true);
13405 case ALTIVEC_BUILTIN_LVLXL:
13406 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
13407 exp, target, true);
13408 case ALTIVEC_BUILTIN_LVRX:
13409 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
13410 exp, target, true);
13411 case ALTIVEC_BUILTIN_LVRXL:
13412 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
13413 exp, target, true);
13414 case VSX_BUILTIN_LXVD2X_V1TI:
13415 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
13416 exp, target, false);
13417 case VSX_BUILTIN_LXVD2X_V2DF:
13418 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
13419 exp, target, false);
13420 case VSX_BUILTIN_LXVD2X_V2DI:
13421 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
13422 exp, target, false);
13423 case VSX_BUILTIN_LXVW4X_V4SF:
13424 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
13425 exp, target, false);
13426 case VSX_BUILTIN_LXVW4X_V4SI:
13427 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
13428 exp, target, false);
13429 case VSX_BUILTIN_LXVW4X_V8HI:
13430 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
13431 exp, target, false);
13432 case VSX_BUILTIN_LXVW4X_V16QI:
13433 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
13434 exp, target, false);
13435 break;
13436 default:
13437 break;
13438 /* Fall through. */
13439 }
13440
13441 *expandedp = false;
13442 return NULL_RTX;
13443 }
13444
13445 /* Expand the builtin in EXP and store the result in TARGET. Store
13446 true in *EXPANDEDP if we found a builtin to expand. */
13447 static rtx
13448 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
13449 {
13450 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13451 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13452 const struct builtin_description *d;
13453 size_t i;
13454
13455 *expandedp = true;
13456
13457 switch (fcode)
13458 {
13459 case PAIRED_BUILTIN_STX:
13460 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
13461 case PAIRED_BUILTIN_LX:
13462 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
13463 default:
13464 break;
13465 /* Fall through. */
13466 }
13467
13468 /* Expand the paired predicates. */
13469 d = bdesc_paired_preds;
13470 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
13471 if (d->code == fcode)
13472 return paired_expand_predicate_builtin (d->icode, exp, target);
13473
13474 *expandedp = false;
13475 return NULL_RTX;
13476 }
13477
13478 /* Binops that need to be initialized manually, but can be expanded
13479 automagically by rs6000_expand_binop_builtin. */
13480 static const struct builtin_description bdesc_2arg_spe[] =
13481 {
13482 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
13483 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
13484 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
13485 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
13486 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
13487 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
13488 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
13489 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
13490 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
13491 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
13492 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
13493 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
13494 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
13495 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
13496 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
13497 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
13498 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
13499 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
13500 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
13501 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
13502 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
13503 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
13504 };
13505
13506 /* Expand the builtin in EXP and store the result in TARGET. Store
13507 true in *EXPANDEDP if we found a builtin to expand.
13508
13509 This expands the SPE builtins that are not simple unary and binary
13510 operations. */
13511 static rtx
13512 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
13513 {
13514 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13515 tree arg1, arg0;
13516 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13517 enum insn_code icode;
13518 machine_mode tmode, mode0;
13519 rtx pat, op0;
13520 const struct builtin_description *d;
13521 size_t i;
13522
13523 *expandedp = true;
13524
13525 /* Syntax check for a 5-bit unsigned immediate. */
13526 switch (fcode)
13527 {
13528 case SPE_BUILTIN_EVSTDD:
13529 case SPE_BUILTIN_EVSTDH:
13530 case SPE_BUILTIN_EVSTDW:
13531 case SPE_BUILTIN_EVSTWHE:
13532 case SPE_BUILTIN_EVSTWHO:
13533 case SPE_BUILTIN_EVSTWWE:
13534 case SPE_BUILTIN_EVSTWWO:
13535 arg1 = CALL_EXPR_ARG (exp, 2);
13536 if (TREE_CODE (arg1) != INTEGER_CST
13537 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13538 {
13539 error ("argument 2 must be a 5-bit unsigned literal");
13540 return const0_rtx;
13541 }
13542 break;
13543 default:
13544 break;
13545 }
13546
13547 /* The evsplat*i instructions are not quite generic. */
13548 switch (fcode)
13549 {
13550 case SPE_BUILTIN_EVSPLATFI:
13551 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
13552 exp, target);
13553 case SPE_BUILTIN_EVSPLATI:
13554 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
13555 exp, target);
13556 default:
13557 break;
13558 }
13559
13560 d = bdesc_2arg_spe;
13561 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
13562 if (d->code == fcode)
13563 return rs6000_expand_binop_builtin (d->icode, exp, target);
13564
13565 d = bdesc_spe_predicates;
13566 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
13567 if (d->code == fcode)
13568 return spe_expand_predicate_builtin (d->icode, exp, target);
13569
13570 d = bdesc_spe_evsel;
13571 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
13572 if (d->code == fcode)
13573 return spe_expand_evsel_builtin (d->icode, exp, target);
13574
13575 switch (fcode)
13576 {
13577 case SPE_BUILTIN_EVSTDDX:
13578 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
13579 case SPE_BUILTIN_EVSTDHX:
13580 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
13581 case SPE_BUILTIN_EVSTDWX:
13582 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
13583 case SPE_BUILTIN_EVSTWHEX:
13584 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
13585 case SPE_BUILTIN_EVSTWHOX:
13586 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
13587 case SPE_BUILTIN_EVSTWWEX:
13588 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
13589 case SPE_BUILTIN_EVSTWWOX:
13590 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
13591 case SPE_BUILTIN_EVSTDD:
13592 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
13593 case SPE_BUILTIN_EVSTDH:
13594 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
13595 case SPE_BUILTIN_EVSTDW:
13596 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
13597 case SPE_BUILTIN_EVSTWHE:
13598 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
13599 case SPE_BUILTIN_EVSTWHO:
13600 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
13601 case SPE_BUILTIN_EVSTWWE:
13602 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
13603 case SPE_BUILTIN_EVSTWWO:
13604 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
13605 case SPE_BUILTIN_MFSPEFSCR:
13606 icode = CODE_FOR_spe_mfspefscr;
13607 tmode = insn_data[icode].operand[0].mode;
13608
13609 if (target == 0
13610 || GET_MODE (target) != tmode
13611 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13612 target = gen_reg_rtx (tmode);
13613
13614 pat = GEN_FCN (icode) (target);
13615 if (! pat)
13616 return 0;
13617 emit_insn (pat);
13618 return target;
13619 case SPE_BUILTIN_MTSPEFSCR:
13620 icode = CODE_FOR_spe_mtspefscr;
13621 arg0 = CALL_EXPR_ARG (exp, 0);
13622 op0 = expand_normal (arg0);
13623 mode0 = insn_data[icode].operand[0].mode;
13624
13625 if (arg0 == error_mark_node)
13626 return const0_rtx;
13627
13628 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13629 op0 = copy_to_mode_reg (mode0, op0);
13630
13631 pat = GEN_FCN (icode) (op0);
13632 if (pat)
13633 emit_insn (pat);
13634 return NULL_RTX;
13635 default:
13636 break;
13637 }
13638
13639 *expandedp = false;
13640 return NULL_RTX;
13641 }
13642
13643 static rtx
13644 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13645 {
13646 rtx pat, scratch, tmp;
13647 tree form = CALL_EXPR_ARG (exp, 0);
13648 tree arg0 = CALL_EXPR_ARG (exp, 1);
13649 tree arg1 = CALL_EXPR_ARG (exp, 2);
13650 rtx op0 = expand_normal (arg0);
13651 rtx op1 = expand_normal (arg1);
13652 machine_mode mode0 = insn_data[icode].operand[1].mode;
13653 machine_mode mode1 = insn_data[icode].operand[2].mode;
13654 int form_int;
13655 enum rtx_code code;
13656
13657 if (TREE_CODE (form) != INTEGER_CST)
13658 {
13659 error ("argument 1 of __builtin_paired_predicate must be a constant");
13660 return const0_rtx;
13661 }
13662 else
13663 form_int = TREE_INT_CST_LOW (form);
13664
13665 gcc_assert (mode0 == mode1);
13666
13667 if (arg0 == error_mark_node || arg1 == error_mark_node)
13668 return const0_rtx;
13669
13670 if (target == 0
13671 || GET_MODE (target) != SImode
13672 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
13673 target = gen_reg_rtx (SImode);
13674 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13675 op0 = copy_to_mode_reg (mode0, op0);
13676 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13677 op1 = copy_to_mode_reg (mode1, op1);
13678
13679 scratch = gen_reg_rtx (CCFPmode);
13680
13681 pat = GEN_FCN (icode) (scratch, op0, op1);
13682 if (!pat)
13683 return const0_rtx;
13684
13685 emit_insn (pat);
13686
13687 switch (form_int)
13688 {
13689 /* LT bit. */
13690 case 0:
13691 code = LT;
13692 break;
13693 /* GT bit. */
13694 case 1:
13695 code = GT;
13696 break;
13697 /* EQ bit. */
13698 case 2:
13699 code = EQ;
13700 break;
13701 /* UN bit. */
13702 case 3:
13703 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13704 return target;
13705 default:
13706 error ("argument 1 of __builtin_paired_predicate is out of range");
13707 return const0_rtx;
13708 }
13709
13710 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13711 emit_move_insn (target, tmp);
13712 return target;
13713 }
13714
13715 static rtx
13716 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13717 {
13718 rtx pat, scratch, tmp;
13719 tree form = CALL_EXPR_ARG (exp, 0);
13720 tree arg0 = CALL_EXPR_ARG (exp, 1);
13721 tree arg1 = CALL_EXPR_ARG (exp, 2);
13722 rtx op0 = expand_normal (arg0);
13723 rtx op1 = expand_normal (arg1);
13724 machine_mode mode0 = insn_data[icode].operand[1].mode;
13725 machine_mode mode1 = insn_data[icode].operand[2].mode;
13726 int form_int;
13727 enum rtx_code code;
13728
13729 if (TREE_CODE (form) != INTEGER_CST)
13730 {
13731 error ("argument 1 of __builtin_spe_predicate must be a constant");
13732 return const0_rtx;
13733 }
13734 else
13735 form_int = TREE_INT_CST_LOW (form);
13736
13737 gcc_assert (mode0 == mode1);
13738
13739 if (arg0 == error_mark_node || arg1 == error_mark_node)
13740 return const0_rtx;
13741
13742 if (target == 0
13743 || GET_MODE (target) != SImode
13744 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
13745 target = gen_reg_rtx (SImode);
13746
13747 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13748 op0 = copy_to_mode_reg (mode0, op0);
13749 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13750 op1 = copy_to_mode_reg (mode1, op1);
13751
13752 scratch = gen_reg_rtx (CCmode);
13753
13754 pat = GEN_FCN (icode) (scratch, op0, op1);
13755 if (! pat)
13756 return const0_rtx;
13757 emit_insn (pat);
13758
13759 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
13760 _lower_. We use one compare, but look in different bits of the
13761 CR for each variant.
13762
13763 There are 2 elements in each SPE simd type (upper/lower). The CR
13764 bits are set as follows:
13765
13766 BIT 0 | BIT 1 | BIT 2 | BIT 3
13767 U | L | (U | L) | (U & L)
13768
13769 So, for an "all" relationship, BIT 3 would be set.
13770 For an "any" relationship, BIT 2 would be set. Etc.
13771
13772 Following traditional nomenclature, these bits map to:
13773
13774 BIT 0 | BIT 1 | BIT 2 | BIT 3
13775 LT | GT | EQ | OV
13776
13777 Later, we will generate rtl to look in the OV/EQ/LT/GT bits
13778 (for the all/any/upper/lower variants, respectively). */
13779
13780 switch (form_int)
13781 {
13782 /* All variant. OV bit. */
13783 case 0:
13784 /* We need to get to the OV bit, which is the ORDERED bit. We
13785 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
13786 that's ugly and will make validate_condition_mode die.
13787 So let's just use another pattern. */
13788 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13789 return target;
13790 /* Any variant. EQ bit. */
13791 case 1:
13792 code = EQ;
13793 break;
13794 /* Upper variant. LT bit. */
13795 case 2:
13796 code = LT;
13797 break;
13798 /* Lower variant. GT bit. */
13799 case 3:
13800 code = GT;
13801 break;
13802 default:
13803 error ("argument 1 of __builtin_spe_predicate is out of range");
13804 return const0_rtx;
13805 }
13806
13807 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13808 emit_move_insn (target, tmp);
13809
13810 return target;
13811 }
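/* A usage sketch (assuming -mspe; the predicate name is assumed to be
   one of the bdesc_spe_predicates entries with the
   (int form, v2si, v2si) signature):

     __ev64_opaque__ a, b;
     int all_gt = __builtin_spe_evcmpgts (0, a, b);    form 0: "all", OV bit
     int any_gt = __builtin_spe_evcmpgts (1, a, b);    form 1: "any", EQ bit
*/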
13812
13813 /* The evsel builtins look like this:
13814
13815 e = __builtin_spe_evsel_OP (a, b, c, d);
13816
13817 and work like this:
13818
13819 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
13820 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
13821 */
13822
13823 static rtx
13824 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
13825 {
13826 rtx pat, scratch;
13827 tree arg0 = CALL_EXPR_ARG (exp, 0);
13828 tree arg1 = CALL_EXPR_ARG (exp, 1);
13829 tree arg2 = CALL_EXPR_ARG (exp, 2);
13830 tree arg3 = CALL_EXPR_ARG (exp, 3);
13831 rtx op0 = expand_normal (arg0);
13832 rtx op1 = expand_normal (arg1);
13833 rtx op2 = expand_normal (arg2);
13834 rtx op3 = expand_normal (arg3);
13835 machine_mode mode0 = insn_data[icode].operand[1].mode;
13836 machine_mode mode1 = insn_data[icode].operand[2].mode;
13837
13838 gcc_assert (mode0 == mode1);
13839
13840 if (arg0 == error_mark_node || arg1 == error_mark_node
13841 || arg2 == error_mark_node || arg3 == error_mark_node)
13842 return const0_rtx;
13843
13844 if (target == 0
13845 || GET_MODE (target) != mode0
13846 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
13847 target = gen_reg_rtx (mode0);
13848
13849 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13850 op0 = copy_to_mode_reg (mode0, op0);
13851 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13852 op1 = copy_to_mode_reg (mode0, op1);
13853 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13854 op2 = copy_to_mode_reg (mode0, op2);
13855 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
13856 op3 = copy_to_mode_reg (mode0, op3);
13857
13858 /* Generate the compare. */
13859 scratch = gen_reg_rtx (CCmode);
13860 pat = GEN_FCN (icode) (scratch, op0, op1);
13861 if (! pat)
13862 return const0_rtx;
13863 emit_insn (pat);
13864
13865 if (mode0 == V2SImode)
13866 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
13867 else
13868 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
13869
13870 return target;
13871 }
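/* Following the comment above, a concrete sketch (assuming -mspe; the
   name is assumed from the bdesc_spe_evsel table):

     __ev64_opaque__ a, b, c, d;
     __ev64_opaque__ e = __builtin_spe_evsel_gts (a, b, c, d);

   A single compare of a and b sets CR bits for the upper and lower
   halves; the generated evsel then selects c or d per half.  */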
13872
13873 /* Raise an error message for a builtin function that is called without the
13874 appropriate target options being set. */
13875
13876 static void
13877 rs6000_invalid_builtin (enum rs6000_builtins fncode)
13878 {
13879 size_t uns_fncode = (size_t)fncode;
13880 const char *name = rs6000_builtin_info[uns_fncode].name;
13881 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
13882
13883 gcc_assert (name != NULL);
13884 if ((fnmask & RS6000_BTM_CELL) != 0)
13885 error ("Builtin function %s is only valid for the cell processor", name);
13886 else if ((fnmask & RS6000_BTM_VSX) != 0)
13887 error ("Builtin function %s requires the -mvsx option", name);
13888 else if ((fnmask & RS6000_BTM_HTM) != 0)
13889 error ("Builtin function %s requires the -mhtm option", name);
13890 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
13891 error ("Builtin function %s requires the -maltivec option", name);
13892 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
13893 error ("Builtin function %s requires the -mpaired option", name);
13894 else if ((fnmask & RS6000_BTM_SPE) != 0)
13895 error ("Builtin function %s requires the -mspe option", name);
13896 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13897 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13898 error ("Builtin function %s requires the -mhard-dfp and"
13899 " -mpower8-vector options", name);
13900 else if ((fnmask & RS6000_BTM_DFP) != 0)
13901 error ("Builtin function %s requires the -mhard-dfp option", name);
13902 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
13903 error ("Builtin function %s requires the -mpower8-vector option", name);
13904 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13905 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13906 error ("Builtin function %s requires the -mhard-float and"
13907 " -mlong-double-128 options", name);
13908 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
13909 error ("Builtin function %s requires the -mhard-float option", name);
13910 else
13911 error ("Builtin function %s is not supported with the current options",
13912 name);
13913 }
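/* For example, calling an AltiVec builtin while -maltivec is disabled
   reaches the RS6000_BTM_ALTIVEC arm above and emits a diagnostic of
   the form (sketch):

     error: builtin function __builtin_altivec_vaddubm requires the
     -maltivec option
*/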
13914
13915 /* Expand an expression EXP that calls a built-in function,
13916 with result going to TARGET if that's convenient
13917 (and in mode MODE if that's convenient).
13918 SUBTARGET may be used as the target for computing one of EXP's operands.
13919 IGNORE is nonzero if the value is to be ignored. */
13920
13921 static rtx
13922 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13923 machine_mode mode ATTRIBUTE_UNUSED,
13924 int ignore ATTRIBUTE_UNUSED)
13925 {
13926 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13927 enum rs6000_builtins fcode
13928 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
13929 size_t uns_fcode = (size_t)fcode;
13930 const struct builtin_description *d;
13931 size_t i;
13932 rtx ret;
13933 bool success;
13934 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
13935 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
13936
13937 if (TARGET_DEBUG_BUILTIN)
13938 {
13939 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
13940 const char *name1 = rs6000_builtin_info[uns_fcode].name;
13941 const char *name2 = ((icode != CODE_FOR_nothing)
13942 ? get_insn_name ((int)icode)
13943 : "nothing");
13944 const char *name3;
13945
13946 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
13947 {
13948 default: name3 = "unknown"; break;
13949 case RS6000_BTC_SPECIAL: name3 = "special"; break;
13950 case RS6000_BTC_UNARY: name3 = "unary"; break;
13951 case RS6000_BTC_BINARY: name3 = "binary"; break;
13952 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
13953 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
13954 case RS6000_BTC_ABS: name3 = "abs"; break;
13955 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
13956 case RS6000_BTC_DST: name3 = "dst"; break;
13957 }
13958
13960 fprintf (stderr,
13961 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
13962 (name1) ? name1 : "---", fcode,
13963 (name2) ? name2 : "---", (int)icode,
13964 name3,
13965 func_valid_p ? "" : ", not valid");
13966 }
13967
13968 if (!func_valid_p)
13969 {
13970 rs6000_invalid_builtin (fcode);
13971
13972 /* Given it is invalid, just generate a normal call. */
13973 return expand_call (exp, target, ignore);
13974 }
13975
13976 switch (fcode)
13977 {
13978 case RS6000_BUILTIN_RECIP:
13979 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
13980
13981 case RS6000_BUILTIN_RECIPF:
13982 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
13983
13984 case RS6000_BUILTIN_RSQRTF:
13985 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
13986
13987 case RS6000_BUILTIN_RSQRT:
13988 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
13989
13990 case POWER7_BUILTIN_BPERMD:
13991 return rs6000_expand_binop_builtin (((TARGET_64BIT)
13992 ? CODE_FOR_bpermd_di
13993 : CODE_FOR_bpermd_si), exp, target);
13994
13995 case RS6000_BUILTIN_GET_TB:
13996 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
13997 target);
13998
13999 case RS6000_BUILTIN_MFTB:
14000 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
14001 ? CODE_FOR_rs6000_mftb_di
14002 : CODE_FOR_rs6000_mftb_si),
14003 target);
14004
14005 case RS6000_BUILTIN_MFFS:
14006 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
14007
14008 case RS6000_BUILTIN_MTFSF:
14009 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
14010
14011 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
14012 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
14013 {
14014 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
14015 : (int) CODE_FOR_altivec_lvsl_direct);
14016 machine_mode tmode = insn_data[icode].operand[0].mode;
14017 machine_mode mode = insn_data[icode].operand[1].mode;
14018 tree arg;
14019 rtx op, addr, pat;
14020
14021 gcc_assert (TARGET_ALTIVEC);
14022
14023 arg = CALL_EXPR_ARG (exp, 0);
14024 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
14025 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
14026 addr = memory_address (mode, op);
14027 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
14028 op = addr;
14029 else
14030 {
14031 /* For the load case, we need to negate the address. */
14032 op = gen_reg_rtx (GET_MODE (addr));
14033 emit_insn (gen_rtx_SET (VOIDmode, op,
14034 gen_rtx_NEG (GET_MODE (addr), addr)));
14035 }
14036 op = gen_rtx_MEM (mode, op);
14037
14038 if (target == 0
14039 || GET_MODE (target) != tmode
14040 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14041 target = gen_reg_rtx (tmode);
14042
14043 pat = GEN_FCN (icode) (target, op);
14044 if (!pat)
14045 return 0;
14046 emit_insn (pat);
14047
14048 return target;
14049 }
14050
14051 case ALTIVEC_BUILTIN_VCFUX:
14052 case ALTIVEC_BUILTIN_VCFSX:
14053 case ALTIVEC_BUILTIN_VCTUXS:
14054 case ALTIVEC_BUILTIN_VCTSXS:
14055 /* FIXME: There's got to be a nicer way to handle this case than
14056 constructing a new CALL_EXPR. */
14057 if (call_expr_nargs (exp) == 1)
14058 {
14059 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
14060 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
14061 }
14062 break;
14063
14064 default:
14065 break;
14066 }
14067
14068 if (TARGET_ALTIVEC)
14069 {
14070 ret = altivec_expand_builtin (exp, target, &success);
14071
14072 if (success)
14073 return ret;
14074 }
14075 if (TARGET_SPE)
14076 {
14077 ret = spe_expand_builtin (exp, target, &success);
14078
14079 if (success)
14080 return ret;
14081 }
14082 if (TARGET_PAIRED_FLOAT)
14083 {
14084 ret = paired_expand_builtin (exp, target, &success);
14085
14086 if (success)
14087 return ret;
14088 }
14089 if (TARGET_HTM)
14090 {
14091 ret = htm_expand_builtin (exp, target, &success);
14092
14093 if (success)
14094 return ret;
14095 }
14096
14097 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
14098 gcc_assert (attr == RS6000_BTC_UNARY
14099 || attr == RS6000_BTC_BINARY
14100 || attr == RS6000_BTC_TERNARY);
14101
14102 /* Handle simple unary operations. */
14103 d = bdesc_1arg;
14104 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14105 if (d->code == fcode)
14106 return rs6000_expand_unop_builtin (d->icode, exp, target);
14107
14108 /* Handle simple binary operations. */
14109 d = bdesc_2arg;
14110 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14111 if (d->code == fcode)
14112 return rs6000_expand_binop_builtin (d->icode, exp, target);
14113
14114 /* Handle simple ternary operations. */
14115 d = bdesc_3arg;
14116 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14117 if (d->code == fcode)
14118 return rs6000_expand_ternop_builtin (d->icode, exp, target);
14119
14120 gcc_unreachable ();
14121 }
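/* A sketch of the dispatch above: a call such as

     double q = __builtin_recipdiv (n, d);

   hits the RS6000_BUILTIN_RECIP case and is expanded through
   rs6000_expand_binop_builtin with CODE_FOR_recipdf3, while builtins
   that are not special-cased fall through to the bdesc_1arg /
   bdesc_2arg / bdesc_3arg table scans at the end.  */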
14122
14123 static void
14124 rs6000_init_builtins (void)
14125 {
14126 tree tdecl;
14127 tree ftype;
14128 machine_mode mode;
14129
14130 if (TARGET_DEBUG_BUILTIN)
14131 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
14132 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
14133 (TARGET_SPE) ? ", spe" : "",
14134 (TARGET_ALTIVEC) ? ", altivec" : "",
14135 (TARGET_VSX) ? ", vsx" : "");
14136
14137 V2SI_type_node = build_vector_type (intSI_type_node, 2);
14138 V2SF_type_node = build_vector_type (float_type_node, 2);
14139 V2DI_type_node = build_vector_type (intDI_type_node, 2);
14140 V2DF_type_node = build_vector_type (double_type_node, 2);
14141 V4HI_type_node = build_vector_type (intHI_type_node, 4);
14142 V4SI_type_node = build_vector_type (intSI_type_node, 4);
14143 V4SF_type_node = build_vector_type (float_type_node, 4);
14144 V8HI_type_node = build_vector_type (intHI_type_node, 8);
14145 V16QI_type_node = build_vector_type (intQI_type_node, 16);
14146
14147 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
14148 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
14149 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
14150 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
14151
14152 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
14153 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
14154 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
14155 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
14156
14157 /* We use V1TI mode as a special container to hold __int128_t items that
14158 must live in VSX registers. */
14159 if (intTI_type_node)
14160 {
14161 V1TI_type_node = build_vector_type (intTI_type_node, 1);
14162 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
14163 }
14164
14165 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
14166 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
14167 'vector unsigned short'. */
14168
14169 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14170 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14171 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14172 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14173 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14174
14175 long_integer_type_internal_node = long_integer_type_node;
14176 long_unsigned_type_internal_node = long_unsigned_type_node;
14177 long_long_integer_type_internal_node = long_long_integer_type_node;
14178 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14179 intQI_type_internal_node = intQI_type_node;
14180 uintQI_type_internal_node = unsigned_intQI_type_node;
14181 intHI_type_internal_node = intHI_type_node;
14182 uintHI_type_internal_node = unsigned_intHI_type_node;
14183 intSI_type_internal_node = intSI_type_node;
14184 uintSI_type_internal_node = unsigned_intSI_type_node;
14185 intDI_type_internal_node = intDI_type_node;
14186 uintDI_type_internal_node = unsigned_intDI_type_node;
14187 intTI_type_internal_node = intTI_type_node;
14188 uintTI_type_internal_node = unsigned_intTI_type_node;
14189 float_type_internal_node = float_type_node;
14190 double_type_internal_node = double_type_node;
14191 long_double_type_internal_node = long_double_type_node;
14192 dfloat64_type_internal_node = dfloat64_type_node;
14193 dfloat128_type_internal_node = dfloat128_type_node;
14194 void_type_internal_node = void_type_node;
14195
14196 /* Initialize the modes for builtin_function_type, mapping a machine mode to
14197 tree type node. */
14198 builtin_mode_to_type[QImode][0] = integer_type_node;
14199 builtin_mode_to_type[HImode][0] = integer_type_node;
14200 builtin_mode_to_type[SImode][0] = intSI_type_node;
14201 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14202 builtin_mode_to_type[DImode][0] = intDI_type_node;
14203 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14204 builtin_mode_to_type[TImode][0] = intTI_type_node;
14205 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14206 builtin_mode_to_type[SFmode][0] = float_type_node;
14207 builtin_mode_to_type[DFmode][0] = double_type_node;
14208 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14209 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14210 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14211 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14212 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14213 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14214 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14215 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14216 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14217 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14218 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14219 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14220 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14221 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14222 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14223 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14224 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14225 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
14226
14227 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
14228 TYPE_NAME (bool_char_type_node) = tdecl;
14229
14230 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
14231 TYPE_NAME (bool_short_type_node) = tdecl;
14232
14233 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
14234 TYPE_NAME (bool_int_type_node) = tdecl;
14235
14236 tdecl = add_builtin_type ("__pixel", pixel_type_node);
14237 TYPE_NAME (pixel_type_node) = tdecl;
14238
14239 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
14240 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
14241 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
14242 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
14243 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
14244
14245 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
14246 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
14247
14248 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
14249 TYPE_NAME (V16QI_type_node) = tdecl;
14250
14251 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
14252 TYPE_NAME (bool_V16QI_type_node) = tdecl;
14253
14254 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
14255 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
14256
14257 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
14258 TYPE_NAME (V8HI_type_node) = tdecl;
14259
14260 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
14261 TYPE_NAME (bool_V8HI_type_node) = tdecl;
14262
14263 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
14264 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
14265
14266 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
14267 TYPE_NAME (V4SI_type_node) = tdecl;
14268
14269 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
14270 TYPE_NAME (bool_V4SI_type_node) = tdecl;
14271
14272 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
14273 TYPE_NAME (V4SF_type_node) = tdecl;
14274
14275 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
14276 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
14277
14278 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
14279 TYPE_NAME (V2DF_type_node) = tdecl;
14280
14281 if (TARGET_POWERPC64)
14282 {
14283 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
14284 TYPE_NAME (V2DI_type_node) = tdecl;
14285
14286 tdecl = add_builtin_type ("__vector unsigned long",
14287 unsigned_V2DI_type_node);
14288 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14289
14290 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
14291 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14292 }
14293 else
14294 {
14295 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
14296 TYPE_NAME (V2DI_type_node) = tdecl;
14297
14298 tdecl = add_builtin_type ("__vector unsigned long long",
14299 unsigned_V2DI_type_node);
14300 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14301
14302 tdecl = add_builtin_type ("__vector __bool long long",
14303 bool_V2DI_type_node);
14304 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14305 }
14306
14307 if (V1TI_type_node)
14308 {
14309 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
14310 TYPE_NAME (V1TI_type_node) = tdecl;
14311
14312 tdecl = add_builtin_type ("__vector unsigned __int128",
14313 unsigned_V1TI_type_node);
14314 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
14315 }
14316
14317 /* Paired and SPE builtins are only available if the compiler was
14318 configured with the appropriate options, so only create them when
14319 those options are enabled. Create AltiVec and VSX builtins on
14320 machines with at least the general-purpose extensions (970 and
14321 newer) so that they can be selected via the target attribute. */
14322 if (TARGET_PAIRED_FLOAT)
14323 paired_init_builtins ();
14324 if (TARGET_SPE)
14325 spe_init_builtins ();
14326 if (TARGET_EXTRA_BUILTINS)
14327 altivec_init_builtins ();
14328 if (TARGET_HTM)
14329 htm_init_builtins ();
14330
14331 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
14332 rs6000_common_init_builtins ();
14333
14334 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
14335 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
14336 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
14337
14338 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
14339 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
14340 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
14341
14342 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
14343 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
14344 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
14345
14346 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
14347 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
14348 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
14349
14350 mode = (TARGET_64BIT) ? DImode : SImode;
14351 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
14352 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
14353 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
14354
14355 ftype = build_function_type_list (unsigned_intDI_type_node,
14356 NULL_TREE);
14357 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
14358
14359 if (TARGET_64BIT)
14360 ftype = build_function_type_list (unsigned_intDI_type_node,
14361 NULL_TREE);
14362 else
14363 ftype = build_function_type_list (unsigned_intSI_type_node,
14364 NULL_TREE);
14365 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
14366
14367 ftype = build_function_type_list (double_type_node, NULL_TREE);
14368 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
14369
14370 ftype = build_function_type_list (void_type_node,
14371 intSI_type_node, double_type_node,
14372 NULL_TREE);
14373 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
14374
14375 #if TARGET_XCOFF
14376 /* AIX libm provides clog as __clog. */
14377 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
14378 set_user_assembler_name (tdecl, "__clog");
14379 #endif
14380
14381 #ifdef SUBTARGET_INIT_BUILTINS
14382 SUBTARGET_INIT_BUILTINS;
14383 #endif
14384 }
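/* A usage sketch for some of the builtins registered above (types per
   the build_function_type_list calls):

     unsigned long long tb = __builtin_ppc_get_timebase ();
     double fpscr = __builtin_mffs ();
     __builtin_mtfsf (0xff, fpscr);
*/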
14385
14386 /* Returns the rs6000 builtin decl for CODE. */
14387
14388 static tree
14389 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
14390 {
14391 HOST_WIDE_INT fnmask;
14392
14393 if (code >= RS6000_BUILTIN_COUNT)
14394 return error_mark_node;
14395
14396 fnmask = rs6000_builtin_info[code].mask;
14397 if ((fnmask & rs6000_builtin_mask) != fnmask)
14398 {
14399 rs6000_invalid_builtin ((enum rs6000_builtins)code);
14400 return error_mark_node;
14401 }
14402
14403 return rs6000_builtin_decls[code];
14404 }
14405
14406 static void
14407 spe_init_builtins (void)
14408 {
14409 tree puint_type_node = build_pointer_type (unsigned_type_node);
14410 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
14411 const struct builtin_description *d;
14412 size_t i;
14413
14414 tree v2si_ftype_4_v2si
14415 = build_function_type_list (opaque_V2SI_type_node,
14416 opaque_V2SI_type_node,
14417 opaque_V2SI_type_node,
14418 opaque_V2SI_type_node,
14419 opaque_V2SI_type_node,
14420 NULL_TREE);
14421
14422 tree v2sf_ftype_4_v2sf
14423 = build_function_type_list (opaque_V2SF_type_node,
14424 opaque_V2SF_type_node,
14425 opaque_V2SF_type_node,
14426 opaque_V2SF_type_node,
14427 opaque_V2SF_type_node,
14428 NULL_TREE);
14429
14430 tree int_ftype_int_v2si_v2si
14431 = build_function_type_list (integer_type_node,
14432 integer_type_node,
14433 opaque_V2SI_type_node,
14434 opaque_V2SI_type_node,
14435 NULL_TREE);
14436
14437 tree int_ftype_int_v2sf_v2sf
14438 = build_function_type_list (integer_type_node,
14439 integer_type_node,
14440 opaque_V2SF_type_node,
14441 opaque_V2SF_type_node,
14442 NULL_TREE);
14443
14444 tree void_ftype_v2si_puint_int
14445 = build_function_type_list (void_type_node,
14446 opaque_V2SI_type_node,
14447 puint_type_node,
14448 integer_type_node,
14449 NULL_TREE);
14450
14451 tree void_ftype_v2si_puint_char
14452 = build_function_type_list (void_type_node,
14453 opaque_V2SI_type_node,
14454 puint_type_node,
14455 char_type_node,
14456 NULL_TREE);
14457
14458 tree void_ftype_v2si_pv2si_int
14459 = build_function_type_list (void_type_node,
14460 opaque_V2SI_type_node,
14461 opaque_p_V2SI_type_node,
14462 integer_type_node,
14463 NULL_TREE);
14464
14465 tree void_ftype_v2si_pv2si_char
14466 = build_function_type_list (void_type_node,
14467 opaque_V2SI_type_node,
14468 opaque_p_V2SI_type_node,
14469 char_type_node,
14470 NULL_TREE);
14471
14472 tree void_ftype_int
14473 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14474
14475 tree int_ftype_void
14476 = build_function_type_list (integer_type_node, NULL_TREE);
14477
14478 tree v2si_ftype_pv2si_int
14479 = build_function_type_list (opaque_V2SI_type_node,
14480 opaque_p_V2SI_type_node,
14481 integer_type_node,
14482 NULL_TREE);
14483
14484 tree v2si_ftype_puint_int
14485 = build_function_type_list (opaque_V2SI_type_node,
14486 puint_type_node,
14487 integer_type_node,
14488 NULL_TREE);
14489
14490 tree v2si_ftype_pushort_int
14491 = build_function_type_list (opaque_V2SI_type_node,
14492 pushort_type_node,
14493 integer_type_node,
14494 NULL_TREE);
14495
14496 tree v2si_ftype_signed_char
14497 = build_function_type_list (opaque_V2SI_type_node,
14498 signed_char_type_node,
14499 NULL_TREE);
14500
14501 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
14502
14503 /* Initialize irregular SPE builtins. */
14504
14505 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
14506 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
14507 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
14508 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
14509 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
14510 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
14511 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
14512 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
14513 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
14514 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
14515 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
14516 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
14517 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
14518 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
14519 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
14520 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
14521 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
14522 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
14523
14524 /* Loads. */
14525 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
14526 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
14527 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
14528 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
14529 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
14530 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
14531 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
14532 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
14533 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
14534 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
14535 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
14536 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
14537 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
14538 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
14539 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
14540 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
14541 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
14542 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
14543 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
14544 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
14545 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
14546 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
14547
14548 /* Predicates. */
14549 d = bdesc_spe_predicates;
14550 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
14551 {
14552 tree type;
14553
14554 switch (insn_data[d->icode].operand[1].mode)
14555 {
14556 case V2SImode:
14557 type = int_ftype_int_v2si_v2si;
14558 break;
14559 case V2SFmode:
14560 type = int_ftype_int_v2sf_v2sf;
14561 break;
14562 default:
14563 gcc_unreachable ();
14564 }
14565
14566 def_builtin (d->name, type, d->code);
14567 }
14568
14569 /* Evsel predicates. */
14570 d = bdesc_spe_evsel;
14571 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
14572 {
14573 tree type;
14574
14575 switch (insn_data[d->icode].operand[1].mode)
14576 {
14577 case V2SImode:
14578 type = v2si_ftype_4_v2si;
14579 break;
14580 case V2SFmode:
14581 type = v2sf_ftype_4_v2sf;
14582 break;
14583 default:
14584 gcc_unreachable ();
14585 }
14586
14587 def_builtin (d->name, type, d->code);
14588 }
14589 }
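/* A load/store sketch using the registrations above (assuming -mspe;
   the offsets are small constants, as the evldd/evstdd encodings
   require):

     static __ev64_opaque__ buf[4];
     __ev64_opaque__ v = __builtin_spe_evldd (buf, 8);
     __builtin_spe_evstdd (v, buf, 16);
*/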
14590
14591 static void
14592 paired_init_builtins (void)
14593 {
14594 const struct builtin_description *d;
14595 size_t i;
14596
14597 tree int_ftype_int_v2sf_v2sf
14598 = build_function_type_list (integer_type_node,
14599 integer_type_node,
14600 V2SF_type_node,
14601 V2SF_type_node,
14602 NULL_TREE);
14603 tree pcfloat_type_node =
14604 build_pointer_type (build_qualified_type
14605 (float_type_node, TYPE_QUAL_CONST));
14606
14607 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
14608 long_integer_type_node,
14609 pcfloat_type_node,
14610 NULL_TREE);
14611 tree void_ftype_v2sf_long_pcfloat =
14612 build_function_type_list (void_type_node,
14613 V2SF_type_node,
14614 long_integer_type_node,
14615 pcfloat_type_node,
14616 NULL_TREE);
14617
14619 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
14620 PAIRED_BUILTIN_LX);
14621
14623 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
14624 PAIRED_BUILTIN_STX);
14625
14626 /* Predicates. */
14627 d = bdesc_paired_preds;
14628 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
14629 {
14630 tree type;
14631
14632 if (TARGET_DEBUG_BUILTIN)
14633 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
14634 (int)i, get_insn_name (d->icode), (int)d->icode,
14635 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
14636
14637 switch (insn_data[d->icode].operand[1].mode)
14638 {
14639 case V2SFmode:
14640 type = int_ftype_int_v2sf_v2sf;
14641 break;
14642 default:
14643 gcc_unreachable ();
14644 }
14645
14646 def_builtin (d->name, type, d->code);
14647 }
14648 }
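/* A sketch for the two irregular builtins above (assuming -mpaired):

     const float f[2] = { 1.0f, 2.0f };
     float out[2];
     __builtin_paired_stx (__builtin_paired_lx (0, f), 0, out);

   __builtin_paired_lx loads a float pair from (long offset, pointer);
   __builtin_paired_stx stores one back the same way.  */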
14649
14650 static void
14651 altivec_init_builtins (void)
14652 {
14653 const struct builtin_description *d;
14654 size_t i;
14655 tree ftype;
14656 tree decl;
14657
14658 tree pvoid_type_node = build_pointer_type (void_type_node);
14659
14660 tree pcvoid_type_node
14661 = build_pointer_type (build_qualified_type (void_type_node,
14662 TYPE_QUAL_CONST));
14663
14664 tree int_ftype_opaque
14665 = build_function_type_list (integer_type_node,
14666 opaque_V4SI_type_node, NULL_TREE);
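/* Note that, despite its name, the type immediately below appears to be
   only a placeholder signature: the overloaded builtins registered with
   it (__builtin_vec_splats and __builtin_vec_promote) are resolved in
   the front end, so this exact type should never be consulted during
   expansion (an assumption based on how the overloads are handled).  */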
14667 tree opaque_ftype_opaque
14668 = build_function_type_list (integer_type_node, NULL_TREE);
14669 tree opaque_ftype_opaque_int
14670 = build_function_type_list (opaque_V4SI_type_node,
14671 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
14672 tree opaque_ftype_opaque_opaque_int
14673 = build_function_type_list (opaque_V4SI_type_node,
14674 opaque_V4SI_type_node, opaque_V4SI_type_node,
14675 integer_type_node, NULL_TREE);
14676 tree int_ftype_int_opaque_opaque
14677 = build_function_type_list (integer_type_node,
14678 integer_type_node, opaque_V4SI_type_node,
14679 opaque_V4SI_type_node, NULL_TREE);
14680 tree int_ftype_int_v4si_v4si
14681 = build_function_type_list (integer_type_node,
14682 integer_type_node, V4SI_type_node,
14683 V4SI_type_node, NULL_TREE);
14684 tree int_ftype_int_v2di_v2di
14685 = build_function_type_list (integer_type_node,
14686 integer_type_node, V2DI_type_node,
14687 V2DI_type_node, NULL_TREE);
14688 tree void_ftype_v4si
14689 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
14690 tree v8hi_ftype_void
14691 = build_function_type_list (V8HI_type_node, NULL_TREE);
14692 tree void_ftype_void
14693 = build_function_type_list (void_type_node, NULL_TREE);
14694 tree void_ftype_int
14695 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14696
14697 tree opaque_ftype_long_pcvoid
14698 = build_function_type_list (opaque_V4SI_type_node,
14699 long_integer_type_node, pcvoid_type_node,
14700 NULL_TREE);
14701 tree v16qi_ftype_long_pcvoid
14702 = build_function_type_list (V16QI_type_node,
14703 long_integer_type_node, pcvoid_type_node,
14704 NULL_TREE);
14705 tree v8hi_ftype_long_pcvoid
14706 = build_function_type_list (V8HI_type_node,
14707 long_integer_type_node, pcvoid_type_node,
14708 NULL_TREE);
14709 tree v4si_ftype_long_pcvoid
14710 = build_function_type_list (V4SI_type_node,
14711 long_integer_type_node, pcvoid_type_node,
14712 NULL_TREE);
14713 tree v4sf_ftype_long_pcvoid
14714 = build_function_type_list (V4SF_type_node,
14715 long_integer_type_node, pcvoid_type_node,
14716 NULL_TREE);
14717 tree v2df_ftype_long_pcvoid
14718 = build_function_type_list (V2DF_type_node,
14719 long_integer_type_node, pcvoid_type_node,
14720 NULL_TREE);
14721 tree v2di_ftype_long_pcvoid
14722 = build_function_type_list (V2DI_type_node,
14723 long_integer_type_node, pcvoid_type_node,
14724 NULL_TREE);
14725
14726 tree void_ftype_opaque_long_pvoid
14727 = build_function_type_list (void_type_node,
14728 opaque_V4SI_type_node, long_integer_type_node,
14729 pvoid_type_node, NULL_TREE);
14730 tree void_ftype_v4si_long_pvoid
14731 = build_function_type_list (void_type_node,
14732 V4SI_type_node, long_integer_type_node,
14733 pvoid_type_node, NULL_TREE);
14734 tree void_ftype_v16qi_long_pvoid
14735 = build_function_type_list (void_type_node,
14736 V16QI_type_node, long_integer_type_node,
14737 pvoid_type_node, NULL_TREE);
14738 tree void_ftype_v8hi_long_pvoid
14739 = build_function_type_list (void_type_node,
14740 V8HI_type_node, long_integer_type_node,
14741 pvoid_type_node, NULL_TREE);
14742 tree void_ftype_v4sf_long_pvoid
14743 = build_function_type_list (void_type_node,
14744 V4SF_type_node, long_integer_type_node,
14745 pvoid_type_node, NULL_TREE);
14746 tree void_ftype_v2df_long_pvoid
14747 = build_function_type_list (void_type_node,
14748 V2DF_type_node, long_integer_type_node,
14749 pvoid_type_node, NULL_TREE);
14750 tree void_ftype_v2di_long_pvoid
14751 = build_function_type_list (void_type_node,
14752 V2DI_type_node, long_integer_type_node,
14753 pvoid_type_node, NULL_TREE);
14754 tree int_ftype_int_v8hi_v8hi
14755 = build_function_type_list (integer_type_node,
14756 integer_type_node, V8HI_type_node,
14757 V8HI_type_node, NULL_TREE);
14758 tree int_ftype_int_v16qi_v16qi
14759 = build_function_type_list (integer_type_node,
14760 integer_type_node, V16QI_type_node,
14761 V16QI_type_node, NULL_TREE);
14762 tree int_ftype_int_v4sf_v4sf
14763 = build_function_type_list (integer_type_node,
14764 integer_type_node, V4SF_type_node,
14765 V4SF_type_node, NULL_TREE);
14766 tree int_ftype_int_v2df_v2df
14767 = build_function_type_list (integer_type_node,
14768 integer_type_node, V2DF_type_node,
14769 V2DF_type_node, NULL_TREE);
14770 tree v2di_ftype_v2di
14771 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
14772 tree v4si_ftype_v4si
14773 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
14774 tree v8hi_ftype_v8hi
14775 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
14776 tree v16qi_ftype_v16qi
14777 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
14778 tree v4sf_ftype_v4sf
14779 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14780 tree v2df_ftype_v2df
14781 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14782 tree void_ftype_pcvoid_int_int
14783 = build_function_type_list (void_type_node,
14784 pcvoid_type_node, integer_type_node,
14785 integer_type_node, NULL_TREE);
14786
14787 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
14788 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
14789 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
14790 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
14791 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
14792 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
14793 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
14794 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
14795 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
14796 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
14797 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
14798 ALTIVEC_BUILTIN_LVXL_V2DF);
14799 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
14800 ALTIVEC_BUILTIN_LVXL_V2DI);
14801 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
14802 ALTIVEC_BUILTIN_LVXL_V4SF);
14803 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
14804 ALTIVEC_BUILTIN_LVXL_V4SI);
14805 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
14806 ALTIVEC_BUILTIN_LVXL_V8HI);
14807 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
14808 ALTIVEC_BUILTIN_LVXL_V16QI);
14809 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
14810 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
14811 ALTIVEC_BUILTIN_LVX_V2DF);
14812 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
14813 ALTIVEC_BUILTIN_LVX_V2DI);
14814 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
14815 ALTIVEC_BUILTIN_LVX_V4SF);
14816 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
14817 ALTIVEC_BUILTIN_LVX_V4SI);
14818 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
14819 ALTIVEC_BUILTIN_LVX_V8HI);
14820 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
14821 ALTIVEC_BUILTIN_LVX_V16QI);
14822 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
14823 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
14824 ALTIVEC_BUILTIN_STVX_V2DF);
14825 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
14826 ALTIVEC_BUILTIN_STVX_V2DI);
14827 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
14828 ALTIVEC_BUILTIN_STVX_V4SF);
14829 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
14830 ALTIVEC_BUILTIN_STVX_V4SI);
14831 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
14832 ALTIVEC_BUILTIN_STVX_V8HI);
14833 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
14834 ALTIVEC_BUILTIN_STVX_V16QI);
14835 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
14836 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
14837 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
14838 ALTIVEC_BUILTIN_STVXL_V2DF);
14839 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
14840 ALTIVEC_BUILTIN_STVXL_V2DI);
14841 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
14842 ALTIVEC_BUILTIN_STVXL_V4SF);
14843 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
14844 ALTIVEC_BUILTIN_STVXL_V4SI);
14845 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
14846 ALTIVEC_BUILTIN_STVXL_V8HI);
14847 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
14848 ALTIVEC_BUILTIN_STVXL_V16QI);
14849 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
14850 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
14851 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
14852 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
14853 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
14854 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
14855 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
14856 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
14857 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
14858 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
14859 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
14860 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
14861 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
14862 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
14863 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
14864 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
14865
14866 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
14867 VSX_BUILTIN_LXVD2X_V2DF);
14868 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
14869 VSX_BUILTIN_LXVD2X_V2DI);
14870 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
14871 VSX_BUILTIN_LXVW4X_V4SF);
14872 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
14873 VSX_BUILTIN_LXVW4X_V4SI);
14874 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
14875 VSX_BUILTIN_LXVW4X_V8HI);
14876 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
14877 VSX_BUILTIN_LXVW4X_V16QI);
14878 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
14879 VSX_BUILTIN_STXVD2X_V2DF);
14880 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
14881 VSX_BUILTIN_STXVD2X_V2DI);
14882 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
14883 VSX_BUILTIN_STXVW4X_V4SF);
14884 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
14885 VSX_BUILTIN_STXVW4X_V4SI);
14886 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
14887 VSX_BUILTIN_STXVW4X_V8HI);
14888 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
14889 VSX_BUILTIN_STXVW4X_V16QI);
14890 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
14891 VSX_BUILTIN_VEC_LD);
14892 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
14893 VSX_BUILTIN_VEC_ST);
14894
14895 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
14896 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
14897 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
14898
14899 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
14900 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
14901 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
14902 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
14903 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
14904 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
14905 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
14906 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
14907 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
14908 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
14909 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
14910 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
14911
14912 /* Cell builtins. */
14913 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
14914 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
14915 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
14916 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
14917
14918 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
14919 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
14920 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
14921 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
14922
14923 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
14924 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
14925 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
14926 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
14927
14928 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
14929 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
14930 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
14931 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
14932
14933 /* Add the DST variants. */
14934 d = bdesc_dst;
14935 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14936 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
14937
14938 /* Initialize the predicates. */
14939 d = bdesc_altivec_preds;
14940 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14941 {
14942 machine_mode mode1;
14943 tree type;
14944
14945 if (rs6000_overloaded_builtin_p (d->code))
14946 mode1 = VOIDmode;
14947 else
14948 mode1 = insn_data[d->icode].operand[1].mode;
14949
14950 switch (mode1)
14951 {
14952 case VOIDmode:
14953 type = int_ftype_int_opaque_opaque;
14954 break;
14955 case V2DImode:
14956 type = int_ftype_int_v2di_v2di;
14957 break;
14958 case V4SImode:
14959 type = int_ftype_int_v4si_v4si;
14960 break;
14961 case V8HImode:
14962 type = int_ftype_int_v8hi_v8hi;
14963 break;
14964 case V16QImode:
14965 type = int_ftype_int_v16qi_v16qi;
14966 break;
14967 case V4SFmode:
14968 type = int_ftype_int_v4sf_v4sf;
14969 break;
14970 case V2DFmode:
14971 type = int_ftype_int_v2df_v2df;
14972 break;
14973 default:
14974 gcc_unreachable ();
14975 }
14976
14977 def_builtin (d->name, type, d->code);
14978 }
14979
14980 /* Initialize the abs* operators. */
14981 d = bdesc_abs;
14982 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14983 {
14984 machine_mode mode0;
14985 tree type;
14986
14987 mode0 = insn_data[d->icode].operand[0].mode;
14988
14989 switch (mode0)
14990 {
14991 case V2DImode:
14992 type = v2di_ftype_v2di;
14993 break;
14994 case V4SImode:
14995 type = v4si_ftype_v4si;
14996 break;
14997 case V8HImode:
14998 type = v8hi_ftype_v8hi;
14999 break;
15000 case V16QImode:
15001 type = v16qi_ftype_v16qi;
15002 break;
15003 case V4SFmode:
15004 type = v4sf_ftype_v4sf;
15005 break;
15006 case V2DFmode:
15007 type = v2df_ftype_v2df;
15008 break;
15009 default:
15010 gcc_unreachable ();
15011 }
15012
15013 def_builtin (d->name, type, d->code);
15014 }
15015
15016 /* Initialize target builtin that implements
15017 targetm.vectorize.builtin_mask_for_load. */
15018
15019 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
15020 v16qi_ftype_long_pcvoid,
15021 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
15022 BUILT_IN_MD, NULL, NULL_TREE);
15023 TREE_READONLY (decl) = 1;
15024 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
15025 altivec_builtin_mask_for_load = decl;
15026
15027 /* Access to the vec_init patterns. */
15028 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
15029 integer_type_node, integer_type_node,
15030 integer_type_node, NULL_TREE);
15031 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
15032
15033 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
15034 short_integer_type_node,
15035 short_integer_type_node,
15036 short_integer_type_node,
15037 short_integer_type_node,
15038 short_integer_type_node,
15039 short_integer_type_node,
15040 short_integer_type_node, NULL_TREE);
15041 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
15042
15043 ftype = build_function_type_list (V16QI_type_node, char_type_node,
15044 char_type_node, char_type_node,
15045 char_type_node, char_type_node,
15046 char_type_node, char_type_node,
15047 char_type_node, char_type_node,
15048 char_type_node, char_type_node,
15049 char_type_node, char_type_node,
15050 char_type_node, char_type_node,
15051 char_type_node, NULL_TREE);
15052 def_builtin ("__builtin_vec_init_v16qi", ftype,
15053 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
15054
15055 ftype = build_function_type_list (V4SF_type_node, float_type_node,
15056 float_type_node, float_type_node,
15057 float_type_node, NULL_TREE);
15058 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
15059
15060 /* VSX builtins. */
15061 ftype = build_function_type_list (V2DF_type_node, double_type_node,
15062 double_type_node, NULL_TREE);
15063 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
15064
15065 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
15066 intDI_type_node, NULL_TREE);
15067 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
15068
15069 /* Access to the vec_set patterns. */
15070 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
15071 intSI_type_node,
15072 integer_type_node, NULL_TREE);
15073 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
15074
15075 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15076 intHI_type_node,
15077 integer_type_node, NULL_TREE);
15078 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
15079
15080 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
15081 intQI_type_node,
15082 integer_type_node, NULL_TREE);
15083 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
15084
15085 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
15086 float_type_node,
15087 integer_type_node, NULL_TREE);
15088 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
15089
15090 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
15091 double_type_node,
15092 integer_type_node, NULL_TREE);
15093 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
15094
15095 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
15096 intDI_type_node,
15097 integer_type_node, NULL_TREE);
15098 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
15099
15100 /* Access to the vec_extract patterns. */
15101 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15102 integer_type_node, NULL_TREE);
15103 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
15104
15105 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15106 integer_type_node, NULL_TREE);
15107 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
15108
15109 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15110 integer_type_node, NULL_TREE);
15111 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
15112
15113 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15114 integer_type_node, NULL_TREE);
15115 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
15116
15117 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15118 integer_type_node, NULL_TREE);
15119 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
15120
15121 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
15122 integer_type_node, NULL_TREE);
15123 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
15124
15125
15126 if (V1TI_type_node)
15127 {
15128 tree v1ti_ftype_long_pcvoid
15129 = build_function_type_list (V1TI_type_node,
15130 long_integer_type_node, pcvoid_type_node,
15131 NULL_TREE);
15132 tree void_ftype_v1ti_long_pvoid
15133 = build_function_type_list (void_type_node,
15134 V1TI_type_node, long_integer_type_node,
15135 pvoid_type_node, NULL_TREE);
15136 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
15137 VSX_BUILTIN_LXVD2X_V1TI);
15138 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
15139 VSX_BUILTIN_STXVD2X_V1TI);
15140 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
15141 NULL_TREE);
15142 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
15143 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
15144 intTI_type_node,
15145 integer_type_node, NULL_TREE);
15146 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
15147 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
15148 integer_type_node, NULL_TREE);
15149 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
15150 }
15151
15152 }
15153
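/* Illustrative sketch (not part of GCC, guarded out): how the vec_init,
   vec_set and vec_ext builtins registered above surface on a target with
   AltiVec/VSX enabled.  The spellings are the internal names passed to
   def_builtin; user code normally reaches them through vector
   initializers and subscripting rather than calling them directly.  */
#if 0
__vector float
example_v4sf (float a, float b, float c, float d)
{
  /* __builtin_vec_init_v4sf builds a V4SF from four scalars.  */
  __vector float v = __builtin_vec_init_v4sf (a, b, c, d);

  /* __builtin_vec_set_v4sf replaces element 2 with a new scalar.  */
  v = __builtin_vec_set_v4sf (v, 1.0f, 2);

  /* __builtin_vec_ext_v4sf extracts element 0 as a scalar float.  */
  if (__builtin_vec_ext_v4sf (v, 0) < 0.0f)
    v = __builtin_vec_init_v4sf (d, c, b, a);
  return v;
}
#endif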
15154 static void
15155 htm_init_builtins (void)
15156 {
15157 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15158 const struct builtin_description *d;
15159 size_t i;
15160
15161 d = bdesc_htm;
15162 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15163 {
15164 tree op[MAX_HTM_OPERANDS], type;
15165 HOST_WIDE_INT mask = d->mask;
15166 unsigned attr = rs6000_builtin_info[d->code].attr;
15167 bool void_func = (attr & RS6000_BTC_VOID);
15168 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
15169 int nopnds = 0;
15170 tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
15171 : unsigned_type_node;
15172
15173 if ((mask & builtin_mask) != mask)
15174 {
15175 if (TARGET_DEBUG_BUILTIN)
15176 fprintf (stderr, "htm_builtin, skip htm %s\n", d->name);
15177 continue;
15178 }
15179
15180 if (d->name == 0)
15181 {
15182 if (TARGET_DEBUG_BUILTIN)
15183 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
15184 (long unsigned) i);
15185 continue;
15186 }
15187
15188 op[nopnds++] = (void_func) ? void_type_node : argtype;
15189
15190 if (attr_args == RS6000_BTC_UNARY)
15191 op[nopnds++] = argtype;
15192 else if (attr_args == RS6000_BTC_BINARY)
15193 {
15194 op[nopnds++] = argtype;
15195 op[nopnds++] = argtype;
15196 }
15197 else if (attr_args == RS6000_BTC_TERNARY)
15198 {
15199 op[nopnds++] = argtype;
15200 op[nopnds++] = argtype;
15201 op[nopnds++] = argtype;
15202 }
15203
15204 switch (nopnds)
15205 {
15206 case 1:
15207 type = build_function_type_list (op[0], NULL_TREE);
15208 break;
15209 case 2:
15210 type = build_function_type_list (op[0], op[1], NULL_TREE);
15211 break;
15212 case 3:
15213 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
15214 break;
15215 case 4:
15216 type = build_function_type_list (op[0], op[1], op[2], op[3],
15217 NULL_TREE);
15218 break;
15219 default:
15220 gcc_unreachable ();
15221 }
15222
15223 def_builtin (d->name, type, d->code);
15224 }
15225 }
15226
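/* A minimal sketch (not used above, assumptions noted in the comments) of
   what the nopnds switch computes: build_function_type_list stops reading
   its variadic arguments at the first NULL_TREE, so the four cases could
   be collapsed into one padded call.  The explicit switch trades this
   compactness for clarity.  */
#if 0
static tree
htm_build_type (tree op[], int nopnds)
{
  /* Assumes 1 <= nopnds <= 4, as guaranteed by the caller above;
     arguments after the first NULL_TREE are never read.  */
  return build_function_type_list (op[0],
				   nopnds > 1 ? op[1] : NULL_TREE,
				   nopnds > 2 ? op[2] : NULL_TREE,
				   nopnds > 3 ? op[3] : NULL_TREE,
				   NULL_TREE);
}
#endif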
15227 /* Hash function for builtin functions with up to 3 arguments and a return
15228 type. */
15229 hashval_t
15230 builtin_hasher::hash (builtin_hash_struct *bh)
15231 {
15232 unsigned ret = 0;
15233 int i;
15234
15235 for (i = 0; i < 4; i++)
15236 {
15237 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
15238 ret = (ret * 2) + bh->uns_p[i];
15239 }
15240
15241 return ret;
15242 }
15243
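/* The loop above is effectively a mixed-radix encoding: each iteration
   appends one (mode, uns_p) digit in base 2 * MAX_MACHINE_MODE, so two
   different signatures collide only when the unsigned accumulator wraps,
   and find_slot resolves such collisions through equal () below.  */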
15244 /* Compare builtin hash entries H1 and H2 for equivalence. */
15245 bool
15246 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
15247 {
15248 return ((p1->mode[0] == p2->mode[0])
15249 && (p1->mode[1] == p2->mode[1])
15250 && (p1->mode[2] == p2->mode[2])
15251 && (p1->mode[3] == p2->mode[3])
15252 && (p1->uns_p[0] == p2->uns_p[0])
15253 && (p1->uns_p[1] == p2->uns_p[1])
15254 && (p1->uns_p[2] == p2->uns_p[2])
15255 && (p1->uns_p[3] == p2->uns_p[3]));
15256 }
15257
15258 /* Map types for builtin functions with an explicit return type and up to 3
15259 arguments. Functions with fewer than 3 arguments use VOIDmode as the mode
15260 of the unused arguments. */
15261 static tree
15262 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
15263 machine_mode mode_arg1, machine_mode mode_arg2,
15264 enum rs6000_builtins builtin, const char *name)
15265 {
15266 struct builtin_hash_struct h;
15267 struct builtin_hash_struct *h2;
15268 int num_args = 3;
15269 int i;
15270 tree ret_type = NULL_TREE;
15271 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
15272
15273 /* Create builtin_hash_table. */
15274 if (builtin_hash_table == NULL)
15275 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
15276
15277 h.type = NULL_TREE;
15278 h.mode[0] = mode_ret;
15279 h.mode[1] = mode_arg0;
15280 h.mode[2] = mode_arg1;
15281 h.mode[3] = mode_arg2;
15282 h.uns_p[0] = 0;
15283 h.uns_p[1] = 0;
15284 h.uns_p[2] = 0;
15285 h.uns_p[3] = 0;
15286
15287 /* If the builtin produces unsigned results or takes unsigned arguments,
15288 and it is returned as a decl for the vectorizer (such as widening
15289 multiplies or permute), make sure the arguments and return value
15290 are type correct. */
15291 switch (builtin)
15292 {
15293 /* unsigned 1 argument functions. */
15294 case CRYPTO_BUILTIN_VSBOX:
15295 case P8V_BUILTIN_VGBBD:
15296 case MISC_BUILTIN_CDTBCD:
15297 case MISC_BUILTIN_CBCDTD:
15298 h.uns_p[0] = 1;
15299 h.uns_p[1] = 1;
15300 break;
15301
15302 /* unsigned 2 argument functions. */
15303 case ALTIVEC_BUILTIN_VMULEUB_UNS:
15304 case ALTIVEC_BUILTIN_VMULEUH_UNS:
15305 case ALTIVEC_BUILTIN_VMULOUB_UNS:
15306 case ALTIVEC_BUILTIN_VMULOUH_UNS:
15307 case CRYPTO_BUILTIN_VCIPHER:
15308 case CRYPTO_BUILTIN_VCIPHERLAST:
15309 case CRYPTO_BUILTIN_VNCIPHER:
15310 case CRYPTO_BUILTIN_VNCIPHERLAST:
15311 case CRYPTO_BUILTIN_VPMSUMB:
15312 case CRYPTO_BUILTIN_VPMSUMH:
15313 case CRYPTO_BUILTIN_VPMSUMW:
15314 case CRYPTO_BUILTIN_VPMSUMD:
15315 case CRYPTO_BUILTIN_VPMSUM:
15316 case MISC_BUILTIN_ADDG6S:
15317 case MISC_BUILTIN_DIVWEU:
15318 case MISC_BUILTIN_DIVWEUO:
15319 case MISC_BUILTIN_DIVDEU:
15320 case MISC_BUILTIN_DIVDEUO:
15321 h.uns_p[0] = 1;
15322 h.uns_p[1] = 1;
15323 h.uns_p[2] = 1;
15324 break;
15325
15326 /* unsigned 3 argument functions. */
15327 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
15328 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
15329 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
15330 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
15331 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
15332 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
15333 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
15334 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
15335 case VSX_BUILTIN_VPERM_16QI_UNS:
15336 case VSX_BUILTIN_VPERM_8HI_UNS:
15337 case VSX_BUILTIN_VPERM_4SI_UNS:
15338 case VSX_BUILTIN_VPERM_2DI_UNS:
15339 case VSX_BUILTIN_XXSEL_16QI_UNS:
15340 case VSX_BUILTIN_XXSEL_8HI_UNS:
15341 case VSX_BUILTIN_XXSEL_4SI_UNS:
15342 case VSX_BUILTIN_XXSEL_2DI_UNS:
15343 case CRYPTO_BUILTIN_VPERMXOR:
15344 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
15345 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
15346 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
15347 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
15348 case CRYPTO_BUILTIN_VSHASIGMAW:
15349 case CRYPTO_BUILTIN_VSHASIGMAD:
15350 case CRYPTO_BUILTIN_VSHASIGMA:
15351 h.uns_p[0] = 1;
15352 h.uns_p[1] = 1;
15353 h.uns_p[2] = 1;
15354 h.uns_p[3] = 1;
15355 break;
15356
15357 /* signed permute functions with unsigned char mask. */
15358 case ALTIVEC_BUILTIN_VPERM_16QI:
15359 case ALTIVEC_BUILTIN_VPERM_8HI:
15360 case ALTIVEC_BUILTIN_VPERM_4SI:
15361 case ALTIVEC_BUILTIN_VPERM_4SF:
15362 case ALTIVEC_BUILTIN_VPERM_2DI:
15363 case ALTIVEC_BUILTIN_VPERM_2DF:
15364 case VSX_BUILTIN_VPERM_16QI:
15365 case VSX_BUILTIN_VPERM_8HI:
15366 case VSX_BUILTIN_VPERM_4SI:
15367 case VSX_BUILTIN_VPERM_4SF:
15368 case VSX_BUILTIN_VPERM_2DI:
15369 case VSX_BUILTIN_VPERM_2DF:
15370 h.uns_p[3] = 1;
15371 break;
15372
15373 /* unsigned args, signed return. */
15374 case VSX_BUILTIN_XVCVUXDDP_UNS:
15375 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
15376 h.uns_p[1] = 1;
15377 break;
15378
15379 /* signed args, unsigned return. */
15380 case VSX_BUILTIN_XVCVDPUXDS_UNS:
15381 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
15382 case MISC_BUILTIN_UNPACK_TD:
15383 case MISC_BUILTIN_UNPACK_V1TI:
15384 h.uns_p[0] = 1;
15385 break;
15386
15387 /* unsigned arguments for 128-bit pack instructions. */
15388 case MISC_BUILTIN_PACK_TD:
15389 case MISC_BUILTIN_PACK_V1TI:
15390 h.uns_p[1] = 1;
15391 h.uns_p[2] = 1;
15392 break;
15393
15394 default:
15395 break;
15396 }
15397
15398 /* Figure out how many args are present. */
15399 while (num_args > 0 && h.mode[num_args] == VOIDmode)
15400 num_args--;
15401
15402 if (num_args == 0)
15403 fatal_error ("internal error: builtin function %s had no type", name);
15404
15405 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
15406 if (!ret_type && h.uns_p[0])
15407 ret_type = builtin_mode_to_type[h.mode[0]][0];
15408
15409 if (!ret_type)
15410 fatal_error ("internal error: builtin function %s had an unexpected "
15411 "return type %s", name, GET_MODE_NAME (h.mode[0]));
15412
15413 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
15414 arg_type[i] = NULL_TREE;
15415
15416 for (i = 0; i < num_args; i++)
15417 {
15418 int m = (int) h.mode[i+1];
15419 int uns_p = h.uns_p[i+1];
15420
15421 arg_type[i] = builtin_mode_to_type[m][uns_p];
15422 if (!arg_type[i] && uns_p)
15423 arg_type[i] = builtin_mode_to_type[m][0];
15424
15425 if (!arg_type[i])
15426 fatal_error ("internal error: builtin function %s, argument %d "
15427 "had unexpected argument type %s", name, i,
15428 GET_MODE_NAME (m));
15429 }
15430
15431 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
15432 if (*found == NULL)
15433 {
15434 h2 = ggc_alloc<builtin_hash_struct> ();
15435 *h2 = h;
15436 *found = h2;
15437
15438 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
15439 arg_type[2], NULL_TREE);
15440 }
15441
15442 return (*found)->type;
15443 }
15444
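/* Worked example (illustrative): for ALTIVEC_BUILTIN_VMULEUB_UNS the insn
   operands give modes (V8HI, V16QI, V16QI, VOID), and the "unsigned 2
   argument" case above sets uns_p[0..2], so the cached type is in effect

     vector unsigned short (vector unsigned char, vector unsigned char)

   Any later builtin with the same (mode, uns_p) signature reuses the hash
   table entry instead of building a fresh FUNCTION_TYPE.  */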
15445 static void
15446 rs6000_common_init_builtins (void)
15447 {
15448 const struct builtin_description *d;
15449 size_t i;
15450
15451 tree opaque_ftype_opaque = NULL_TREE;
15452 tree opaque_ftype_opaque_opaque = NULL_TREE;
15453 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
15454 tree v2si_ftype_qi = NULL_TREE;
15455 tree v2si_ftype_v2si_qi = NULL_TREE;
15456 tree v2si_ftype_int_qi = NULL_TREE;
15457 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15458
15459 if (!TARGET_PAIRED_FLOAT)
15460 {
15461 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
15462 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
15463 }
15464
15465 /* Paired and SPE builtins are only available if you build a compiler with
15466 the appropriate options, so only create those builtins with the
15467 appropriate compiler option. Create Altivec and VSX builtins on machines
15468 with at least the general purpose extensions (970 and newer) to allow the
15469 use of the target attribute. */
15470
15471 if (TARGET_EXTRA_BUILTINS)
15472 builtin_mask |= RS6000_BTM_COMMON;
15473
15474 /* Add the ternary operators. */
15475 d = bdesc_3arg;
15476 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15477 {
15478 tree type;
15479 HOST_WIDE_INT mask = d->mask;
15480
15481 if ((mask & builtin_mask) != mask)
15482 {
15483 if (TARGET_DEBUG_BUILTIN)
15484 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
15485 continue;
15486 }
15487
15488 if (rs6000_overloaded_builtin_p (d->code))
15489 {
15490 if (! (type = opaque_ftype_opaque_opaque_opaque))
15491 type = opaque_ftype_opaque_opaque_opaque
15492 = build_function_type_list (opaque_V4SI_type_node,
15493 opaque_V4SI_type_node,
15494 opaque_V4SI_type_node,
15495 opaque_V4SI_type_node,
15496 NULL_TREE);
15497 }
15498 else
15499 {
15500 enum insn_code icode = d->icode;
15501 if (d->name == 0)
15502 {
15503 if (TARGET_DEBUG_BUILTIN)
15504 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
15505 (long unsigned)i);
15506
15507 continue;
15508 }
15509
15510 if (icode == CODE_FOR_nothing)
15511 {
15512 if (TARGET_DEBUG_BUILTIN)
15513 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
15514 d->name);
15515
15516 continue;
15517 }
15518
15519 type = builtin_function_type (insn_data[icode].operand[0].mode,
15520 insn_data[icode].operand[1].mode,
15521 insn_data[icode].operand[2].mode,
15522 insn_data[icode].operand[3].mode,
15523 d->code, d->name);
15524 }
15525
15526 def_builtin (d->name, type, d->code);
15527 }
15528
15529 /* Add the binary operators. */
15530 d = bdesc_2arg;
15531 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15532 {
15533 machine_mode mode0, mode1, mode2;
15534 tree type;
15535 HOST_WIDE_INT mask = d->mask;
15536
15537 if ((mask & builtin_mask) != mask)
15538 {
15539 if (TARGET_DEBUG_BUILTIN)
15540 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
15541 continue;
15542 }
15543
15544 if (rs6000_overloaded_builtin_p (d->code))
15545 {
15546 if (! (type = opaque_ftype_opaque_opaque))
15547 type = opaque_ftype_opaque_opaque
15548 = build_function_type_list (opaque_V4SI_type_node,
15549 opaque_V4SI_type_node,
15550 opaque_V4SI_type_node,
15551 NULL_TREE);
15552 }
15553 else
15554 {
15555 enum insn_code icode = d->icode;
15556 if (d->name == 0)
15557 {
15558 if (TARGET_DEBUG_BUILTIN)
15559 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
15560 (long unsigned)i);
15561
15562 continue;
15563 }
15564
15565 if (icode == CODE_FOR_nothing)
15566 {
15567 if (TARGET_DEBUG_BUILTIN)
15568 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
15569 d->name);
15570
15571 continue;
15572 }
15573
15574 mode0 = insn_data[icode].operand[0].mode;
15575 mode1 = insn_data[icode].operand[1].mode;
15576 mode2 = insn_data[icode].operand[2].mode;
15577
15578 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
15579 {
15580 if (! (type = v2si_ftype_v2si_qi))
15581 type = v2si_ftype_v2si_qi
15582 = build_function_type_list (opaque_V2SI_type_node,
15583 opaque_V2SI_type_node,
15584 char_type_node,
15585 NULL_TREE);
15586 }
15587
15588 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
15589 && mode2 == QImode)
15590 {
15591 if (! (type = v2si_ftype_int_qi))
15592 type = v2si_ftype_int_qi
15593 = build_function_type_list (opaque_V2SI_type_node,
15594 integer_type_node,
15595 char_type_node,
15596 NULL_TREE);
15597 }
15598
15599 else
15600 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
15601 d->code, d->name);
15602 }
15603
15604 def_builtin (d->name, type, d->code);
15605 }
15606
15607 /* Add the simple unary operators. */
15608 d = bdesc_1arg;
15609 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15610 {
15611 machine_mode mode0, mode1;
15612 tree type;
15613 HOST_WIDE_INT mask = d->mask;
15614
15615 if ((mask & builtin_mask) != mask)
15616 {
15617 if (TARGET_DEBUG_BUILTIN)
15618 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
15619 continue;
15620 }
15621
15622 if (rs6000_overloaded_builtin_p (d->code))
15623 {
15624 if (! (type = opaque_ftype_opaque))
15625 type = opaque_ftype_opaque
15626 = build_function_type_list (opaque_V4SI_type_node,
15627 opaque_V4SI_type_node,
15628 NULL_TREE);
15629 }
15630 else
15631 {
15632 enum insn_code icode = d->icode;
15633 if (d->name == 0)
15634 {
15635 if (TARGET_DEBUG_BUILTIN)
15636 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
15637 (long unsigned)i);
15638
15639 continue;
15640 }
15641
15642 if (icode == CODE_FOR_nothing)
15643 {
15644 if (TARGET_DEBUG_BUILTIN)
15645 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
15646 d->name);
15647
15648 continue;
15649 }
15650
15651 mode0 = insn_data[icode].operand[0].mode;
15652 mode1 = insn_data[icode].operand[1].mode;
15653
15654 if (mode0 == V2SImode && mode1 == QImode)
15655 {
15656 if (! (type = v2si_ftype_qi))
15657 type = v2si_ftype_qi
15658 = build_function_type_list (opaque_V2SI_type_node,
15659 char_type_node,
15660 NULL_TREE);
15661 }
15662
15663 else
15664 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
15665 d->code, d->name);
15666 }
15667
15668 def_builtin (d->name, type, d->code);
15669 }
15670 }
15671
15672 static void
15673 rs6000_init_libfuncs (void)
15674 {
15675 if (!TARGET_IEEEQUAD)
15676 /* AIX/Darwin/64-bit Linux quad floating point routines. */
15677 if (!TARGET_XL_COMPAT)
15678 {
15679 set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
15680 set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
15681 set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
15682 set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
15683
15684 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
15685 {
15686 set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
15687 set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
15688 set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
15689 set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
15690 set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
15691 set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
15692 set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
15693
15694 set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
15695 set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
15696 set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
15697 set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
15698 set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
15699 set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
15700 set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
15701 set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
15702 }
15703
15704 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
15705 set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
15706 }
15707 else
15708 {
15709 set_optab_libfunc (add_optab, TFmode, "_xlqadd");
15710 set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
15711 set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
15712 set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
15713 }
15714 else
15715 {
15716 /* 32-bit SVR4 quad floating point routines. */
15717
15718 set_optab_libfunc (add_optab, TFmode, "_q_add");
15719 set_optab_libfunc (sub_optab, TFmode, "_q_sub");
15720 set_optab_libfunc (neg_optab, TFmode, "_q_neg");
15721 set_optab_libfunc (smul_optab, TFmode, "_q_mul");
15722 set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
15723 if (TARGET_PPC_GPOPT)
15724 set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");
15725
15726 set_optab_libfunc (eq_optab, TFmode, "_q_feq");
15727 set_optab_libfunc (ne_optab, TFmode, "_q_fne");
15728 set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
15729 set_optab_libfunc (ge_optab, TFmode, "_q_fge");
15730 set_optab_libfunc (lt_optab, TFmode, "_q_flt");
15731 set_optab_libfunc (le_optab, TFmode, "_q_fle");
15732
15733 set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
15734 set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
15735 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
15736 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
15737 set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
15738 set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
15739 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
15740 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
15741 }
15742 }
15743
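/* Illustrative only (guarded out): with IBM extended-double long double
   (the !TARGET_IEEEQUAD path) and !TARGET_XL_COMPAT, the TFmode addition
   below is expanded through add_optab and therefore compiles to a call to
   __gcc_qadd, per the optab-to-libfunc mapping installed above.  */
#if 0
long double
example_qadd (long double x, long double y)
{
  return x + y;	/* no TFmode add insn, so this becomes a call to __gcc_qadd  */
}
#endif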
15744 \f
15745 /* Expand a block clear operation, and return 1 if successful. Return 0
15746 if we should let the compiler generate normal code.
15747
15748 operands[0] is the destination
15749 operands[1] is the length
15750 operands[3] is the alignment */
15751
15752 int
15753 expand_block_clear (rtx operands[])
15754 {
15755 rtx orig_dest = operands[0];
15756 rtx bytes_rtx = operands[1];
15757 rtx align_rtx = operands[3];
15758 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
15759 HOST_WIDE_INT align;
15760 HOST_WIDE_INT bytes;
15761 int offset;
15762 int clear_bytes;
15763 int clear_step;
15764
15765 /* If this is not a fixed size clear, just call memset. */
15766 if (! constp)
15767 return 0;
15768
15769 /* This must be a fixed size alignment */
15770 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15771 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15772
15773 /* Anything to clear? */
15774 bytes = INTVAL (bytes_rtx);
15775 if (bytes <= 0)
15776 return 1;
15777
15778 /* Use the builtin memset after a point, to avoid huge code bloat.
15779 When optimize_size, avoid any significant code bloat; calling
15780 memset is about 4 instructions, so allow for one instruction to
15781 load zero and three to do clearing. */
15782 if (TARGET_ALTIVEC && align >= 128)
15783 clear_step = 16;
15784 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
15785 clear_step = 8;
15786 else if (TARGET_SPE && align >= 64)
15787 clear_step = 8;
15788 else
15789 clear_step = 4;
15790
15791 if (optimize_size && bytes > 3 * clear_step)
15792 return 0;
15793 if (! optimize_size && bytes > 8 * clear_step)
15794 return 0;
15795
15796 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
15797 {
15798 machine_mode mode = BLKmode;
15799 rtx dest;
15800
15801 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
15802 {
15803 clear_bytes = 16;
15804 mode = V4SImode;
15805 }
15806 else if (bytes >= 8 && TARGET_SPE && align >= 64)
15807 {
15808 clear_bytes = 8;
15809 mode = V2SImode;
15810 }
15811 else if (bytes >= 8 && TARGET_POWERPC64
15812 && (align >= 64 || !STRICT_ALIGNMENT))
15813 {
15814 clear_bytes = 8;
15815 mode = DImode;
15816 if (offset == 0 && align < 64)
15817 {
15818 rtx addr;
15819
15820 /* If the address form is reg+offset with offset not a
15821 multiple of four, reload into reg indirect form here
15822 rather than waiting for reload. This way we get one
15823 reload, not one per store. */
15824 addr = XEXP (orig_dest, 0);
15825 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15826 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15827 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15828 {
15829 addr = copy_addr_to_reg (addr);
15830 orig_dest = replace_equiv_address (orig_dest, addr);
15831 }
15832 }
15833 }
15834 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15835 { /* clear 4 bytes */
15836 clear_bytes = 4;
15837 mode = SImode;
15838 }
15839 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
15840 { /* clear 2 bytes */
15841 clear_bytes = 2;
15842 mode = HImode;
15843 }
15844 else /* clear 1 byte at a time */
15845 {
15846 clear_bytes = 1;
15847 mode = QImode;
15848 }
15849
15850 dest = adjust_address (orig_dest, mode, offset);
15851
15852 emit_move_insn (dest, CONST0_RTX (mode));
15853 }
15854
15855 return 1;
15856 }
15857
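/* Worked example (illustrative) for expand_block_clear: clearing 22 bytes
   at 128-bit alignment with AltiVec enabled and !optimize_size selects
   clear_step = 16, stays under the 8 * clear_step size limit, and emits
   three zero stores: 16 bytes (V4SImode), then 4 (SImode), then 2
   (HImode).  */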
15858 \f
15859 /* Expand a block move operation, and return 1 if successful. Return 0
15860 if we should let the compiler generate normal code.
15861
15862 operands[0] is the destination
15863 operands[1] is the source
15864 operands[2] is the length
15865 operands[3] is the alignment */
15866
15867 #define MAX_MOVE_REG 4
15868
15869 int
15870 expand_block_move (rtx operands[])
15871 {
15872 rtx orig_dest = operands[0];
15873 rtx orig_src = operands[1];
15874 rtx bytes_rtx = operands[2];
15875 rtx align_rtx = operands[3];
15876 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
15877 int align;
15878 int bytes;
15879 int offset;
15880 int move_bytes;
15881 rtx stores[MAX_MOVE_REG];
15882 int num_reg = 0;
15883
15884 /* If this is not a fixed size move, just call memcpy */
15885 if (! constp)
15886 return 0;
15887
15888 /* This must be a fixed size alignment */
15889 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15890 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15891
15892 /* Anything to move? */
15893 bytes = INTVAL (bytes_rtx);
15894 if (bytes <= 0)
15895 return 1;
15896
15897 if (bytes > rs6000_block_move_inline_limit)
15898 return 0;
15899
15900 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
15901 {
15902 union {
15903 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
15904 rtx (*mov) (rtx, rtx);
15905 } gen_func;
15906 machine_mode mode = BLKmode;
15907 rtx src, dest;
15908
15909 /* Altivec first, since it will be faster than a string move
15910 when it applies, and usually not significantly larger. */
15911 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
15912 {
15913 move_bytes = 16;
15914 mode = V4SImode;
15915 gen_func.mov = gen_movv4si;
15916 }
15917 else if (TARGET_SPE && bytes >= 8 && align >= 64)
15918 {
15919 move_bytes = 8;
15920 mode = V2SImode;
15921 gen_func.mov = gen_movv2si;
15922 }
15923 else if (TARGET_STRING
15924 && bytes > 24 /* move up to 32 bytes at a time */
15925 && ! fixed_regs[5]
15926 && ! fixed_regs[6]
15927 && ! fixed_regs[7]
15928 && ! fixed_regs[8]
15929 && ! fixed_regs[9]
15930 && ! fixed_regs[10]
15931 && ! fixed_regs[11]
15932 && ! fixed_regs[12])
15933 {
15934 move_bytes = (bytes > 32) ? 32 : bytes;
15935 gen_func.movmemsi = gen_movmemsi_8reg;
15936 }
15937 else if (TARGET_STRING
15938 && bytes > 16 /* move up to 24 bytes at a time */
15939 && ! fixed_regs[5]
15940 && ! fixed_regs[6]
15941 && ! fixed_regs[7]
15942 && ! fixed_regs[8]
15943 && ! fixed_regs[9]
15944 && ! fixed_regs[10])
15945 {
15946 move_bytes = (bytes > 24) ? 24 : bytes;
15947 gen_func.movmemsi = gen_movmemsi_6reg;
15948 }
15949 else if (TARGET_STRING
15950 && bytes > 8 /* move up to 16 bytes at a time */
15951 && ! fixed_regs[5]
15952 && ! fixed_regs[6]
15953 && ! fixed_regs[7]
15954 && ! fixed_regs[8])
15955 {
15956 move_bytes = (bytes > 16) ? 16 : bytes;
15957 gen_func.movmemsi = gen_movmemsi_4reg;
15958 }
15959 else if (bytes >= 8 && TARGET_POWERPC64
15960 && (align >= 64 || !STRICT_ALIGNMENT))
15961 {
15962 move_bytes = 8;
15963 mode = DImode;
15964 gen_func.mov = gen_movdi;
15965 if (offset == 0 && align < 64)
15966 {
15967 rtx addr;
15968
15969 /* If the address form is reg+offset with offset not a
15970 multiple of four, reload into reg indirect form here
15971 rather than waiting for reload. This way we get one
15972 reload, not one per load and/or store. */
15973 addr = XEXP (orig_dest, 0);
15974 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15975 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15976 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15977 {
15978 addr = copy_addr_to_reg (addr);
15979 orig_dest = replace_equiv_address (orig_dest, addr);
15980 }
15981 addr = XEXP (orig_src, 0);
15982 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15983 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15984 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15985 {
15986 addr = copy_addr_to_reg (addr);
15987 orig_src = replace_equiv_address (orig_src, addr);
15988 }
15989 }
15990 }
15991 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
15992 { /* move up to 8 bytes at a time */
15993 move_bytes = (bytes > 8) ? 8 : bytes;
15994 gen_func.movmemsi = gen_movmemsi_2reg;
15995 }
15996 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15997 { /* move 4 bytes */
15998 move_bytes = 4;
15999 mode = SImode;
16000 gen_func.mov = gen_movsi;
16001 }
16002 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16003 { /* move 2 bytes */
16004 move_bytes = 2;
16005 mode = HImode;
16006 gen_func.mov = gen_movhi;
16007 }
16008 else if (TARGET_STRING && bytes > 1)
16009 { /* move up to 4 bytes at a time */
16010 move_bytes = (bytes > 4) ? 4 : bytes;
16011 gen_func.movmemsi = gen_movmemsi_1reg;
16012 }
16013 else /* move 1 byte at a time */
16014 {
16015 move_bytes = 1;
16016 mode = QImode;
16017 gen_func.mov = gen_movqi;
16018 }
16019
16020 src = adjust_address (orig_src, mode, offset);
16021 dest = adjust_address (orig_dest, mode, offset);
16022
16023 if (mode != BLKmode)
16024 {
16025 rtx tmp_reg = gen_reg_rtx (mode);
16026
16027 emit_insn ((*gen_func.mov) (tmp_reg, src));
16028 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
16029 }
16030
16031 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
16032 {
16033 int i;
16034 for (i = 0; i < num_reg; i++)
16035 emit_insn (stores[i]);
16036 num_reg = 0;
16037 }
16038
16039 if (mode == BLKmode)
16040 {
16041 /* Move the address into scratch registers. The movmemsi
16042 patterns require zero offset. */
16043 if (!REG_P (XEXP (src, 0)))
16044 {
16045 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
16046 src = replace_equiv_address (src, src_reg);
16047 }
16048 set_mem_size (src, move_bytes);
16049
16050 if (!REG_P (XEXP (dest, 0)))
16051 {
16052 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
16053 dest = replace_equiv_address (dest, dest_reg);
16054 }
16055 set_mem_size (dest, move_bytes);
16056
16057 emit_insn ((*gen_func.movmemsi) (dest, src,
16058 GEN_INT (move_bytes & 31),
16059 align_rtx));
16060 }
16061 }
16062
16063 return 1;
16064 }
16065
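/* Worked example (illustrative) for expand_block_move, assuming a 64-bit
   target with !STRICT_ALIGNMENT, no -mstring, and a 10-byte copy at
   32-bit alignment within rs6000_block_move_inline_limit: the loop moves
   8 bytes through a DImode temporary and then 2 bytes through an HImode
   temporary, buffering up to MAX_MOVE_REG stores in stores[] so the loads
   are issued ahead of the stores.  */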
16066 \f
16067 /* Return a string to perform a load_multiple operation.
16068 operands[0] is the vector.
16069 operands[1] is the source address.
16070 operands[2] is the first destination register. */
16071
16072 const char *
16073 rs6000_output_load_multiple (rtx operands[3])
16074 {
16075 /* We have to handle the case where the pseudo used to contain the address
16076 is assigned to one of the output registers. */
16077 int i, j;
16078 int words = XVECLEN (operands[0], 0);
16079 rtx xop[10];
16080
16081 if (XVECLEN (operands[0], 0) == 1)
16082 return "lwz %2,0(%1)";
16083
16084 for (i = 0; i < words; i++)
16085 if (refers_to_regno_p (REGNO (operands[2]) + i,
16086 REGNO (operands[2]) + i + 1, operands[1], 0))
16087 {
16088 if (i == words-1)
16089 {
16090 xop[0] = GEN_INT (4 * (words-1));
16091 xop[1] = operands[1];
16092 xop[2] = operands[2];
16093 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
16094 return "";
16095 }
16096 else if (i == 0)
16097 {
16098 xop[0] = GEN_INT (4 * (words-1));
16099 xop[1] = operands[1];
16100 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16101 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
16102 return "";
16103 }
16104 else
16105 {
16106 for (j = 0; j < words; j++)
16107 if (j != i)
16108 {
16109 xop[0] = GEN_INT (j * 4);
16110 xop[1] = operands[1];
16111 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
16112 output_asm_insn ("lwz %2,%0(%1)", xop);
16113 }
16114 xop[0] = GEN_INT (i * 4);
16115 xop[1] = operands[1];
16116 output_asm_insn ("lwz %1,%0(%1)", xop);
16117 return "";
16118 }
16119 }
16120
16121 return "lswi %2,%1,%N0";
16122 }
16123
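/* Worked example (illustrative) for rs6000_output_load_multiple: loading
   three words into r5..r7 when the address also lives in r5 hits the
   i == 0 case, so instead of letting lswi clobber its own address
   register the routine emits

     addi r5,r5,4
     lswi r6,r5,8
     lwz r5,-4(r5)

   loading the two non-overlapping words first and the overlapping word
   last.  */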
16124 \f
16125 /* A validation routine: say whether CODE, a condition code, and MODE
16126 match. The other alternatives either don't make sense or should
16127 never be generated. */
16128
16129 void
16130 validate_condition_mode (enum rtx_code code, machine_mode mode)
16131 {
16132 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
16133 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
16134 && GET_MODE_CLASS (mode) == MODE_CC);
16135
16136 /* These don't make sense. */
16137 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
16138 || mode != CCUNSmode);
16139
16140 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
16141 || mode == CCUNSmode);
16142
16143 gcc_assert (mode == CCFPmode
16144 || (code != ORDERED && code != UNORDERED
16145 && code != UNEQ && code != LTGT
16146 && code != UNGT && code != UNLT
16147 && code != UNGE && code != UNLE));
16148
16149 /* These should never be generated except for
16150 flag_finite_math_only. */
16151 gcc_assert (mode != CCFPmode
16152 || flag_finite_math_only
16153 || (code != LE && code != GE
16154 && code != UNEQ && code != LTGT
16155 && code != UNGT && code != UNLT));
16156
16157 /* These are invalid; the information is not there. */
16158 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
16159 }
16160
16161 \f
16162 /* Return 1 if ANDOP is a mask that has no bits set outside the
16163 mask required to convert the result of a rotate insn into a shift
16164 left insn of SHIFTOP bits. Both are known to be SImode CONST_INT. */
16165
16166 int
16167 includes_lshift_p (rtx shiftop, rtx andop)
16168 {
16169 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16170
16171 shift_mask <<= INTVAL (shiftop);
16172
16173 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
16174 }
16175
16176 /* Similar, but for right shift. */
16177
16178 int
16179 includes_rshift_p (rtx shiftop, rtx andop)
16180 {
16181 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16182
16183 shift_mask >>= INTVAL (shiftop);
16184
16185 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
16186 }
16187
16188 /* Return 1 if ANDOP is a mask suitable for use with an rldic insn
16189 to perform a left shift. It must have exactly SHIFTOP least
16190 significant 0's, then one or more 1's, then zero or more 0's. */
16191
16192 int
16193 includes_rldic_lshift_p (rtx shiftop, rtx andop)
16194 {
16195 if (GET_CODE (andop) == CONST_INT)
16196 {
16197 HOST_WIDE_INT c, lsb, shift_mask;
16198
16199 c = INTVAL (andop);
16200 if (c == 0 || c == ~0)
16201 return 0;
16202
16203 shift_mask = ~0;
16204 shift_mask <<= INTVAL (shiftop);
16205
16206 /* Find the least significant one bit. */
16207 lsb = c & -c;
16208
16209 /* It must coincide with the LSB of the shift mask. */
16210 if (-lsb != shift_mask)
16211 return 0;
16212
16213 /* Invert to look for the next transition (if any). */
16214 c = ~c;
16215
16216 /* Remove the low group of ones (originally low group of zeros). */
16217 c &= -lsb;
16218
16219 /* Again find the lsb, and check we have all 1's above. */
16220 lsb = c & -c;
16221 return c == -lsb;
16222 }
16223 else
16224 return 0;
16225 }
16226
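/* Worked example (illustrative) for includes_rldic_lshift_p: with
   SHIFTOP = 4 and ANDOP = 0xff0, lsb = c & -c = 0x10 and -lsb equals
   shift_mask = ~0 << 4, so the mask has exactly four low zeros; after
   inverting c and clearing that run of ones, c = ~0 << 12 passes the
   final c == -lsb test, confirming a single contiguous run of ones, and
   the function returns 1.  */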
16227 /* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
16228 to perform a left shift. It must have SHIFTOP or more least
16229 significant 0's, with the remainder of the word 1's. */
16230
16231 int
16232 includes_rldicr_lshift_p (rtx shiftop, rtx andop)
16233 {
16234 if (GET_CODE (andop) == CONST_INT)
16235 {
16236 HOST_WIDE_INT c, lsb, shift_mask;
16237
16238 shift_mask = ~0;
16239 shift_mask <<= INTVAL (shiftop);
16240 c = INTVAL (andop);
16241
16242 /* Find the least significant one bit. */
16243 lsb = c & -c;
16244
16245 /* It must be covered by the shift mask.
16246 This test also rejects c == 0. */
16247 if ((lsb & shift_mask) == 0)
16248 return 0;
16249
16250 /* Check we have all 1's above the transition, and reject all 1's. */
16251 return c == -lsb && lsb != 1;
16252 }
16253 else
16254 return 0;
16255 }
16256
16257 /* Return 1 if the operands will generate valid arguments to the rlwimi
16258 instruction for insert with right shift in 64-bit mode. The mask may
16259 not start on the first bit or stop on the last bit because the
16260 wrap-around effects of the instruction do not match the RTL semantics. */
16261
16262 int
16263 insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
16264 {
16265 if (INTVAL (startop) > 32
16266 && INTVAL (startop) < 64
16267 && INTVAL (sizeop) > 1
16268 && INTVAL (sizeop) + INTVAL (startop) < 64
16269 && INTVAL (shiftop) > 0
16270 && INTVAL (sizeop) + INTVAL (shiftop) < 32
16271 && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
16272 return 1;
16273
16274 return 0;
16275 }
16276
16277 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
16278 for lfq and stfq insns iff the registers are hard registers. */
16279
16280 int
16281 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
16282 {
16283 /* We might have been passed a SUBREG. */
16284 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
16285 return 0;
16286
16287 /* We might have been passed non floating point registers. */
16288 if (!FP_REGNO_P (REGNO (reg1))
16289 || !FP_REGNO_P (REGNO (reg2)))
16290 return 0;
16291
16292 return (REGNO (reg1) == REGNO (reg2) - 1);
16293 }
16294
16295 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
16296 addr1 and addr2 must be in consecutive memory locations
16297 (addr2 == addr1 + 8). */
16298
16299 int
16300 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
16301 {
16302 rtx addr1, addr2;
16303 unsigned int reg1, reg2;
16304 int offset1, offset2;
16305
16306 /* The mems cannot be volatile. */
16307 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
16308 return 0;
16309
16310 addr1 = XEXP (mem1, 0);
16311 addr2 = XEXP (mem2, 0);
16312
16313 /* Extract an offset (if used) from the first addr. */
16314 if (GET_CODE (addr1) == PLUS)
16315 {
16316 /* If not a REG, return zero. */
16317 if (GET_CODE (XEXP (addr1, 0)) != REG)
16318 return 0;
16319 else
16320 {
16321 reg1 = REGNO (XEXP (addr1, 0));
16322 /* The offset must be constant! */
16323 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
16324 return 0;
16325 offset1 = INTVAL (XEXP (addr1, 1));
16326 }
16327 }
16328 else if (GET_CODE (addr1) != REG)
16329 return 0;
16330 else
16331 {
16332 reg1 = REGNO (addr1);
16333 /* This was a simple (mem (reg)) expression. Offset is 0. */
16334 offset1 = 0;
16335 }
16336
16337 /* And now for the second addr. */
16338 if (GET_CODE (addr2) == PLUS)
16339 {
16340 /* If not a REG, return zero. */
16341 if (GET_CODE (XEXP (addr2, 0)) != REG)
16342 return 0;
16343 else
16344 {
16345 reg2 = REGNO (XEXP (addr2, 0));
16346 /* The offset must be constant. */
16347 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
16348 return 0;
16349 offset2 = INTVAL (XEXP (addr2, 1));
16350 }
16351 }
16352 else if (GET_CODE (addr2) != REG)
16353 return 0;
16354 else
16355 {
16356 reg2 = REGNO (addr2);
16357 /* This was a simple (mem (reg)) expression. Offset is 0. */
16358 offset2 = 0;
16359 }
16360
16361 /* Both of these must have the same base register. */
16362 if (reg1 != reg2)
16363 return 0;
16364
16365 /* The offset for the second addr must be 8 more than the first addr. */
16366 if (offset2 != offset1 + 8)
16367 return 0;
16368
16369 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
16370 instructions. */
16371 return 1;
16372 }
16373 \f
16374
16375 rtx
16376 rs6000_secondary_memory_needed_rtx (machine_mode mode)
16377 {
16378 static bool eliminated = false;
16379 rtx ret;
16380
16381 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
16382 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16383 else
16384 {
16385 rtx mem = cfun->machine->sdmode_stack_slot;
16386 gcc_assert (mem != NULL_RTX);
16387
16388 if (!eliminated)
16389 {
16390 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
16391 cfun->machine->sdmode_stack_slot = mem;
16392 eliminated = true;
16393 }
16394 ret = mem;
16395 }
16396
16397 if (TARGET_DEBUG_ADDR)
16398 {
16399 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
16400 GET_MODE_NAME (mode));
16401 if (!ret)
16402 fprintf (stderr, "\tNULL_RTX\n");
16403 else
16404 debug_rtx (ret);
16405 }
16406
16407 return ret;
16408 }
16409
16410 /* Return the mode to be used for memory when a secondary memory
16411 location is needed. For SDmode values we need to use DDmode; in
16412 all other cases we can use the same mode. */
16413 machine_mode
16414 rs6000_secondary_memory_needed_mode (machine_mode mode)
16415 {
16416 if (lra_in_progress && mode == SDmode)
16417 return DDmode;
16418 return mode;
16419 }
16420
16421 static tree
16422 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
16423 {
16424 /* Don't walk into types. */
16425 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
16426 {
16427 *walk_subtrees = 0;
16428 return NULL_TREE;
16429 }
16430
16431 switch (TREE_CODE (*tp))
16432 {
16433 case VAR_DECL:
16434 case PARM_DECL:
16435 case FIELD_DECL:
16436 case RESULT_DECL:
16437 case SSA_NAME:
16438 case REAL_CST:
16439 case MEM_REF:
16440 case VIEW_CONVERT_EXPR:
16441 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
16442 return *tp;
16443 break;
16444 default:
16445 break;
16446 }
16447
16448 return NULL_TREE;
16449 }
16450
16451 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
16452 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
16453 only work on the traditional Altivec registers, note whether an Altivec
16454 register was chosen. */
16455
16456 static enum rs6000_reg_type
16457 register_to_reg_type (rtx reg, bool *is_altivec)
16458 {
16459 HOST_WIDE_INT regno;
16460 enum reg_class rclass;
16461
16462 if (GET_CODE (reg) == SUBREG)
16463 reg = SUBREG_REG (reg);
16464
16465 if (!REG_P (reg))
16466 return NO_REG_TYPE;
16467
16468 regno = REGNO (reg);
16469 if (regno >= FIRST_PSEUDO_REGISTER)
16470 {
16471 if (!lra_in_progress && !reload_in_progress && !reload_completed)
16472 return PSEUDO_REG_TYPE;
16473
16474 regno = true_regnum (reg);
16475 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16476 return PSEUDO_REG_TYPE;
16477 }
16478
16479 gcc_assert (regno >= 0);
16480
16481 if (is_altivec && ALTIVEC_REGNO_P (regno))
16482 *is_altivec = true;
16483
16484 rclass = rs6000_regno_regclass[regno];
16485 return reg_class_to_reg_type[(int)rclass];
16486 }
16487
16488 /* Helper function to return the cost of adding a TOC entry address. */
16489
16490 static inline int
16491 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
16492 {
16493 int ret;
16494
16495 if (TARGET_CMODEL != CMODEL_SMALL)
16496 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
16497
16498 else
16499 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
16500
16501 return ret;
16502 }
16503
16504 /* Helper function for rs6000_secondary_reload to determine whether the memory
16505 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
16506 needs reloading. Return negative if the memory is not handled by the memory
16507 helper functions and a different reload method should be tried, 0 if no
16508 additional instructions are needed, and positive to give the extra cost of
16509 the memory access. */
16510
16511 static int
16512 rs6000_secondary_reload_memory (rtx addr,
16513 enum reg_class rclass,
16514 machine_mode mode)
16515 {
16516 int extra_cost = 0;
16517 rtx reg, and_arg, plus_arg0, plus_arg1;
16518 addr_mask_type addr_mask;
16519 const char *type = NULL;
16520 const char *fail_msg = NULL;
16521
16522 if (GPR_REG_CLASS_P (rclass))
16523 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
16524
16525 else if (rclass == FLOAT_REGS)
16526 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
16527
16528 else if (rclass == ALTIVEC_REGS)
16529 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
16530
16531 /* For the combined VSX_REGS, turn off Altivec AND -16. */
16532 else if (rclass == VSX_REGS)
16533 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
16534 & ~RELOAD_REG_AND_M16);
16535
16536 else
16537 {
16538 if (TARGET_DEBUG_ADDR)
16539 fprintf (stderr,
16540 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16541 "class is not GPR, FPR, VMX\n",
16542 GET_MODE_NAME (mode), reg_class_names[rclass]);
16543
16544 return -1;
16545 }
16546
16547 /* If the register isn't valid in this register class, just return now. */
16548 if ((addr_mask & RELOAD_REG_VALID) == 0)
16549 {
16550 if (TARGET_DEBUG_ADDR)
16551 fprintf (stderr,
16552 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16553 "not valid in class\n",
16554 GET_MODE_NAME (mode), reg_class_names[rclass]);
16555
16556 return -1;
16557 }
16558
16559 switch (GET_CODE (addr))
16560 {
16561 /* Does the register class support auto-update forms for this mode? We
16562 don't need a scratch register, since PowerPC only supports
16563 PRE_INC, PRE_DEC, and PRE_MODIFY. */
16564 case PRE_INC:
16565 case PRE_DEC:
16566 reg = XEXP (addr, 0);
16567 if (!base_reg_operand (reg, GET_MODE (reg)))
16568 {
16569 fail_msg = "no base register #1";
16570 extra_cost = -1;
16571 }
16572
16573 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
16574 {
16575 extra_cost = 1;
16576 type = "update";
16577 }
16578 break;
16579
16580 case PRE_MODIFY:
16581 reg = XEXP (addr, 0);
16582 plus_arg1 = XEXP (addr, 1);
16583 if (!base_reg_operand (reg, GET_MODE (reg))
16584 || GET_CODE (plus_arg1) != PLUS
16585 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
16586 {
16587 fail_msg = "bad PRE_MODIFY";
16588 extra_cost = -1;
16589 }
16590
16591 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
16592 {
16593 extra_cost = 1;
16594 type = "update";
16595 }
16596 break;
16597
16598 /* Do we need to simulate AND -16 to clear the bottom address bits used
16599 in VMX load/stores? Only allow the AND for vector sizes. */
16600 case AND:
16601 and_arg = XEXP (addr, 0);
16602 if (GET_MODE_SIZE (mode) != 16
16603 || GET_CODE (XEXP (addr, 1)) != CONST_INT
16604 || INTVAL (XEXP (addr, 1)) != -16)
16605 {
16606 fail_msg = "bad Altivec AND #1";
16607 extra_cost = -1;
16608 }
16609
16610 else if (rclass != ALTIVEC_REGS)
16611 {
16612 if (legitimate_indirect_address_p (and_arg, false))
16613 extra_cost = 1;
16614
16615 else if (legitimate_indexed_address_p (and_arg, false))
16616 extra_cost = 2;
16617
16618 else
16619 {
16620 fail_msg = "bad Altivec AND #2";
16621 extra_cost = -1;
16622 }
16623
16624 type = "and";
16625 }
16626 break;
16627
16628 /* If this is an indirect address, make sure it is a base register. */
16629 case REG:
16630 case SUBREG:
16631 if (!legitimate_indirect_address_p (addr, false))
16632 {
16633 extra_cost = 1;
16634 type = "move";
16635 }
16636 break;
16637
16638 /* If this is an indexed address, make sure the register class can handle
16639 indexed addresses for this mode. */
16640 case PLUS:
16641 plus_arg0 = XEXP (addr, 0);
16642 plus_arg1 = XEXP (addr, 1);
16643
16644 /* (plus (plus (reg) (constant)) (constant)) is generated during
16645 push_reload processing, so handle it now. */
16646 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
16647 {
16648 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16649 {
16650 extra_cost = 1;
16651 type = "offset";
16652 }
16653 }
16654
16655 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
16656 {
16657 fail_msg = "no base register #2";
16658 extra_cost = -1;
16659 }
16660
16661 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
16662 {
16663 if ((addr_mask & RELOAD_REG_INDEXED) == 0
16664 || !legitimate_indexed_address_p (addr, false))
16665 {
16666 extra_cost = 1;
16667 type = "indexed";
16668 }
16669 }
16670
16671 /* Make sure the register class can handle offset addresses. */
16672 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
16673 {
16674 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16675 {
16676 extra_cost = 1;
16677 type = "offset";
16678 }
16679 }
16680
16681 else
16682 {
16683 fail_msg = "bad PLUS";
16684 extra_cost = -1;
16685 }
16686
16687 break;
16688
16689 case LO_SUM:
16690 if (!legitimate_lo_sum_address_p (mode, addr, false))
16691 {
16692 fail_msg = "bad LO_SUM";
16693 extra_cost = -1;
16694 }
16695
16696 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16697 {
16698 extra_cost = 1;
16699 type = "lo_sum";
16700 }
16701 break;
16702
16703 /* Static addresses need to create a TOC entry. */
16704 case CONST:
16705 case SYMBOL_REF:
16706 case LABEL_REF:
16707 type = "address";
16708 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
16709 break;
16710
16711 /* TOC references look like offsetable memory. */
16712 case UNSPEC:
16713 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
16714 {
16715 fail_msg = "bad UNSPEC";
16716 extra_cost = -1;
16717 }
16718
16719 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16720 {
16721 extra_cost = 1;
16722 type = "toc reference";
16723 }
16724 break;
16725
16726 default:
16727 {
16728 fail_msg = "bad address";
16729 extra_cost = -1;
16730 }
16731 }
16732
16733 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
16734 {
16735 if (extra_cost < 0)
16736 fprintf (stderr,
16737 "rs6000_secondary_reload_memory error: mode = %s, "
16738 "class = %s, addr_mask = '%s', %s\n",
16739 GET_MODE_NAME (mode),
16740 reg_class_names[rclass],
16741 rs6000_debug_addr_mask (addr_mask, false),
16742 (fail_msg != NULL) ? fail_msg : "<bad address>");
16743
16744 else
16745 fprintf (stderr,
16746 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16747 "addr_mask = '%s', extra cost = %d, %s\n",
16748 GET_MODE_NAME (mode),
16749 reg_class_names[rclass],
16750 rs6000_debug_addr_mask (addr_mask, false),
16751 extra_cost,
16752 (type) ? type : "<none>");
16753
16754 debug_rtx (addr);
16755 }
16756
16757 return extra_cost;
16758 }
16759
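/* Illustrative reading of the cost protocol above: a PRE_INC address on a
   mode/class pair whose addr_mask lacks RELOAD_REG_PRE_INCDEC yields
   extra_cost = 1 (one extra insn via the reload helper), while a
   malformed address, e.g. an AND with a constant other than -16, yields
   -1 so the caller falls back to a different reload strategy.  */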
16760 /* Helper function for rs6000_secondary_reload to return true if a move to a
16761 different register class is really a simple move. */
16762
16763 static bool
16764 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
16765 enum rs6000_reg_type from_type,
16766 machine_mode mode)
16767 {
16768 int size;
16769
16770 /* Add support for various direct moves available. In this function, we only
16771 look at cases where we don't need any extra registers, and one or more
16772 simple move insns are issued. At present, 32-bit integers are not allowed
16773 in FPR/VSX registers. Single precision binary floating point is not a
16774 simple move because we need to convert to the single precision memory layout.
16775 The 4-byte SDmode can be moved. */
16776 size = GET_MODE_SIZE (mode);
16777 if (TARGET_DIRECT_MOVE
16778 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
16779 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16780 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
16781 return true;
16782
16783 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
16784 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
16785 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16786 return true;
16787
16788 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
16789 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
16790 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16791 return true;
16792
16793 return false;
16794 }
16795
16796 /* Power8 helper function for rs6000_secondary_reload, handle all of the
16797 special direct moves that involve allocating an extra register, return the
16798 insn code of the helper function if there is such a function or
16799 CODE_FOR_nothing if not. */
16800
16801 static bool
16802 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
16803 enum rs6000_reg_type from_type,
16804 machine_mode mode,
16805 secondary_reload_info *sri,
16806 bool altivec_p)
16807 {
16808 bool ret = false;
16809 enum insn_code icode = CODE_FOR_nothing;
16810 int cost = 0;
16811 int size = GET_MODE_SIZE (mode);
16812
16813 if (TARGET_POWERPC64)
16814 {
16815 if (size == 16)
16816 {
16817 /* Handle moving 128-bit values from GPRs to VSX registers on
16818 power8 when running in 64-bit mode using XXPERMDI to glue the two
16819 64-bit values back together. */
16820 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16821 {
16822 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16823 icode = reg_addr[mode].reload_vsx_gpr;
16824 }
16825
16826 /* Handle moving 128-bit values from VSX registers to GPRs on
16827 power8 when running in 64-bit mode using XXPERMDI to get access to the
16828 bottom 64-bit value. */
16829 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16830 {
16831 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16832 icode = reg_addr[mode].reload_gpr_vsx;
16833 }
16834 }
16835
16836 else if (mode == SFmode)
16837 {
16838 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16839 {
16840 cost = 3; /* xscvdpspn, mfvsrd, and. */
16841 icode = reg_addr[mode].reload_gpr_vsx;
16842 }
16843
16844 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16845 {
16846 cost = 2; /* mtvsrz, xscvspdpn. */
16847 icode = reg_addr[mode].reload_vsx_gpr;
16848 }
16849 }
16850 }
16851
16852 /* The 64-bit 128-bit direct moves were handled above; only the 32-bit
16853 case needs extra handling here. */
16854 if (!TARGET_POWERPC64 && size == 8)
16874 {
16875 /* Handle moving 64-bit values from GPRs to floating point registers on
16876 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
16877 values back together. Altivec register classes must be handled
16878 specially since a different instruction is used, and the secondary
16879 reload support requires a single instruction class in the scratch
16880 register constraint. However, right now TFmode is not allowed in
16881 Altivec registers, so the pattern will never match. */
16882 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
16883 {
16884 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
16885 icode = reg_addr[mode].reload_fpr_gpr;
16886 }
16887 }
16888
16889 if (icode != CODE_FOR_nothing)
16890 {
16891 ret = true;
16892 if (sri)
16893 {
16894 sri->icode = icode;
16895 sri->extra_cost = cost;
16896 }
16897 }
16898
16899 return ret;
16900 }
16901
16902 /* Return whether a move between two register classes can be done either
16903 directly (simple move) or via a pattern that uses a single extra temporary
16904 (using power8's direct move in this case). */
16905
16906 static bool
16907 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
16908 enum rs6000_reg_type from_type,
16909 machine_mode mode,
16910 secondary_reload_info *sri,
16911 bool altivec_p)
16912 {
16913 /* Fall back to load/store reloads if either type is not a register. */
16914 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
16915 return false;
16916
16917 /* If we haven't allocated registers yet, assume the move can be done for the
16918 standard register types. */
16919 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
16920 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
16921 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
16922 return true;
16923
16924 /* A move within the same set of registers is a simple move for
16925 non-specialized registers. */
16926 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
16927 return true;
16928
16929 /* Check whether a simple move can be done directly. */
16930 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
16931 {
16932 if (sri)
16933 {
16934 sri->icode = CODE_FOR_nothing;
16935 sri->extra_cost = 0;
16936 }
16937 return true;
16938 }
16939
16940 /* Now check if we can do it in a few steps. */
16941 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
16942 altivec_p);
16943 }
16944
16945 /* Inform reload about cases where moving X with a mode MODE to a register in
16946 RCLASS requires an extra scratch or immediate register. Return the class
16947 needed for the immediate register.
16948
16949 For VSX and Altivec, we may need a register to convert sp+offset into
16950 reg+sp.
16951
16952 For misaligned 64-bit gpr loads and stores we need a register to
16953 convert an offset address to indirect. */
16954
16955 static reg_class_t
16956 rs6000_secondary_reload (bool in_p,
16957 rtx x,
16958 reg_class_t rclass_i,
16959 machine_mode mode,
16960 secondary_reload_info *sri)
16961 {
16962 enum reg_class rclass = (enum reg_class) rclass_i;
16963 reg_class_t ret = ALL_REGS;
16964 enum insn_code icode;
16965 bool default_p = false;
16966 bool done_p = false;
16967
16968 /* Allow subreg of memory before/during reload. */
16969 bool memory_p = (MEM_P (x)
16970 || (!reload_completed && GET_CODE (x) == SUBREG
16971 && MEM_P (SUBREG_REG (x))));
16972
16973 sri->icode = CODE_FOR_nothing;
16974 sri->extra_cost = 0;
16975 icode = ((in_p)
16976 ? reg_addr[mode].reload_load
16977 : reg_addr[mode].reload_store);
16978
16979 if (REG_P (x) || register_operand (x, mode))
16980 {
16981 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
16982 bool altivec_p = (rclass == ALTIVEC_REGS);
16983 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
16984
16985 if (!in_p)
16986 {
16987 enum rs6000_reg_type exchange = to_type;
16988 to_type = from_type;
16989 from_type = exchange;
16990 }
16991
16992 /* Can we do a direct move of some sort? */
16993 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
16994 altivec_p))
16995 {
16996 icode = (enum insn_code)sri->icode;
16997 default_p = false;
16998 done_p = true;
16999 ret = NO_REGS;
17000 }
17001 }
17002
17003 /* Make sure 0.0 is not reloaded or forced into memory. */
17004 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
17005 {
17006 ret = NO_REGS;
17007 default_p = false;
17008 done_p = true;
17009 }
17010
17011 /* If this is a scalar floating point value and we want to load it into the
17012 traditional Altivec registers, move it through a traditional floating
17013 point register.  Also make sure that non-zero constants use an FPR.  */
17014 if (!done_p && reg_addr[mode].scalar_in_vmx_p
17015 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17016 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
17017 {
17018 ret = FLOAT_REGS;
17019 default_p = false;
17020 done_p = true;
17021 }
17022
17023 /* Handle reloads of loads and stores if we have reload helper functions.  */
17024 if (!done_p && icode != CODE_FOR_nothing && memory_p)
17025 {
17026 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
17027 mode);
17028
17029 if (extra_cost >= 0)
17030 {
17031 done_p = true;
17032 ret = NO_REGS;
17033 if (extra_cost > 0)
17034 {
17035 sri->extra_cost = extra_cost;
17036 sri->icode = icode;
17037 }
17038 }
17039 }
17040
17041 /* Handle unaligned loads and stores of integer registers. */
17042 if (!done_p && TARGET_POWERPC64
17043 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17044 && memory_p
17045 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
17046 {
17047 rtx addr = XEXP (x, 0);
17048 rtx off = address_offset (addr);
17049
17050 if (off != NULL_RTX)
17051 {
17052 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17053 unsigned HOST_WIDE_INT offset = INTVAL (off);
17054
17055 /* We need a secondary reload when our legitimate_address_p
17056 says the address is good (as otherwise the entire address
17057 will be reloaded), and the offset is not a multiple of
17058 four or we have an address wrap. Address wrap will only
17059 occur for LO_SUMs since legitimate_offset_address_p
17060 rejects addresses for 16-byte mems that will wrap. */
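/* Worked example (editor's illustration, not from the original comment):
   for a 16-byte access EXTRA is 8.  A LO_SUM offset of 0x7ffc maps to
   (0x7ffc ^ 0x8000) = 0xfffc >= 0x10000 - 8, so the access would wrap
   and we request the reload; an offset of 0x1001 is caught instead by
   the (offset & 3) != 0 test, since ld/std need a multiple of 4.  */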
17061 if (GET_CODE (addr) == LO_SUM
17062 /* legitimate_address_p allows any offset for lo_sum.  */
17063 ? ((offset & 3) != 0
17064 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra)
17065 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
17066 && (offset & 3) != 0))
17067 {
17068 if (in_p)
17069 sri->icode = CODE_FOR_reload_di_load;
17070 else
17071 sri->icode = CODE_FOR_reload_di_store;
17072 sri->extra_cost = 2;
17073 ret = NO_REGS;
17074 done_p = true;
17075 }
17076 else
17077 default_p = true;
17078 }
17079 else
17080 default_p = true;
17081 }
17082
17083 if (!done_p && !TARGET_POWERPC64
17084 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17085 && memory_p
17086 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
17087 {
17088 rtx addr = XEXP (x, 0);
17089 rtx off = address_offset (addr);
17090
17091 if (off != NULL_RTX)
17092 {
17093 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17094 unsigned HOST_WIDE_INT offset = INTVAL (off);
17095
17096 /* We need a secondary reload when our legitimate_address_p
17097 says the address is good (as otherwise the entire address
17098 will be reloaded), and we have a wrap.
17099
17100 legitimate_lo_sum_address_p allows LO_SUM addresses to
17101 have any offset so test for wrap in the low 16 bits.
17102
17103 legitimate_offset_address_p checks for the range
17104 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
17105 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
17106 [0x7ff4,0x7fff] respectively, so test for the
17107 intersection of these ranges, [0x7ffc,0x7fff] and
17108 [0x7ff4,0x7ff7] respectively.
17109
17110 Note that the address we see here may have been
17111 manipulated by legitimize_reload_address. */
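/* Worked example (editor's illustration): with 32-bit words an 8-byte
   access has EXTRA = 4, so the non-LO_SUM test reduces to
   offset - 0x7ffc < 4, i.e. offset in [0x7ffc,0x7fff].  An offset of
   0x7ffc therefore uses reload_si_load/reload_si_store, while 0x7ff8
   does not wrap and is left to the default handling.  */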
17112 if (GET_CODE (addr) == LO_SUM
17113 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
17114 : offset - (0x8000 - extra) < UNITS_PER_WORD)
17115 {
17116 if (in_p)
17117 sri->icode = CODE_FOR_reload_si_load;
17118 else
17119 sri->icode = CODE_FOR_reload_si_store;
17120 sri->extra_cost = 2;
17121 ret = NO_REGS;
17122 done_p = true;
17123 }
17124 else
17125 default_p = true;
17126 }
17127 else
17128 default_p = true;
17129 }
17130
17131 if (!done_p)
17132 default_p = true;
17133
17134 if (default_p)
17135 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
17136
17137 gcc_assert (ret != ALL_REGS);
17138
17139 if (TARGET_DEBUG_ADDR)
17140 {
17141 fprintf (stderr,
17142 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
17143 "mode = %s",
17144 reg_class_names[ret],
17145 in_p ? "true" : "false",
17146 reg_class_names[rclass],
17147 GET_MODE_NAME (mode));
17148
17149 if (reload_completed)
17150 fputs (", after reload", stderr);
17151
17152 if (!done_p)
17153 fputs (", done_p not set", stderr);
17154
17155 if (default_p)
17156 fputs (", default secondary reload", stderr);
17157
17158 if (sri->icode != CODE_FOR_nothing)
17159 fprintf (stderr, ", reload func = %s, extra cost = %d",
17160 insn_data[sri->icode].name, sri->extra_cost);
17161
17162 fputs ("\n", stderr);
17163 debug_rtx (x);
17164 }
17165
17166 return ret;
17167 }
17168
17169 /* Better tracing for rs6000_secondary_reload_inner. */
17170
17171 static void
17172 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
17173 bool store_p)
17174 {
17175 rtx set, clobber;
17176
17177 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
17178
17179 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
17180 store_p ? "store" : "load");
17181
17182 if (store_p)
17183 set = gen_rtx_SET (VOIDmode, mem, reg);
17184 else
17185 set = gen_rtx_SET (VOIDmode, reg, mem);
17186
17187 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
17188 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
17189 }
17190
17191 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
17192 ATTRIBUTE_NORETURN;
17193
17194 static void
17195 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
17196 bool store_p)
17197 {
17198 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
17199 gcc_unreachable ();
17200 }
17201
17202 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
17203 reload helper functions. These were identified in
17204 rs6000_secondary_reload_memory, and if reload decided to use the secondary
17205 reload, it calls the insns:
17206 reload_<RELOAD:mode>_<P:mptrsize>_store
17207 reload_<RELOAD:mode>_<P:mptrsize>_load
17208
17209 which in turn calls this function, to do whatever is necessary to create
17210 valid addresses. */
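
/* For illustration (an editor's sketch, not from the original sources),
   given a V4SImode reload of

	(mem:V4SI (and:DI (plus:DI (reg:DI 9) (reg:DI 10)) (const_int -16)))

   for a register class without RELOAD_REG_AND_M16, the AND case below
   computes (r9 + r10) & -16 into the scratch base register and rewrites
   the memory as (mem:V4SI (reg:DI <scratch>)), an indirect address that
   the Altivec and VSX loads and stores can handle.  */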
17211
17212 void
17213 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
17214 {
17215 int regno = true_regnum (reg);
17216 machine_mode mode = GET_MODE (reg);
17217 addr_mask_type addr_mask;
17218 rtx addr;
17219 rtx new_addr;
17220 rtx op_reg, op0, op1;
17221 rtx and_op;
17222 rtx cc_clobber;
17223 rtvec rv;
17224
17225 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
17226 || !base_reg_operand (scratch, GET_MODE (scratch)))
17227 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17228
17229 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
17230 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
17231
17232 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
17233 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
17234
17235 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
17236 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
17237
17238 else
17239 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17240
17241 /* Make sure the mode is valid in this register class. */
17242 if ((addr_mask & RELOAD_REG_VALID) == 0)
17243 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17244
17245 if (TARGET_DEBUG_ADDR)
17246 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
17247
17248 new_addr = addr = XEXP (mem, 0);
17249 switch (GET_CODE (addr))
17250 {
17251 /* Does the register class support auto update forms for this mode? If
17252 not, do the update now. We don't need a scratch register, since the
17253 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
17254 case PRE_INC:
17255 case PRE_DEC:
17256 op_reg = XEXP (addr, 0);
17257 if (!base_reg_operand (op_reg, Pmode))
17258 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17259
17260 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
17261 {
17262 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_CODE (addr) == PRE_DEC ? -GET_MODE_SIZE (mode) : GET_MODE_SIZE (mode))));
17263 new_addr = op_reg;
17264 }
17265 break;
17266
17267 case PRE_MODIFY:
17268 op0 = XEXP (addr, 0);
17269 op1 = XEXP (addr, 1);
17270 if (!base_reg_operand (op0, Pmode)
17271 || GET_CODE (op1) != PLUS
17272 || !rtx_equal_p (op0, XEXP (op1, 0)))
17273 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17274
17275 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
17276 {
17277 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17278 new_addr = op0;
17279 }
17280 break;
17281
17282 /* Do we need to simulate AND -16 to clear the bottom address bits used
17283 in VMX load/stores? */
17284 case AND:
17285 op0 = XEXP (addr, 0);
17286 op1 = XEXP (addr, 1);
17287 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
17288 {
17289 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
17290 op_reg = op0;
17291
17292 else if (GET_CODE (op0) == PLUS)
17293 {
17294 emit_insn (gen_rtx_SET (VOIDmode, scratch, op0));
17295 op_reg = scratch;
17296 }
17297
17298 else
17299 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17300
17301 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
17302 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
17303 rv = gen_rtvec (2, gen_rtx_SET (VOIDmode, scratch, and_op), cc_clobber);
17304 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
17305 new_addr = scratch;
17306 }
17307 break;
17308
17309 /* If this is an indirect address, make sure it is a base register. */
17310 case REG:
17311 case SUBREG:
17312 if (!base_reg_operand (addr, GET_MODE (addr)))
17313 {
17314 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17315 new_addr = scratch;
17316 }
17317 break;
17318
17319 /* If this is an indexed address, make sure the register class can handle
17320 indexed addresses for this mode. */
17321 case PLUS:
17322 op0 = XEXP (addr, 0);
17323 op1 = XEXP (addr, 1);
17324 if (!base_reg_operand (op0, Pmode))
17325 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17326
17327 else if (int_reg_operand (op1, Pmode))
17328 {
17329 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17330 {
17331 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17332 new_addr = scratch;
17333 }
17334 }
17335
17336 /* Make sure the register class can handle offset addresses. */
17337 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
17338 {
17339 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17340 {
17341 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17342 new_addr = scratch;
17343 }
17344 }
17345
17346 else
17347 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17348
17349 break;
17350
17351 case LO_SUM:
17352 op0 = XEXP (addr, 0);
17353 op1 = XEXP (addr, 1);
17354 if (!base_reg_operand (op0, Pmode))
17355 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17356
17357 else if (int_reg_operand (op1, Pmode))
17358 {
17359 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17360 {
17361 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17362 new_addr = scratch;
17363 }
17364 }
17365
17366 /* Make sure the register class can handle offset addresses. */
17367 else if (legitimate_lo_sum_address_p (mode, addr, false))
17368 {
17369 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17370 {
17371 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17372 new_addr = scratch;
17373 }
17374 }
17375
17376 else
17377 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17378
17379 break;
17380
17381 case SYMBOL_REF:
17382 case CONST:
17383 case LABEL_REF:
17384 if (TARGET_TOC)
17385 emit_insn (gen_rtx_SET (VOIDmode, scratch,
17386 create_TOC_reference (addr, scratch)));
17387 else
17388 rs6000_emit_move (scratch, addr, Pmode);
17389
17390 new_addr = scratch;
17391 break;
17392
17393 default:
17394 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17395 }
17396
17397 /* Adjust the address if it changed. */
17398 if (addr != new_addr)
17399 {
17400 mem = replace_equiv_address_nv (mem, new_addr);
17401 if (TARGET_DEBUG_ADDR)
17402 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
17403 }
17404
17405 /* Now create the move. */
17406 if (store_p)
17407 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17408 else
17409 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17410
17411 return;
17412 }
17413
17414 /* Convert reloads involving 64-bit gprs and misaligned offset
17415 addressing, or multiple 32-bit gprs and offsets that are too large,
17416 to use indirect addressing. */
17417
17418 void
17419 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
17420 {
17421 int regno = true_regnum (reg);
17422 enum reg_class rclass;
17423 rtx addr;
17424 rtx scratch_or_premodify = scratch;
17425
17426 if (TARGET_DEBUG_ADDR)
17427 {
17428 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
17429 store_p ? "store" : "load");
17430 fprintf (stderr, "reg:\n");
17431 debug_rtx (reg);
17432 fprintf (stderr, "mem:\n");
17433 debug_rtx (mem);
17434 fprintf (stderr, "scratch:\n");
17435 debug_rtx (scratch);
17436 }
17437
17438 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
17439 gcc_assert (GET_CODE (mem) == MEM);
17440 rclass = REGNO_REG_CLASS (regno);
17441 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
17442 addr = XEXP (mem, 0);
17443
17444 if (GET_CODE (addr) == PRE_MODIFY)
17445 {
17446 scratch_or_premodify = XEXP (addr, 0);
17447 gcc_assert (REG_P (scratch_or_premodify));
17448 addr = XEXP (addr, 1);
17449 }
17450 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
17451
17452 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
17453
17454 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
17455
17456 /* Now create the move. */
17457 if (store_p)
17458 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17459 else
17460 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17461
17462 return;
17463 }
17464
17465 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
17466 this function has any SDmode references. If we are on a power7 or later, we
17467 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
17468 can load/store the value. */
17469
17470 static void
17471 rs6000_alloc_sdmode_stack_slot (void)
17472 {
17473 tree t;
17474 basic_block bb;
17475 gimple_stmt_iterator gsi;
17476
17477 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
17478 /* We use a different approach for dealing with the secondary
17479 memory in LRA. */
17480 if (ira_use_lra_p)
17481 return;
17482
17483 if (TARGET_NO_SDMODE_STACK)
17484 return;
17485
17486 FOR_EACH_BB_FN (bb, cfun)
17487 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
17488 {
17489 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
17490 if (ret)
17491 {
17492 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17493 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17494 SDmode, 0);
17495 return;
17496 }
17497 }
17498
17499 /* Check for any SDmode parameters of the function. */
17500 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
17501 {
17502 if (TREE_TYPE (t) == error_mark_node)
17503 continue;
17504
17505 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
17506 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
17507 {
17508 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17509 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17510 SDmode, 0);
17511 return;
17512 }
17513 }
17514 }
17515
17516 static void
17517 rs6000_instantiate_decls (void)
17518 {
17519 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
17520 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
17521 }
17522
17523 /* Given an rtx X being reloaded into a reg required to be
17524 in class CLASS, return the class of reg to actually use.
17525 In general this is just CLASS; but on some machines
17526 in some cases it is preferable to use a more restrictive class.
17527
17528 On the RS/6000, we have to return NO_REGS when we want to reload a
17529 floating-point CONST_DOUBLE to force it to be copied to memory.
17530
17531 We also don't want to reload integer values into floating-point
17532 registers if we can at all help it. In fact, this can
17533 cause reload to die, if it tries to generate a reload of CTR
17534 into a FP register and discovers it doesn't have the memory location
17535 required.
17536
17537 ??? Would it be a good idea to have reload do the converse, that is
17538 try to reload floating modes into FP registers if possible?
17539 */
17540
17541 static enum reg_class
17542 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
17543 {
17544 machine_mode mode = GET_MODE (x);
17545 bool is_constant = CONSTANT_P (x);
17546
17547 /* Do VSX tests before handling traditional floating point registers.  */
17548 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17549 {
17550 if (is_constant)
17551 {
17552 /* Zero is always allowed in all VSX registers. */
17553 if (x == CONST0_RTX (mode))
17554 return rclass;
17555
17556 /* If this is a vector constant that can be formed with a few Altivec
17557 instructions, we want altivec registers. */
17558 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
17559 return ALTIVEC_REGS;
17560
17561 /* Force constant to memory. */
17562 return NO_REGS;
17563 }
17564
17565 /* If this is a scalar floating point value, prefer the traditional
17566 floating point registers so that we can use D-form (register+offset)
17567 addressing. */
17568 if (GET_MODE_SIZE (mode) < 16)
17569 return FLOAT_REGS;
17570
17571 /* Prefer the Altivec registers if Altivec is handling the vector
17572 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
17573 loads. */
17574 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
17575 || mode == V1TImode)
17576 return ALTIVEC_REGS;
17577
17578 return rclass;
17579 }
17580
17581 if (is_constant || GET_CODE (x) == PLUS)
17582 {
17583 if (reg_class_subset_p (GENERAL_REGS, rclass))
17584 return GENERAL_REGS;
17585 if (reg_class_subset_p (BASE_REGS, rclass))
17586 return BASE_REGS;
17587 return NO_REGS;
17588 }
17589
17590 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
17591 return GENERAL_REGS;
17592
17593 return rclass;
17594 }
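
/* Worked example (editor's illustration): under VSX, reloading
   (const_vector:V4SI [0 0 0 0]) into VSX_REGS keeps VSX_REGS, since zero
   is valid in every VSX register; an easy constant such as a vspltisw
   splat prefers ALTIVEC_REGS; any other vector constant returns NO_REGS
   and is forced to memory.  */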
17595
17596 /* Debug version of rs6000_preferred_reload_class. */
17597 static enum reg_class
17598 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
17599 {
17600 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
17601
17602 fprintf (stderr,
17603 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
17604 "mode = %s, x:\n",
17605 reg_class_names[ret], reg_class_names[rclass],
17606 GET_MODE_NAME (GET_MODE (x)));
17607 debug_rtx (x);
17608
17609 return ret;
17610 }
17611
17612 /* If we are copying between FP or AltiVec registers and anything else, we need
17613 a memory location.  The exception is when we are targeting ppc64 and the
17614 direct move instructions between FPRs and GPRs are available.  Also, under
17615 VSX, you can copy vector registers from the FP register set to the Altivec
17616 register set and vice versa.  */
17617
17618 static bool
17619 rs6000_secondary_memory_needed (enum reg_class from_class,
17620 enum reg_class to_class,
17621 machine_mode mode)
17622 {
17623 enum rs6000_reg_type from_type, to_type;
17624 bool altivec_p = ((from_class == ALTIVEC_REGS)
17625 || (to_class == ALTIVEC_REGS));
17626
17627 /* If a simple/direct move is available, we don't need secondary memory.  */
17628 from_type = reg_class_to_reg_type[(int)from_class];
17629 to_type = reg_class_to_reg_type[(int)to_class];
17630
17631 if (rs6000_secondary_reload_move (to_type, from_type, mode,
17632 (secondary_reload_info *)0, altivec_p))
17633 return false;
17634
17635 /* If we have a floating point or vector register class, we need to use
17636 memory to transfer the data. */
17637 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
17638 return true;
17639
17640 return false;
17641 }
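
/* Worked example (editor's illustration): copying DFmode between
   FLOAT_REGS and GENERAL_REGS returns true on older processors, so the
   value bounces through memory, but returns false when the power8 direct
   move instructions let rs6000_secondary_reload_move do the copy entirely
   in registers.  */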
17642
17643 /* Debug version of rs6000_secondary_memory_needed. */
17644 static bool
17645 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
17646 enum reg_class to_class,
17647 machine_mode mode)
17648 {
17649 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
17650
17651 fprintf (stderr,
17652 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
17653 "to_class = %s, mode = %s\n",
17654 ret ? "true" : "false",
17655 reg_class_names[from_class],
17656 reg_class_names[to_class],
17657 GET_MODE_NAME (mode));
17658
17659 return ret;
17660 }
17661
17662 /* Return the register class of a scratch register needed to copy IN into
17663 or out of a register in RCLASS in MODE. If it can be done directly,
17664 NO_REGS is returned. */
17665
17666 static enum reg_class
17667 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
17668 rtx in)
17669 {
17670 int regno;
17671
17672 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
17673 #if TARGET_MACHO
17674 && MACHOPIC_INDIRECT
17675 #endif
17676 ))
17677 {
17678 /* We cannot copy a symbolic operand directly into anything
17679 other than BASE_REGS for TARGET_ELF. So indicate that a
17680 register from BASE_REGS is needed as an intermediate
17681 register.
17682
17683 On Darwin, pic addresses require a load from memory, which
17684 needs a base register. */
17685 if (rclass != BASE_REGS
17686 && (GET_CODE (in) == SYMBOL_REF
17687 || GET_CODE (in) == HIGH
17688 || GET_CODE (in) == LABEL_REF
17689 || GET_CODE (in) == CONST))
17690 return BASE_REGS;
17691 }
17692
17693 if (GET_CODE (in) == REG)
17694 {
17695 regno = REGNO (in);
17696 if (regno >= FIRST_PSEUDO_REGISTER)
17697 {
17698 regno = true_regnum (in);
17699 if (regno >= FIRST_PSEUDO_REGISTER)
17700 regno = -1;
17701 }
17702 }
17703 else if (GET_CODE (in) == SUBREG)
17704 {
17705 regno = true_regnum (in);
17706 if (regno >= FIRST_PSEUDO_REGISTER)
17707 regno = -1;
17708 }
17709 else
17710 regno = -1;
17711
17712 /* If we have VSX register moves, prefer moving scalar values between
17713 Altivec registers and GPR by going via an FPR (and then via memory)
17714 instead of reloading the secondary memory address for Altivec moves. */
17715 if (TARGET_VSX
17716 && GET_MODE_SIZE (mode) < 16
17717 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
17718 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
17719 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17720 && (regno >= 0 && INT_REGNO_P (regno)))))
17721 return FLOAT_REGS;
17722
17723 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
17724 into anything. */
17725 if (rclass == GENERAL_REGS || rclass == BASE_REGS
17726 || (regno >= 0 && INT_REGNO_P (regno)))
17727 return NO_REGS;
17728
17729 /* Constants, memory, and VSX registers can go into VSX registers (both the
17730 traditional floating point and the altivec registers). */
17731 if (rclass == VSX_REGS
17732 && (regno == -1 || VSX_REGNO_P (regno)))
17733 return NO_REGS;
17734
17735 /* Constants, memory, and FP registers can go into FP registers. */
17736 if ((regno == -1 || FP_REGNO_P (regno))
17737 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
17738 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
17739
17740 /* Memory and AltiVec registers can go into AltiVec registers.  */
17741 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
17742 && rclass == ALTIVEC_REGS)
17743 return NO_REGS;
17744
17745 /* We can copy among the CR registers. */
17746 if ((rclass == CR_REGS || rclass == CR0_REGS)
17747 && regno >= 0 && CR_REGNO_P (regno))
17748 return NO_REGS;
17749
17750 /* Otherwise, we need GENERAL_REGS. */
17751 return GENERAL_REGS;
17752 }
17753
17754 /* Debug version of rs6000_secondary_reload_class. */
17755 static enum reg_class
17756 rs6000_debug_secondary_reload_class (enum reg_class rclass,
17757 machine_mode mode, rtx in)
17758 {
17759 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
17760 fprintf (stderr,
17761 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
17762 "mode = %s, input rtx:\n",
17763 reg_class_names[ret], reg_class_names[rclass],
17764 GET_MODE_NAME (mode));
17765 debug_rtx (in);
17766
17767 return ret;
17768 }
17769
17770 /* Return true if for RCLASS a mode change from FROM to TO is invalid.  */
17771
17772 static bool
17773 rs6000_cannot_change_mode_class (machine_mode from,
17774 machine_mode to,
17775 enum reg_class rclass)
17776 {
17777 unsigned from_size = GET_MODE_SIZE (from);
17778 unsigned to_size = GET_MODE_SIZE (to);
17779
17780 if (from_size != to_size)
17781 {
17782 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
17783
17784 if (reg_classes_intersect_p (xclass, rclass))
17785 {
17786 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
17787 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
17788
17789 /* Don't allow 64-bit types to overlap with 128-bit types that take a
17790 single register under VSX because the scalar part of the register
17791 is in the upper 64-bits, and not the lower 64-bits. Types like
17792 TFmode/TDmode that take 2 scalar registers can overlap.  128-bit
17793 IEEE floating point can't overlap, and neither can small
17794 values. */
17795
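/* Worked example (editor's illustration): a DImode subreg of V2DImode is
   rejected here, since from_size == 8 but the V2DImode value occupies one
   full 16-byte VSX register (8 * to_nregs != to_size) and the scalar
   lives in the upper 64 bits.  */
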
17796 if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
17797 return true;
17798
17799 /* TDmode in floating-mode registers must always go into a register
17800 pair with the most significant word in the even-numbered register
17801 to match ISA requirements. In little-endian mode, this does not
17802 match subreg numbering, so we cannot allow subregs. */
17803 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
17804 return true;
17805
17806 if (from_size < 8 || to_size < 8)
17807 return true;
17808
17809 if (from_size == 8 && (8 * to_nregs) != to_size)
17810 return true;
17811
17812 if (to_size == 8 && (8 * from_nregs) != from_size)
17813 return true;
17814
17815 return false;
17816 }
17817 else
17818 return false;
17819 }
17820
17821 if (TARGET_E500_DOUBLE
17822 && ((((to) == DFmode) + ((from) == DFmode)) == 1
17823 || (((to) == TFmode) + ((from) == TFmode)) == 1
17824 || (((to) == DDmode) + ((from) == DDmode)) == 1
17825 || (((to) == TDmode) + ((from) == TDmode)) == 1
17826 || (((to) == DImode) + ((from) == DImode)) == 1))
17827 return true;
17828
17829 /* Since the VSX register set includes traditional floating point registers
17830 and altivec registers, just check for the size being different instead of
17831 trying to check whether the modes are vector modes. Otherwise it won't
17832 allow say DF and DI to change classes. For types like TFmode and TDmode
17833 that take 2 64-bit registers, rather than a single 128-bit register, don't
17834 allow subregs of those types to other 128 bit types. */
17835 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17836 {
17837 unsigned num_regs = (from_size + 15) / 16;
17838 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
17839 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
17840 return true;
17841
17842 return (from_size != 8 && from_size != 16);
17843 }
17844
17845 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
17846 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
17847 return true;
17848
17849 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
17850 && reg_classes_intersect_p (GENERAL_REGS, rclass))
17851 return true;
17852
17853 return false;
17854 }
17855
17856 /* Debug version of rs6000_cannot_change_mode_class. */
17857 static bool
17858 rs6000_debug_cannot_change_mode_class (machine_mode from,
17859 machine_mode to,
17860 enum reg_class rclass)
17861 {
17862 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
17863
17864 fprintf (stderr,
17865 "rs6000_cannot_change_mode_class, return %s, from = %s, "
17866 "to = %s, rclass = %s\n",
17867 ret ? "true" : "false",
17868 GET_MODE_NAME (from), GET_MODE_NAME (to),
17869 reg_class_names[rclass]);
17870
17871 return ret;
17872 }
17873 \f
17874 /* Return a string to do a move operation of 128 bits of data. */
17875
17876 const char *
17877 rs6000_output_move_128bit (rtx operands[])
17878 {
17879 rtx dest = operands[0];
17880 rtx src = operands[1];
17881 machine_mode mode = GET_MODE (dest);
17882 int dest_regno;
17883 int src_regno;
17884 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
17885 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
17886
17887 if (REG_P (dest))
17888 {
17889 dest_regno = REGNO (dest);
17890 dest_gpr_p = INT_REGNO_P (dest_regno);
17891 dest_fp_p = FP_REGNO_P (dest_regno);
17892 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
17893 dest_vsx_p = dest_fp_p | dest_vmx_p;
17894 }
17895 else
17896 {
17897 dest_regno = -1;
17898 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
17899 }
17900
17901 if (REG_P (src))
17902 {
17903 src_regno = REGNO (src);
17904 src_gpr_p = INT_REGNO_P (src_regno);
17905 src_fp_p = FP_REGNO_P (src_regno);
17906 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
17907 src_vsx_p = src_fp_p | src_vmx_p;
17908 }
17909 else
17910 {
17911 src_regno = -1;
17912 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
17913 }
17914
17915 /* Register moves. */
17916 if (dest_regno >= 0 && src_regno >= 0)
17917 {
17918 if (dest_gpr_p)
17919 {
17920 if (src_gpr_p)
17921 return "#";
17922
17923 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
17924 return "#";
17925 }
17926
17927 else if (TARGET_VSX && dest_vsx_p)
17928 {
17929 if (src_vsx_p)
17930 return "xxlor %x0,%x1,%x1";
17931
17932 else if (TARGET_DIRECT_MOVE && src_gpr_p)
17933 return "#";
17934 }
17935
17936 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
17937 return "vor %0,%1,%1";
17938
17939 else if (dest_fp_p && src_fp_p)
17940 return "#";
17941 }
17942
17943 /* Loads. */
17944 else if (dest_regno >= 0 && MEM_P (src))
17945 {
17946 if (dest_gpr_p)
17947 {
17948 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
17949 return "lq %0,%1";
17950 else
17951 return "#";
17952 }
17953
17954 else if (TARGET_ALTIVEC && dest_vmx_p
17955 && altivec_indexed_or_indirect_operand (src, mode))
17956 return "lvx %0,%y1";
17957
17958 else if (TARGET_VSX && dest_vsx_p)
17959 {
17960 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
17961 return "lxvw4x %x0,%y1";
17962 else
17963 return "lxvd2x %x0,%y1";
17964 }
17965
17966 else if (TARGET_ALTIVEC && dest_vmx_p)
17967 return "lvx %0,%y1";
17968
17969 else if (dest_fp_p)
17970 return "#";
17971 }
17972
17973 /* Stores. */
17974 else if (src_regno >= 0 && MEM_P (dest))
17975 {
17976 if (src_gpr_p)
17977 {
17978 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
17979 return "stq %1,%0";
17980 else
17981 return "#";
17982 }
17983
17984 else if (TARGET_ALTIVEC && src_vmx_p
17985 && altivec_indexed_or_indirect_operand (dest, mode))
17986 return "stvx %1,%y0";
17987
17988 else if (TARGET_VSX && src_vsx_p)
17989 {
17990 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
17991 return "stxvw4x %x1,%y0";
17992 else
17993 return "stxvd2x %x1,%y0";
17994 }
17995
17996 else if (TARGET_ALTIVEC && src_vmx_p)
17997 return "stvx %1,%y0";
17998
17999 else if (src_fp_p)
18000 return "#";
18001 }
18002
18003 /* Constants. */
18004 else if (dest_regno >= 0
18005 && (GET_CODE (src) == CONST_INT
18006 || GET_CODE (src) == CONST_WIDE_INT
18007 || GET_CODE (src) == CONST_DOUBLE
18008 || GET_CODE (src) == CONST_VECTOR))
18009 {
18010 if (dest_gpr_p)
18011 return "#";
18012
18013 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
18014 return "xxlxor %x0,%x0,%x0";
18015
18016 else if (TARGET_ALTIVEC && dest_vmx_p)
18017 return output_vec_const_move (operands);
18018 }
18019
18020 if (TARGET_DEBUG_ADDR)
18021 {
18022 fprintf (stderr, "\n===== Bad 128 bit move:\n");
18023 debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
18024 }
18025
18026 gcc_unreachable ();
18027 }
18028
18029 /* Validate a 128-bit move. */
18030 bool
18031 rs6000_move_128bit_ok_p (rtx operands[])
18032 {
18033 machine_mode mode = GET_MODE (operands[0]);
18034 return (gpc_reg_operand (operands[0], mode)
18035 || gpc_reg_operand (operands[1], mode));
18036 }
18037
18038 /* Return true if a 128-bit move needs to be split. */
18039 bool
18040 rs6000_split_128bit_ok_p (rtx operands[])
18041 {
18042 if (!reload_completed)
18043 return false;
18044
18045 if (!gpr_or_gpr_p (operands[0], operands[1]))
18046 return false;
18047
18048 if (quad_load_store_p (operands[0], operands[1]))
18049 return false;
18050
18051 return true;
18052 }
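
/* Illustrative sketch (editor's addition): a TImode GPR-to-GPR copy is
   emitted as "#" by rs6000_output_move_128bit and, once
   rs6000_split_128bit_ok_p allows it after reload, the rs6000.md
   splitters break it into word-sized moves, e.g. on 64-bit

	mr 3,5
	mr 4,6

   while a TARGET_QUAD_MEMORY access that satisfies quad_load_store_p
   stays a single lq or stq.  */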
18053
18054 \f
18055 /* Given a comparison operation, return the bit number in CCR to test. We
18056 know this is a valid comparison.
18057
18058 SCC_P is 1 if this is for an scc. That means that %D will have been
18059 used instead of %C, so the bits will be in different places.
18060
18061 Return -1 if OP isn't a valid comparison for some reason. */
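
/* Worked example (editor's illustration, not from the original sources):
   for a GT comparison against CR field 1 with SCC_P of zero, BASE_BIT is
   4 * (regno of CR1 - CR0_REGNO) = 4 and the GT case returns
   base_bit + 1 = 5, the bit a conditional branch tests for CR1's GT.  */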
18062
18063 int
18064 ccr_bit (rtx op, int scc_p)
18065 {
18066 enum rtx_code code = GET_CODE (op);
18067 machine_mode cc_mode;
18068 int cc_regnum;
18069 int base_bit;
18070 rtx reg;
18071
18072 if (!COMPARISON_P (op))
18073 return -1;
18074
18075 reg = XEXP (op, 0);
18076
18077 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
18078
18079 cc_mode = GET_MODE (reg);
18080 cc_regnum = REGNO (reg);
18081 base_bit = 4 * (cc_regnum - CR0_REGNO);
18082
18083 validate_condition_mode (code, cc_mode);
18084
18085 /* When generating a sCOND operation, only positive conditions are
18086 allowed. */
18087 gcc_assert (!scc_p
18088 || code == EQ || code == GT || code == LT || code == UNORDERED
18089 || code == GTU || code == LTU);
18090
18091 switch (code)
18092 {
18093 case NE:
18094 return scc_p ? base_bit + 3 : base_bit + 2;
18095 case EQ:
18096 return base_bit + 2;
18097 case GT: case GTU: case UNLE:
18098 return base_bit + 1;
18099 case LT: case LTU: case UNGE:
18100 return base_bit;
18101 case ORDERED: case UNORDERED:
18102 return base_bit + 3;
18103
18104 case GE: case GEU:
18105 /* If scc, we will have done a cror to put the bit in the
18106 unordered position. So test that bit. For integer, this is ! LT
18107 unless this is an scc insn. */
18108 return scc_p ? base_bit + 3 : base_bit;
18109
18110 case LE: case LEU:
18111 return scc_p ? base_bit + 3 : base_bit + 1;
18112
18113 default:
18114 gcc_unreachable ();
18115 }
18116 }
18117 \f
18118 /* Return the GOT register. */
18119
18120 rtx
18121 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
18122 {
18123 /* The second flow pass currently (June 1999) can't update
18124 regs_ever_live without disturbing other parts of the compiler, so
18125 update it here to make the prolog/epilogue code happy. */
18126 if (!can_create_pseudo_p ()
18127 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
18128 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
18129
18130 crtl->uses_pic_offset_table = 1;
18131
18132 return pic_offset_table_rtx;
18133 }
18134 \f
18135 static rs6000_stack_t stack_info;
18136
18137 /* Function to init struct machine_function.
18138 This will be called, via a pointer variable,
18139 from push_function_context. */
18140
18141 static struct machine_function *
18142 rs6000_init_machine_status (void)
18143 {
18144 stack_info.reload_completed = 0;
18145 return ggc_cleared_alloc<machine_function> ();
18146 }
18147 \f
18148 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
18149
18150 int
18151 extract_MB (rtx op)
18152 {
18153 int i;
18154 unsigned long val = INTVAL (op);
18155
18156 /* If the high bit is zero, the value is the first 1 bit we find
18157 from the left. */
18158 if ((val & 0x80000000) == 0)
18159 {
18160 gcc_assert (val & 0xffffffff);
18161
18162 i = 1;
18163 while (((val <<= 1) & 0x80000000) == 0)
18164 ++i;
18165 return i;
18166 }
18167
18168 /* If the high bit is set and the low bit is not, or the mask is all
18169 1's, the value is zero. */
18170 if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
18171 return 0;
18172
18173 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18174 from the right. */
18175 i = 31;
18176 while (((val >>= 1) & 1) != 0)
18177 --i;
18178
18179 return i;
18180 }
18181
18182 int
18183 extract_ME (rtx op)
18184 {
18185 int i;
18186 unsigned long val = INTVAL (op);
18187
18188 /* If the low bit is zero, the value is the first 1 bit we find from
18189 the right. */
18190 if ((val & 1) == 0)
18191 {
18192 gcc_assert (val & 0xffffffff);
18193
18194 i = 30;
18195 while (((val >>= 1) & 1) == 0)
18196 --i;
18197
18198 return i;
18199 }
18200
18201 /* If the low bit is set and the high bit is not, or the mask is all
18202 1's, the value is 31. */
18203 if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
18204 return 31;
18205
18206 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18207 from the left. */
18208 i = 0;
18209 while (((val <<= 1) & 0x80000000) != 0)
18210 ++i;
18211
18212 return i;
18213 }
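
/* Worked examples (editor's illustration): for the rlwinm mask 0x00ffff00,
   extract_MB returns 8 and extract_ME returns 23, matching the ones in
   IBM bit positions 8..23 (bit 0 is the most significant).  For the
   wrap-around mask 0xff0000ff, extract_MB returns 24 and extract_ME
   returns 7, selecting bits 24..31 and 0..7.  */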
18214
18215 /* Write out a function code label. */
18216
18217 void
18218 rs6000_output_function_entry (FILE *file, const char *fname)
18219 {
18220 if (fname[0] != '.')
18221 {
18222 switch (DEFAULT_ABI)
18223 {
18224 default:
18225 gcc_unreachable ();
18226
18227 case ABI_AIX:
18228 if (DOT_SYMBOLS)
18229 putc ('.', file);
18230 else
18231 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
18232 break;
18233
18234 case ABI_ELFv2:
18235 case ABI_V4:
18236 case ABI_DARWIN:
18237 break;
18238 }
18239 }
18240
18241 RS6000_OUTPUT_BASENAME (file, fname);
18242 }
18243
18244 /* Print an operand. Recognize special options, documented below. */
18245
18246 #if TARGET_ELF
18247 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
18248 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
18249 #else
18250 #define SMALL_DATA_RELOC "sda21"
18251 #define SMALL_DATA_REG 0
18252 #endif
18253
18254 void
18255 print_operand (FILE *file, rtx x, int code)
18256 {
18257 int i;
18258 unsigned HOST_WIDE_INT uval;
18259
18260 switch (code)
18261 {
18262 /* %a is output_address. */
18263
18264 case 'b':
18265 /* If constant, low-order 16 bits of constant, unsigned.
18266 Otherwise, write normally. */
18267 if (INT_P (x))
18268 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
18269 else
18270 print_operand (file, x, 0);
18271 return;
18272
18273 case 'B':
18274 /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
18275 for 64-bit mask direction. */
18276 putc (((INTVAL (x) & 1) == 0 ? 'r' : 'l'), file);
18277 return;
18278
18279 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
18280 output_operand. */
18281
18282 case 'D':
18283 /* Like 'J' but get to the GT bit only. */
18284 gcc_assert (REG_P (x));
18285
18286 /* Bit 1 is GT bit. */
18287 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
18288
18289 /* Add one for shift count in rlinm for scc. */
18290 fprintf (file, "%d", i + 1);
18291 return;
18292
18293 case 'e':
18294 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
18295 if (! INT_P (x))
18296 {
18297 output_operand_lossage ("invalid %%e value");
18298 return;
18299 }
18300
18301 uval = INTVAL (x);
18302 if ((uval & 0xffff) == 0 && uval != 0)
18303 putc ('s', file);
18304 return;
18305
18306 case 'E':
18307 /* X is a CR register.  Print the number of the EQ bit of the CR.  */
18308 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18309 output_operand_lossage ("invalid %%E value");
18310 else
18311 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
18312 return;
18313
18314 case 'f':
18315 /* X is a CR register. Print the shift count needed to move it
18316 to the high-order four bits. */
18317 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18318 output_operand_lossage ("invalid %%f value");
18319 else
18320 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
18321 return;
18322
18323 case 'F':
18324 /* Similar, but print the count for the rotate in the opposite
18325 direction. */
18326 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18327 output_operand_lossage ("invalid %%F value");
18328 else
18329 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
18330 return;
18331
18332 case 'G':
18333 /* X is a constant integer. If it is negative, print "m",
18334 otherwise print "z". This is to make an aze or ame insn. */
18335 if (GET_CODE (x) != CONST_INT)
18336 output_operand_lossage ("invalid %%G value");
18337 else if (INTVAL (x) >= 0)
18338 putc ('z', file);
18339 else
18340 putc ('m', file);
18341 return;
18342
18343 case 'h':
18344 /* If constant, output low-order five bits. Otherwise, write
18345 normally. */
18346 if (INT_P (x))
18347 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
18348 else
18349 print_operand (file, x, 0);
18350 return;
18351
18352 case 'H':
18353 /* If constant, output low-order six bits. Otherwise, write
18354 normally. */
18355 if (INT_P (x))
18356 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
18357 else
18358 print_operand (file, x, 0);
18359 return;
18360
18361 case 'I':
18362 /* Print `i' if this is a constant, else nothing. */
18363 if (INT_P (x))
18364 putc ('i', file);
18365 return;
18366
18367 case 'j':
18368 /* Write the bit number in CCR for jump. */
18369 i = ccr_bit (x, 0);
18370 if (i == -1)
18371 output_operand_lossage ("invalid %%j code");
18372 else
18373 fprintf (file, "%d", i);
18374 return;
18375
18376 case 'J':
18377 /* Similar, but add one for shift count in rlinm for scc and pass
18378 scc flag to `ccr_bit'. */
18379 i = ccr_bit (x, 1);
18380 if (i == -1)
18381 output_operand_lossage ("invalid %%J code");
18382 else
18383 /* If we want bit 31, write a shift count of zero, not 32. */
18384 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18385 return;
18386
18387 case 'k':
18388 /* X must be a constant. Write the 1's complement of the
18389 constant. */
18390 if (! INT_P (x))
18391 output_operand_lossage ("invalid %%k value");
18392 else
18393 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
18394 return;
18395
18396 case 'K':
18397 /* X must be a symbolic constant on ELF. Write an
18398 expression suitable for an 'addi' that adds in the low 16
18399 bits of the address.  */
18400 if (GET_CODE (x) == CONST)
18401 {
18402 if (GET_CODE (XEXP (x, 0)) != PLUS
18403 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
18404 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
18405 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
18406 output_operand_lossage ("invalid %%K value");
18407 }
18408 print_operand_address (file, x);
18409 fputs ("@l", file);
18410 return;
18411
18412 /* %l is output_asm_label. */
18413
18414 case 'L':
18415 /* Write second word of DImode or DFmode reference. Works on register
18416 or non-indexed memory only. */
18417 if (REG_P (x))
18418 fputs (reg_names[REGNO (x) + 1], file);
18419 else if (MEM_P (x))
18420 {
18421 /* Handle possible auto-increment. Since it is pre-increment and
18422 we have already done it, we can just use an offset of word. */
18423 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18424 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18425 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18426 UNITS_PER_WORD));
18427 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18428 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18429 UNITS_PER_WORD));
18430 else
18431 output_address (XEXP (adjust_address_nv (x, SImode,
18432 UNITS_PER_WORD),
18433 0));
18434
18435 if (small_data_operand (x, GET_MODE (x)))
18436 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18437 reg_names[SMALL_DATA_REG]);
18438 }
18439 return;
18440
18441 case 'm':
18442 /* MB value for a mask operand. */
18443 if (! mask_operand (x, SImode))
18444 output_operand_lossage ("invalid %%m value");
18445
18446 fprintf (file, "%d", extract_MB (x));
18447 return;
18448
18449 case 'M':
18450 /* ME value for a mask operand. */
18451 if (! mask_operand (x, SImode))
18452 output_operand_lossage ("invalid %%M value");
18453
18454 fprintf (file, "%d", extract_ME (x));
18455 return;
18456
18457 /* %n outputs the negative of its operand. */
18458
18459 case 'N':
18460 /* Write the number of elements in the vector times 4. */
18461 if (GET_CODE (x) != PARALLEL)
18462 output_operand_lossage ("invalid %%N value");
18463 else
18464 fprintf (file, "%d", XVECLEN (x, 0) * 4);
18465 return;
18466
18467 case 'O':
18468 /* Similar, but subtract 1 first. */
18469 if (GET_CODE (x) != PARALLEL)
18470 output_operand_lossage ("invalid %%O value");
18471 else
18472 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
18473 return;
18474
18475 case 'p':
18476 /* X is a CONST_INT that is a power of two. Output the logarithm. */
18477 if (! INT_P (x)
18478 || INTVAL (x) < 0
18479 || (i = exact_log2 (INTVAL (x))) < 0)
18480 output_operand_lossage ("invalid %%p value");
18481 else
18482 fprintf (file, "%d", i);
18483 return;
18484
18485 case 'P':
18486 /* The operand must be an indirect memory reference. The result
18487 is the register name. */
18488 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
18489 || REGNO (XEXP (x, 0)) >= 32)
18490 output_operand_lossage ("invalid %%P value");
18491 else
18492 fputs (reg_names[REGNO (XEXP (x, 0))], file);
18493 return;
18494
18495 case 'q':
18496 /* This outputs the logical code corresponding to a boolean
18497 expression. The expression may have one or both operands
18498 negated (if one, only the first one). For condition register
18499 logical operations, it will also treat the negated
18500 CR codes as NOTs, but not handle NOTs of them. */
18501 {
18502 const char *const *t = 0;
18503 const char *s;
18504 enum rtx_code code = GET_CODE (x);
18505 static const char * const tbl[3][3] = {
18506 { "and", "andc", "nor" },
18507 { "or", "orc", "nand" },
18508 { "xor", "eqv", "xor" } };
18509
18510 if (code == AND)
18511 t = tbl[0];
18512 else if (code == IOR)
18513 t = tbl[1];
18514 else if (code == XOR)
18515 t = tbl[2];
18516 else
18517 output_operand_lossage ("invalid %%q value");
18518
18519 if (GET_CODE (XEXP (x, 0)) != NOT)
18520 s = t[0];
18521 else
18522 {
18523 if (GET_CODE (XEXP (x, 1)) == NOT)
18524 s = t[2];
18525 else
18526 s = t[1];
18527 }
18528
18529 fputs (s, file);
18530 }
18531 return;
18532
18533 case 'Q':
18534 if (! TARGET_MFCRF)
18535 return;
18536 fputc (',', file);
18537 /* FALLTHRU */
18538
18539 case 'R':
18540 /* X is a CR register. Print the mask for `mtcrf'. */
18541 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18542 output_operand_lossage ("invalid %%R value");
18543 else
18544 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
18545 return;
18546
18547 case 's':
18548 /* Low 5 bits of 32 - value.  */
18549 if (! INT_P (x))
18550 output_operand_lossage ("invalid %%s value");
18551 else
18552 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
18553 return;
18554
18555 case 'S':
18556 /* PowerPC64 mask position. All 0's is excluded.
18557 CONST_INT 32-bit mask is considered sign-extended so any
18558 transition must occur within the CONST_INT, not on the boundary. */
18559 if (! mask64_operand (x, DImode))
18560 output_operand_lossage ("invalid %%S value");
18561
18562 uval = INTVAL (x);
18563
18564 if (uval & 1) /* Clear Left */
18565 {
18566 #if HOST_BITS_PER_WIDE_INT > 64
18567 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18568 #endif
18569 i = 64;
18570 }
18571 else /* Clear Right */
18572 {
18573 uval = ~uval;
18574 #if HOST_BITS_PER_WIDE_INT > 64
18575 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18576 #endif
18577 i = 63;
18578 }
18579 while (uval != 0)
18580 --i, uval >>= 1;
18581 gcc_assert (i >= 0);
18582 fprintf (file, "%d", i);
18583 return;
18584
18585 case 't':
18586 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
18587 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
18588
18589 /* Bit 3 is OV bit. */
18590 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
18591
18592 /* If we want bit 31, write a shift count of zero, not 32. */
18593 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18594 return;
18595
18596 case 'T':
18597 /* Print the symbolic name of a branch target register. */
18598 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
18599 && REGNO (x) != CTR_REGNO))
18600 output_operand_lossage ("invalid %%T value");
18601 else if (REGNO (x) == LR_REGNO)
18602 fputs ("lr", file);
18603 else
18604 fputs ("ctr", file);
18605 return;
18606
18607 case 'u':
18608 /* High-order or low-order 16 bits of constant, whichever is non-zero,
18609 for use in unsigned operand. */
18610 if (! INT_P (x))
18611 {
18612 output_operand_lossage ("invalid %%u value");
18613 return;
18614 }
18615
18616 uval = INTVAL (x);
18617 if ((uval & 0xffff) == 0)
18618 uval >>= 16;
18619
18620 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
18621 return;
18622
18623 case 'v':
18624 /* High-order 16 bits of constant for use in signed operand. */
18625 if (! INT_P (x))
18626 output_operand_lossage ("invalid %%v value");
18627 else
18628 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
18629 (INTVAL (x) >> 16) & 0xffff);
18630 return;
18631
18632 case 'U':
18633 /* Print `u' if this has an auto-increment or auto-decrement. */
18634 if (MEM_P (x)
18635 && (GET_CODE (XEXP (x, 0)) == PRE_INC
18636 || GET_CODE (XEXP (x, 0)) == PRE_DEC
18637 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
18638 putc ('u', file);
18639 return;
18640
18641 case 'V':
18642 /* Print the trap code for this operand. */
18643 switch (GET_CODE (x))
18644 {
18645 case EQ:
18646 fputs ("eq", file); /* 4 */
18647 break;
18648 case NE:
18649 fputs ("ne", file); /* 24 */
18650 break;
18651 case LT:
18652 fputs ("lt", file); /* 16 */
18653 break;
18654 case LE:
18655 fputs ("le", file); /* 20 */
18656 break;
18657 case GT:
18658 fputs ("gt", file); /* 8 */
18659 break;
18660 case GE:
18661 fputs ("ge", file); /* 12 */
18662 break;
18663 case LTU:
18664 fputs ("llt", file); /* 2 */
18665 break;
18666 case LEU:
18667 fputs ("lle", file); /* 6 */
18668 break;
18669 case GTU:
18670 fputs ("lgt", file); /* 1 */
18671 break;
18672 case GEU:
18673 fputs ("lge", file); /* 5 */
18674 break;
18675 default:
18676 gcc_unreachable ();
18677 }
18678 break;
18679
18680 case 'w':
18681 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
18682 normally. */
18683 if (INT_P (x))
18684 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
18685 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
18686 else
18687 print_operand (file, x, 0);
18688 return;
18689
18690 case 'W':
18691 /* MB value for a PowerPC64 rldic operand. */
18692 i = clz_hwi (INTVAL (x));
18693
18694 fprintf (file, "%d", i);
18695 return;
18696
18697 case 'x':
18698 /* X is a FPR or Altivec register used in a VSX context. */
18699 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
18700 output_operand_lossage ("invalid %%x value");
18701 else
18702 {
18703 int reg = REGNO (x);
18704 int vsx_reg = (FP_REGNO_P (reg)
18705 ? reg - 32
18706 : reg - FIRST_ALTIVEC_REGNO + 32);
18707
18708 #ifdef TARGET_REGNAMES
18709 if (TARGET_REGNAMES)
18710 fprintf (file, "%%vs%d", vsx_reg);
18711 else
18712 #endif
18713 fprintf (file, "%d", vsx_reg);
18714 }
18715 return;
18716
18717 case 'X':
18718 if (MEM_P (x)
18719 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
18720 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
18721 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
18722 putc ('x', file);
18723 return;
18724
18725 case 'Y':
18726 /* Like 'L', for third word of TImode/PTImode */
18727 if (REG_P (x))
18728 fputs (reg_names[REGNO (x) + 2], file);
18729 else if (MEM_P (x))
18730 {
18731 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18732 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18733 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18734 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18735 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18736 else
18737 output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
18738 if (small_data_operand (x, GET_MODE (x)))
18739 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18740 reg_names[SMALL_DATA_REG]);
18741 }
18742 return;
18743
18744 case 'z':
18745 /* X is a SYMBOL_REF. Write out the name preceded by a
18746 period and without any trailing data in brackets. Used for function
18747 names. If we are configured for System V (or the embedded ABI) on
18748 the PowerPC, do not emit the period, since those systems do not use
18749 TOCs and the like. */
18750 gcc_assert (GET_CODE (x) == SYMBOL_REF);
18751
18752 /* For macho, check to see if we need a stub. */
18753 if (TARGET_MACHO)
18754 {
18755 const char *name = XSTR (x, 0);
18756 #if TARGET_MACHO
18757 if (darwin_emit_branch_islands
18758 && MACHOPIC_INDIRECT
18759 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
18760 name = machopic_indirection_name (x, /*stub_p=*/true);
18761 #endif
18762 assemble_name (file, name);
18763 }
18764 else if (!DOT_SYMBOLS)
18765 assemble_name (file, XSTR (x, 0));
18766 else
18767 rs6000_output_function_entry (file, XSTR (x, 0));
18768 return;
18769
18770 case 'Z':
18771 /* Like 'L', for last word of TImode/PTImode. */
18772 if (REG_P (x))
18773 fputs (reg_names[REGNO (x) + 3], file);
18774 else if (MEM_P (x))
18775 {
18776 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18777 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18778 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18779 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18780 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18781 else
18782 output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
18783 if (small_data_operand (x, GET_MODE (x)))
18784 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18785 reg_names[SMALL_DATA_REG]);
18786 }
18787 return;
18788
18789 /* Print an AltiVec or SPE memory operand.  */
18790 case 'y':
18791 {
18792 rtx tmp;
18793
18794 gcc_assert (MEM_P (x));
18795
18796 tmp = XEXP (x, 0);
18797
18798 /* Ugly hack because %y is overloaded. */
18799 if ((TARGET_SPE || TARGET_E500_DOUBLE)
18800 && (GET_MODE_SIZE (GET_MODE (x)) == 8
18801 || GET_MODE (x) == TFmode
18802 || GET_MODE (x) == TImode
18803 || GET_MODE (x) == PTImode))
18804 {
18805 /* Handle [reg]. */
18806 if (REG_P (tmp))
18807 {
18808 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
18809 break;
18810 }
18811 /* Handle [reg+UIMM]. */
18812 else if (GET_CODE (tmp) == PLUS
18813 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
18814 {
18815 int offset;
18816
18817 gcc_assert (REG_P (XEXP (tmp, 0)));
18818
18819 offset = INTVAL (XEXP (tmp, 1));
18820 fprintf (file, "%d(%s)", offset, reg_names[REGNO (XEXP (tmp, 0))]);
18821 break;
18822 }
18823
18824 /* Fall through. Must be [reg+reg]. */
18825 }
18826 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
18827 && GET_CODE (tmp) == AND
18828 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
18829 && INTVAL (XEXP (tmp, 1)) == -16)
18830 tmp = XEXP (tmp, 0);
18831 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
18832 && GET_CODE (tmp) == PRE_MODIFY)
18833 tmp = XEXP (tmp, 1);
18834 if (REG_P (tmp))
18835 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
18836 else
18837 {
18838 if (GET_CODE (tmp) != PLUS
18839 || !REG_P (XEXP (tmp, 0))
18840 || !REG_P (XEXP (tmp, 1)))
18841 {
18842 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
18843 break;
18844 }
18845
18846 if (REGNO (XEXP (tmp, 0)) == 0)
18847 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
18848 reg_names[ REGNO (XEXP (tmp, 0)) ]);
18849 else
18850 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
18851 reg_names[ REGNO (XEXP (tmp, 1)) ]);
18852 }
18853 break;
18854 }
18855
18856 case 0:
18857 if (REG_P (x))
18858 fprintf (file, "%s", reg_names[REGNO (x)]);
18859 else if (MEM_P (x))
18860 {
18861 /* We need to handle PRE_INC and PRE_DEC here, since we need to
18862 know the width from the mode. */
18863 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
18864 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
18865 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18866 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
18867 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
18868 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18869 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18870 output_address (XEXP (XEXP (x, 0), 1));
18871 else
18872 output_address (XEXP (x, 0));
18873 }
18874 else
18875 {
18876 if (toc_relative_expr_p (x, false))
18877 /* This hack, along with a corresponding hack in
18878 rs6000_output_addr_const_extra, arranges to output addends
18879 where the assembler expects to find them. E.g.,
18880 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
18881 would without this hack be output as "x@toc+4". We
18882 want "x+4@toc". */
18883 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18884 else
18885 output_addr_const (file, x);
18886 }
18887 return;
18888
18889 case '&':
18890 if (const char *name = get_some_local_dynamic_name ())
18891 assemble_name (file, name);
18892 else
18893 output_operand_lossage ("'%%&' used without any "
18894 "local dynamic TLS references");
18895 return;
18896
18897 default:
18898 output_operand_lossage ("invalid %%xn code");
18899 }
18900 }
18901 \f
18902 /* Print the address of an operand. */
18903
18904 void
18905 print_operand_address (FILE *file, rtx x)
18906 {
18907 if (REG_P (x))
18908 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
18909 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
18910 || GET_CODE (x) == LABEL_REF)
18911 {
18912 output_addr_const (file, x);
18913 if (small_data_operand (x, GET_MODE (x)))
18914 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18915 reg_names[SMALL_DATA_REG]);
18916 else
18917 gcc_assert (!TARGET_TOC);
18918 }
18919 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
18920 && REG_P (XEXP (x, 1)))
18921 {
18922 if (REGNO (XEXP (x, 0)) == 0)
18923 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
18924 reg_names[ REGNO (XEXP (x, 0)) ]);
18925 else
18926 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
18927 reg_names[ REGNO (XEXP (x, 1)) ]);
18928 }
18929 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
18930 && GET_CODE (XEXP (x, 1)) == CONST_INT)
18931 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
18932 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
18933 #if TARGET_MACHO
18934 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
18935 && CONSTANT_P (XEXP (x, 1)))
18936 {
18937 fprintf (file, "lo16(");
18938 output_addr_const (file, XEXP (x, 1));
18939 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
18940 }
18941 #endif
18942 #if TARGET_ELF
18943 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
18944 && CONSTANT_P (XEXP (x, 1)))
18945 {
18946 output_addr_const (file, XEXP (x, 1));
18947 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
18948 }
18949 #endif
18950 else if (toc_relative_expr_p (x, false))
18951 {
18952 /* This hack, along with a corresponding hack in
18953 rs6000_output_addr_const_extra, arranges to output addends
18954 where the assembler expects to find them. E.g.,
18955 (lo_sum (reg 9)
18956 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
18957 would without this hack be output as "x@toc+8@l(9)". We
18958 want "x+8@toc@l(9)". */
18959 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18960 if (GET_CODE (x) == LO_SUM)
18961 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
18962 else
18963 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
18964 }
18965 else
18966 gcc_unreachable ();
18967 }
18968 \f
18969 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
18970
18971 static bool
18972 rs6000_output_addr_const_extra (FILE *file, rtx x)
18973 {
18974 if (GET_CODE (x) == UNSPEC)
18975 switch (XINT (x, 1))
18976 {
18977 case UNSPEC_TOCREL:
18978 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
18979 && REG_P (XVECEXP (x, 0, 1))
18980 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
18981 output_addr_const (file, XVECEXP (x, 0, 0));
18982 if (x == tocrel_base && tocrel_offset != const0_rtx)
18983 {
18984 if (INTVAL (tocrel_offset) >= 0)
18985 fprintf (file, "+");
18986 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
18987 }
18988 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
18989 {
18990 putc ('-', file);
18991 assemble_name (file, toc_label_name);
18992 }
18993 else if (TARGET_ELF)
18994 fputs ("@toc", file);
18995 return true;
18996
18997 #if TARGET_MACHO
18998 case UNSPEC_MACHOPIC_OFFSET:
18999 output_addr_const (file, XVECEXP (x, 0, 0));
19000 putc ('-', file);
19001 machopic_output_function_base_name (file);
19002 return true;
19003 #endif
19004 }
19005 return false;
19006 }
19007 \f
19008 /* Target hook for assembling integer objects. The PowerPC version has
19009 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
19010 is defined. It also needs to handle DI-mode objects on 64-bit
19011 targets. */
19012
19013 static bool
19014 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
19015 {
19016 #ifdef RELOCATABLE_NEEDS_FIXUP
19017 /* Special handling for SI values. */
19018 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
19019 {
19020 static int recurse = 0;
19021
19022 /* For -mrelocatable, we mark all addresses that need to be fixed up in
19023 the .fixup section. Since the TOC section is already relocated, we
19024 don't need to mark it here. We used to skip the text section, but it
19025 should never be valid for relocated addresses to be placed in the text
19026 section. */
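/* In outline, for a constant address X this emits something like:

   .LCP1:
           .long (X)@fixup
           .section ".fixup","aw"
           .align 2
           .long .LCP1
           .previous

   so the startup code can relocate the word at .LCP1 at run time. */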
19027 if (TARGET_RELOCATABLE
19028 && in_section != toc_section
19029 && !recurse
19030 && !CONST_SCALAR_INT_P (x)
19031 && CONSTANT_P (x))
19032 {
19033 char buf[256];
19034
19035 recurse = 1;
19036 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
19037 fixuplabelno++;
19038 ASM_OUTPUT_LABEL (asm_out_file, buf);
19039 fprintf (asm_out_file, "\t.long\t(");
19040 output_addr_const (asm_out_file, x);
19041 fprintf (asm_out_file, ")@fixup\n");
19042 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
19043 ASM_OUTPUT_ALIGN (asm_out_file, 2);
19044 fprintf (asm_out_file, "\t.long\t");
19045 assemble_name (asm_out_file, buf);
19046 fprintf (asm_out_file, "\n\t.previous\n");
19047 recurse = 0;
19048 return true;
19049 }
19050 /* Remove initial .'s to turn a -mcall-aixdesc function
19051 address into the address of the descriptor, not the function
19052 itself. */
19053 else if (GET_CODE (x) == SYMBOL_REF
19054 && XSTR (x, 0)[0] == '.'
19055 && DEFAULT_ABI == ABI_AIX)
19056 {
19057 const char *name = XSTR (x, 0);
19058 while (*name == '.')
19059 name++;
19060
19061 fprintf (asm_out_file, "\t.long\t%s\n", name);
19062 return true;
19063 }
19064 }
19065 #endif /* RELOCATABLE_NEEDS_FIXUP */
19066 return default_assemble_integer (x, size, aligned_p);
19067 }
19068
19069 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
19070 /* Emit an assembler directive to set symbol visibility for DECL to
19071 VISIBILITY_TYPE. */
19072
19073 static void
19074 rs6000_assemble_visibility (tree decl, int vis)
19075 {
19076 if (TARGET_XCOFF)
19077 return;
19078
19079 /* Functions need to have their entry point symbol visibility set as
19080 well as their descriptor symbol visibility. */
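/* E.g. a hidden function foo then gets both ".hidden foo" for the
   descriptor and ".hidden .foo" for the code entry point. */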
19081 if (DEFAULT_ABI == ABI_AIX
19082 && DOT_SYMBOLS
19083 && TREE_CODE (decl) == FUNCTION_DECL)
19084 {
19085 static const char * const visibility_types[] = {
19086 NULL, "internal", "hidden", "protected"
19087 };
19088
19089 const char *name, *type;
19090
19091 name = ((* targetm.strip_name_encoding)
19092 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
19093 type = visibility_types[vis];
19094
19095 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
19096 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
19097 }
19098 else
19099 default_assemble_visibility (decl, vis);
19100 }
19101 #endif
19102 \f
19103 enum rtx_code
19104 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
19105 {
19106 /* Reversal of FP compares requires care -- an ordered compare
19107 becomes an unordered compare and vice versa. */
19108 if (mode == CCFPmode
19109 && (!flag_finite_math_only
19110 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
19111 || code == UNEQ || code == LTGT))
19112 return reverse_condition_maybe_unordered (code);
19113 else
19114 return reverse_condition (code);
19115 }
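/* For example, reversing (lt x y) in CCFPmode without
   flag_finite_math_only yields UNGE rather than GE, since !(x < y)
   must also hold when the operands are unordered (NaN). */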
19116
19117 /* Generate a compare for CODE. Return a brand-new rtx that
19118 represents the result of the compare. */
19119
19120 static rtx
19121 rs6000_generate_compare (rtx cmp, machine_mode mode)
19122 {
19123 machine_mode comp_mode;
19124 rtx compare_result;
19125 enum rtx_code code = GET_CODE (cmp);
19126 rtx op0 = XEXP (cmp, 0);
19127 rtx op1 = XEXP (cmp, 1);
19128
19129 if (FLOAT_MODE_P (mode))
19130 comp_mode = CCFPmode;
19131 else if (code == GTU || code == LTU
19132 || code == GEU || code == LEU)
19133 comp_mode = CCUNSmode;
19134 else if ((code == EQ || code == NE)
19135 && unsigned_reg_p (op0)
19136 && (unsigned_reg_p (op1)
19137 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
19138 /* These are unsigned values; perhaps there will be a later
19139 ordering compare that can be shared with this one. */
19140 comp_mode = CCUNSmode;
19141 else
19142 comp_mode = CCmode;
19143
19144 /* If we have an unsigned compare, make sure we don't have a signed value as
19145 an immediate. */
19146 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
19147 && INTVAL (op1) < 0)
19148 {
19149 op0 = copy_rtx_if_shared (op0);
19150 op1 = force_reg (GET_MODE (op0), op1);
19151 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
19152 }
19153
19154 /* First, the compare. */
19155 compare_result = gen_reg_rtx (comp_mode);
19156
19157 /* E500 FP compare instructions on the GPRs. Yuck! */
19158 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
19159 && FLOAT_MODE_P (mode))
19160 {
19161 rtx cmp, or_result, compare_result2;
19162 machine_mode op_mode = GET_MODE (op0);
19163 bool reverse_p;
19164
19165 if (op_mode == VOIDmode)
19166 op_mode = GET_MODE (op1);
19167
19168 /* First reverse the condition codes that aren't directly supported. */
19169 switch (code)
19170 {
19171 case NE:
19172 case UNLT:
19173 case UNLE:
19174 case UNGT:
19175 case UNGE:
19176 code = reverse_condition_maybe_unordered (code);
19177 reverse_p = true;
19178 break;
19179
19180 case EQ:
19181 case LT:
19182 case LE:
19183 case GT:
19184 case GE:
19185 reverse_p = false;
19186 break;
19187
19188 default:
19189 gcc_unreachable ();
19190 }
19191
19192 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
19193 This explains the following mess. */
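/* Roughly: efscmplt/efscmpgt/efscmpeq and friends set only the GT
   bit of the CR field when the relation holds, so LE, for instance,
   is built below as (LT || EQ) by OR-ing the GT bits of two
   compares. */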
19194
19195 switch (code)
19196 {
19197 case EQ:
19198 switch (op_mode)
19199 {
19200 case SFmode:
19201 cmp = (flag_finite_math_only && !flag_trapping_math)
19202 ? gen_tstsfeq_gpr (compare_result, op0, op1)
19203 : gen_cmpsfeq_gpr (compare_result, op0, op1);
19204 break;
19205
19206 case DFmode:
19207 cmp = (flag_finite_math_only && !flag_trapping_math)
19208 ? gen_tstdfeq_gpr (compare_result, op0, op1)
19209 : gen_cmpdfeq_gpr (compare_result, op0, op1);
19210 break;
19211
19212 case TFmode:
19213 cmp = (flag_finite_math_only && !flag_trapping_math)
19214 ? gen_tsttfeq_gpr (compare_result, op0, op1)
19215 : gen_cmptfeq_gpr (compare_result, op0, op1);
19216 break;
19217
19218 default:
19219 gcc_unreachable ();
19220 }
19221 break;
19222
19223 case GT:
19224 case GE:
19225 switch (op_mode)
19226 {
19227 case SFmode:
19228 cmp = (flag_finite_math_only && !flag_trapping_math)
19229 ? gen_tstsfgt_gpr (compare_result, op0, op1)
19230 : gen_cmpsfgt_gpr (compare_result, op0, op1);
19231 break;
19232
19233 case DFmode:
19234 cmp = (flag_finite_math_only && !flag_trapping_math)
19235 ? gen_tstdfgt_gpr (compare_result, op0, op1)
19236 : gen_cmpdfgt_gpr (compare_result, op0, op1);
19237 break;
19238
19239 case TFmode:
19240 cmp = (flag_finite_math_only && !flag_trapping_math)
19241 ? gen_tsttfgt_gpr (compare_result, op0, op1)
19242 : gen_cmptfgt_gpr (compare_result, op0, op1);
19243 break;
19244
19245 default:
19246 gcc_unreachable ();
19247 }
19248 break;
19249
19250 case LT:
19251 case LE:
19252 switch (op_mode)
19253 {
19254 case SFmode:
19255 cmp = (flag_finite_math_only && !flag_trapping_math)
19256 ? gen_tstsflt_gpr (compare_result, op0, op1)
19257 : gen_cmpsflt_gpr (compare_result, op0, op1);
19258 break;
19259
19260 case DFmode:
19261 cmp = (flag_finite_math_only && !flag_trapping_math)
19262 ? gen_tstdflt_gpr (compare_result, op0, op1)
19263 : gen_cmpdflt_gpr (compare_result, op0, op1);
19264 break;
19265
19266 case TFmode:
19267 cmp = (flag_finite_math_only && !flag_trapping_math)
19268 ? gen_tsttflt_gpr (compare_result, op0, op1)
19269 : gen_cmptflt_gpr (compare_result, op0, op1);
19270 break;
19271
19272 default:
19273 gcc_unreachable ();
19274 }
19275 break;
19276
19277 default:
19278 gcc_unreachable ();
19279 }
19280
19281 /* Synthesize LE and GE from LT/GT || EQ. */
19282 if (code == LE || code == GE)
19283 {
19284 emit_insn (cmp);
19285
19286 compare_result2 = gen_reg_rtx (CCFPmode);
19287
19288 /* Do the EQ. */
19289 switch (op_mode)
19290 {
19291 case SFmode:
19292 cmp = (flag_finite_math_only && !flag_trapping_math)
19293 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
19294 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
19295 break;
19296
19297 case DFmode:
19298 cmp = (flag_finite_math_only && !flag_trapping_math)
19299 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
19300 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
19301 break;
19302
19303 case TFmode:
19304 cmp = (flag_finite_math_only && !flag_trapping_math)
19305 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
19306 : gen_cmptfeq_gpr (compare_result2, op0, op1);
19307 break;
19308
19309 default:
19310 gcc_unreachable ();
19311 }
19312
19313 emit_insn (cmp);
19314
19315 /* OR them together. */
19316 or_result = gen_reg_rtx (CCFPmode);
19317 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
19318 compare_result2);
19319 compare_result = or_result;
19320 }
19321
19322 code = reverse_p ? NE : EQ;
19323
19324 emit_insn (cmp);
19325 }
19326 else
19327 {
19328 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
19329 CLOBBERs to match cmptf_internal2 pattern. */
19330 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
19331 && GET_MODE (op0) == TFmode
19332 && !TARGET_IEEEQUAD
19333 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
19334 emit_insn (gen_rtx_PARALLEL (VOIDmode,
19335 gen_rtvec (10,
19336 gen_rtx_SET (VOIDmode,
19337 compare_result,
19338 gen_rtx_COMPARE (comp_mode, op0, op1)),
19339 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19340 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19341 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19342 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19343 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19344 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19345 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19346 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19347 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
19348 else if (GET_CODE (op1) == UNSPEC
19349 && XINT (op1, 1) == UNSPEC_SP_TEST)
19350 {
19351 rtx op1b = XVECEXP (op1, 0, 0);
19352 comp_mode = CCEQmode;
19353 compare_result = gen_reg_rtx (CCEQmode);
19354 if (TARGET_64BIT)
19355 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
19356 else
19357 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
19358 }
19359 else
19360 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
19361 gen_rtx_COMPARE (comp_mode, op0, op1)));
19362 }
19363
19364 /* Some kinds of FP comparisons need an OR operation;
19365 under flag_finite_math_only we don't bother. */
19366 if (FLOAT_MODE_P (mode)
19367 && !flag_finite_math_only
19368 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
19369 && (code == LE || code == GE
19370 || code == UNEQ || code == LTGT
19371 || code == UNGT || code == UNLT))
19372 {
19373 enum rtx_code or1, or2;
19374 rtx or1_rtx, or2_rtx, compare2_rtx;
19375 rtx or_result = gen_reg_rtx (CCEQmode);
19376
19377 switch (code)
19378 {
19379 case LE: or1 = LT; or2 = EQ; break;
19380 case GE: or1 = GT; or2 = EQ; break;
19381 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
19382 case LTGT: or1 = LT; or2 = GT; break;
19383 case UNGT: or1 = UNORDERED; or2 = GT; break;
19384 case UNLT: or1 = UNORDERED; or2 = LT; break;
19385 default: gcc_unreachable ();
19386 }
19387 validate_condition_mode (or1, comp_mode);
19388 validate_condition_mode (or2, comp_mode);
19389 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
19390 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
19391 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
19392 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
19393 const_true_rtx);
19394 emit_insn (gen_rtx_SET (VOIDmode, or_result, compare2_rtx));
19395
19396 compare_result = or_result;
19397 code = EQ;
19398 }
19399
19400 validate_condition_mode (code, GET_MODE (compare_result));
19401
19402 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
19403 }
19404
19405
19406 /* Emit the RTL for an sISEL pattern. */
19407
19408 void
19409 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
19410 {
19411 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
19412 }
19413
19414 void
19415 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
19416 {
19417 rtx condition_rtx;
19418 machine_mode op_mode;
19419 enum rtx_code cond_code;
19420 rtx result = operands[0];
19421
19422 if (TARGET_ISEL && (mode == SImode || mode == DImode))
19423 {
19424 rs6000_emit_sISEL (mode, operands);
19425 return;
19426 }
19427
19428 condition_rtx = rs6000_generate_compare (operands[1], mode);
19429 cond_code = GET_CODE (condition_rtx);
19430
19431 if (FLOAT_MODE_P (mode)
19432 && !TARGET_FPRS && TARGET_HARD_FLOAT)
19433 {
19434 rtx t;
19435
19436 PUT_MODE (condition_rtx, SImode);
19437 t = XEXP (condition_rtx, 0);
19438
19439 gcc_assert (cond_code == NE || cond_code == EQ);
19440
19441 if (cond_code == NE)
19442 emit_insn (gen_e500_flip_gt_bit (t, t));
19443
19444 emit_insn (gen_move_from_CR_gt_bit (result, t));
19445 return;
19446 }
19447
19448 if (cond_code == NE
19449 || cond_code == GE || cond_code == LE
19450 || cond_code == GEU || cond_code == LEU
19451 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
19452 {
19453 rtx not_result = gen_reg_rtx (CCEQmode);
19454 rtx not_op, rev_cond_rtx;
19455 machine_mode cc_mode;
19456
19457 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
19458
19459 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
19460 SImode, XEXP (condition_rtx, 0), const0_rtx);
19461 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
19462 emit_insn (gen_rtx_SET (VOIDmode, not_result, not_op));
19463 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
19464 }
19465
19466 op_mode = GET_MODE (XEXP (operands[1], 0));
19467 if (op_mode == VOIDmode)
19468 op_mode = GET_MODE (XEXP (operands[1], 1));
19469
19470 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
19471 {
19472 PUT_MODE (condition_rtx, DImode);
19473 convert_move (result, condition_rtx, 0);
19474 }
19475 else
19476 {
19477 PUT_MODE (condition_rtx, SImode);
19478 emit_insn (gen_rtx_SET (VOIDmode, result, condition_rtx));
19479 }
19480 }
19481
19482 /* Emit a conditional branch: OPERANDS[0] is the comparison, OPERANDS[3] the label. */
19483
19484 void
19485 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
19486 {
19487 rtx condition_rtx, loc_ref;
19488
19489 condition_rtx = rs6000_generate_compare (operands[0], mode);
19490 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
19491 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
19492 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
19493 loc_ref, pc_rtx)));
19494 }
19495
19496 /* Return the string to output a conditional branch to LABEL, which is
19497 the operand template of the label, or NULL if the branch is really a
19498 conditional return.
19499
19500 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
19501 condition code register and its mode specifies what kind of
19502 comparison we made.
19503
19504 REVERSED is nonzero if we should reverse the sense of the comparison.
19505
19506 INSN is the insn. */
19507
19508 char *
19509 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
19510 {
19511 static char string[64];
19512 enum rtx_code code = GET_CODE (op);
19513 rtx cc_reg = XEXP (op, 0);
19514 machine_mode mode = GET_MODE (cc_reg);
19515 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
19516 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
19517 int really_reversed = reversed ^ need_longbranch;
19518 char *s = string;
19519 const char *ccode;
19520 const char *pred;
19521 rtx note;
19522
19523 validate_condition_mode (code, mode);
19524
19525 /* Work out which way this really branches. We could always use
19526 reverse_condition_maybe_unordered here, but distinguishing the
19527 cases makes the resulting assembler clearer. */
19528 if (really_reversed)
19529 {
19530 /* Reversal of FP compares requires care -- an ordered compare
19531 becomes an unordered compare and vice versa. */
19532 if (mode == CCFPmode)
19533 code = reverse_condition_maybe_unordered (code);
19534 else
19535 code = reverse_condition (code);
19536 }
19537
19538 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
19539 {
19540 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
19541 to the GT bit. */
19542 switch (code)
19543 {
19544 case EQ:
19545 /* The compare left its result in the GT bit; branch on "gt". */
19546 code = GT;
19547 break;
19548
19549 case NE:
19550 /* Branch when the GT bit is clear, i.e. "ng". */
19551 code = UNLE;
19552 break;
19552
19553 default:
19554 gcc_unreachable ();
19555 }
19556 }
19557
19558 switch (code)
19559 {
19560 /* Not all of these are actually distinct opcodes, but
19561 we distinguish them for clarity of the resulting assembler. */
19562 case NE: case LTGT:
19563 ccode = "ne"; break;
19564 case EQ: case UNEQ:
19565 ccode = "eq"; break;
19566 case GE: case GEU:
19567 ccode = "ge"; break;
19568 case GT: case GTU: case UNGT:
19569 ccode = "gt"; break;
19570 case LE: case LEU:
19571 ccode = "le"; break;
19572 case LT: case LTU: case UNLT:
19573 ccode = "lt"; break;
19574 case UNORDERED: ccode = "un"; break;
19575 case ORDERED: ccode = "nu"; break;
19576 case UNGE: ccode = "nl"; break;
19577 case UNLE: ccode = "ng"; break;
19578 default:
19579 gcc_unreachable ();
19580 }
19581
19582 /* Maybe we have a guess as to how likely the branch is. */
19583 pred = "";
19584 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
19585 if (note != NULL_RTX)
19586 {
19587 /* PROB is the difference from 50%. */
19588 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
19589
19590 /* Only hint for highly probable/improbable branches on newer
19591 cpus as static prediction overrides processor dynamic
19592 prediction. For older cpus we may as well always hint, but
19593 assume not taken for branches that are very close to 50% as a
19594 mispredicted taken branch is more expensive than a
19595 mispredicted not-taken branch. */
19596 if (rs6000_always_hint
19597 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
19598 && br_prob_note_reliable_p (note)))
19599 {
19600 if (abs (prob) > REG_BR_PROB_BASE / 20
19601 && ((prob > 0) ^ need_longbranch))
19602 pred = "+";
19603 else
19604 pred = "-";
19605 }
19606 }
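/* With REG_BR_PROB_BASE of 10000, absent rs6000_always_hint the hint
   is emitted only when the recorded probability lies outside roughly
   [2%, 98%]; the inner test then picks "+" (predict taken) or "-",
   preferring "-" for anything within 5 points of 50%. */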
19607
19608 if (label == NULL)
19609 s += sprintf (s, "b%slr%s ", ccode, pred);
19610 else
19611 s += sprintf (s, "b%s%s ", ccode, pred);
19612
19613 /* We need to escape any '%' characters in the reg_names string.
19614 Assume they'd only be the first character.... */
19615 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
19616 *s++ = '%';
19617 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
19618
19619 if (label != NULL)
19620 {
19621 /* If the branch distance was too far, we may have to use an
19622 unconditional branch to go the distance. */
19623 if (need_longbranch)
19624 s += sprintf (s, ",$+8\n\tb %s", label);
19625 else
19626 s += sprintf (s, ",%s", label);
19627 }
19628
19629 return string;
19630 }
19631
19632 /* Return the string to flip the GT bit on a CR. */
19633 char *
19634 output_e500_flip_gt_bit (rtx dst, rtx src)
19635 {
19636 static char string[64];
19637 int a, b;
19638
19639 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
19640 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
19641
19642 /* GT bit. */
19643 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
19644 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
19645
19646 sprintf (string, "crnot %d,%d", a, b);
19647 return string;
19648 }
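/* E.g. with DST in CR2 and SRC in CR3 this returns "crnot 9,13",
   since the GT bit of CR field N is CR bit 4*N+1. */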
19649
19650 /* Return insn for VSX or Altivec comparisons. */
19651
19652 static rtx
19653 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
19654 {
19655 rtx mask;
19656 machine_mode mode = GET_MODE (op0);
19657
19658 switch (code)
19659 {
19660 default:
19661 break;
19662
19663 case GE:
19664 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19665 return NULL_RTX;
19666
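/* Fall through. */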
19667 case EQ:
19668 case GT:
19669 case GTU:
19670 case ORDERED:
19671 case UNORDERED:
19672 case UNEQ:
19673 case LTGT:
19674 mask = gen_reg_rtx (mode);
19675 emit_insn (gen_rtx_SET (VOIDmode,
19676 mask,
19677 gen_rtx_fmt_ee (code, mode, op0, op1)));
19678 return mask;
19679 }
19680
19681 return NULL_RTX;
19682 }
19683
19684 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
19685 DMODE is expected destination mode. This is a recursive function. */
19686
19687 static rtx
19688 rs6000_emit_vector_compare (enum rtx_code rcode,
19689 rtx op0, rtx op1,
19690 machine_mode dmode)
19691 {
19692 rtx mask;
19693 bool swap_operands = false;
19694 bool try_again = false;
19695
19696 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
19697 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
19698
19699 /* See if the comparison works as is. */
19700 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19701 if (mask)
19702 return mask;
19703
19704 switch (rcode)
19705 {
19706 case LT:
19707 rcode = GT;
19708 swap_operands = true;
19709 try_again = true;
19710 break;
19711 case LTU:
19712 rcode = GTU;
19713 swap_operands = true;
19714 try_again = true;
19715 break;
19716 case NE:
19717 case UNLE:
19718 case UNLT:
19719 case UNGE:
19720 case UNGT:
19721 /* Invert condition and try again.
19722 e.g., A != B becomes ~(A==B). */
19723 {
19724 enum rtx_code rev_code;
19725 enum insn_code nor_code;
19726 rtx mask2;
19727
19728 rev_code = reverse_condition_maybe_unordered (rcode);
19729 if (rev_code == UNKNOWN)
19730 return NULL_RTX;
19731
19732 nor_code = optab_handler (one_cmpl_optab, dmode);
19733 if (nor_code == CODE_FOR_nothing)
19734 return NULL_RTX;
19735
19736 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
19737 if (!mask2)
19738 return NULL_RTX;
19739
19740 mask = gen_reg_rtx (dmode);
19741 emit_insn (GEN_FCN (nor_code) (mask, mask2));
19742 return mask;
19743 }
19744 break;
19745 case GE:
19746 case GEU:
19747 case LE:
19748 case LEU:
19749 /* Try GT/GTU/LT/LTU OR EQ */
19750 {
19751 rtx c_rtx, eq_rtx;
19752 enum insn_code ior_code;
19753 enum rtx_code new_code;
19754
19755 switch (rcode)
19756 {
19757 case GE:
19758 new_code = GT;
19759 break;
19760
19761 case GEU:
19762 new_code = GTU;
19763 break;
19764
19765 case LE:
19766 new_code = LT;
19767 break;
19768
19769 case LEU:
19770 new_code = LTU;
19771 break;
19772
19773 default:
19774 gcc_unreachable ();
19775 }
19776
19777 ior_code = optab_handler (ior_optab, dmode);
19778 if (ior_code == CODE_FOR_nothing)
19779 return NULL_RTX;
19780
19781 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
19782 if (!c_rtx)
19783 return NULL_RTX;
19784
19785 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
19786 if (!eq_rtx)
19787 return NULL_RTX;
19788
19789 mask = gen_reg_rtx (dmode);
19790 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
19791 return mask;
19792 }
19793 break;
19794 default:
19795 return NULL_RTX;
19796 }
19797
19798 if (try_again)
19799 {
19800 if (swap_operands)
19801 {
19802 rtx tmp;
19803 tmp = op0;
19804 op0 = op1;
19805 op1 = tmp;
19806 }
19807
19808 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19809 if (mask)
19810 return mask;
19811 }
19812
19813 /* You only get two chances. */
19814 return NULL_RTX;
19815 }
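/* As an illustration, a signed V4SI "a <= b" has no single AltiVec
   instruction; the code above builds it as (b > a) | (a == b), i.e.
   vcmpgtsw with swapped operands OR-ed (vor) with vcmpequw. */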
19816
19817 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
19818 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
19819 operands for the relation operation COND. */
19820
19821 int
19822 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
19823 rtx cond, rtx cc_op0, rtx cc_op1)
19824 {
19825 machine_mode dest_mode = GET_MODE (dest);
19826 machine_mode mask_mode = GET_MODE (cc_op0);
19827 enum rtx_code rcode = GET_CODE (cond);
19828 machine_mode cc_mode = CCmode;
19829 rtx mask;
19830 rtx cond2;
19831 rtx tmp;
19832 bool invert_move = false;
19833
19834 if (VECTOR_UNIT_NONE_P (dest_mode))
19835 return 0;
19836
19837 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
19838 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
19839
19840 switch (rcode)
19841 {
19842 /* Swap operands if we can, and otherwise fall back to doing the
19843 operation as specified and using a NOR to invert the test. */
19844 case NE:
19845 case UNLE:
19846 case UNLT:
19847 case UNGE:
19848 case UNGT:
19849 /* Invert condition and try again.
19850 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
19851 invert_move = true;
19852 rcode = reverse_condition_maybe_unordered (rcode);
19853 if (rcode == UNKNOWN)
19854 return 0;
19855 break;
19856
19857 /* Mark unsigned tests with CCUNSmode. */
19858 case GTU:
19859 case GEU:
19860 case LTU:
19861 case LEU:
19862 cc_mode = CCUNSmode;
19863 break;
19864
19865 default:
19866 break;
19867 }
19868
19869 /* Get the vector mask for the given relational operations. */
19870 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
19871
19872 if (!mask)
19873 return 0;
19874
19875 if (invert_move)
19876 {
19877 tmp = op_true;
19878 op_true = op_false;
19879 op_false = tmp;
19880 }
19881
19882 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
19883 CONST0_RTX (dest_mode));
19884 emit_insn (gen_rtx_SET (VOIDmode,
19885 dest,
19886 gen_rtx_IF_THEN_ELSE (dest_mode,
19887 cond2,
19888 op_true,
19889 op_false)));
19890 return 1;
19891 }
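/* E.g. V4SI "dest = (a > b) ? t : f" becomes a vcmpgtsw producing
   MASK, followed by a vsel picking T or F per element; the NE test
   against zero above is what the vector select patterns match. */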
19892
19893 /* Emit a conditional move: move TRUE_COND to DEST if the comparison
19894 OP of its operands is nonzero/true, FALSE_COND if it is zero/false.
19895 Return 0 if the hardware has no such operation. */
19896
19897 int
19898 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
19899 {
19900 enum rtx_code code = GET_CODE (op);
19901 rtx op0 = XEXP (op, 0);
19902 rtx op1 = XEXP (op, 1);
19903 REAL_VALUE_TYPE c1;
19904 machine_mode compare_mode = GET_MODE (op0);
19905 machine_mode result_mode = GET_MODE (dest);
19906 rtx temp;
19907 bool is_against_zero;
19908
19909 /* These modes should always match. */
19910 if (GET_MODE (op1) != compare_mode
19911 /* In the isel case however, we can use a compare immediate, so
19912 op1 may be a small constant. */
19913 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
19914 return 0;
19915 if (GET_MODE (true_cond) != result_mode)
19916 return 0;
19917 if (GET_MODE (false_cond) != result_mode)
19918 return 0;
19919
19920 /* Don't allow using floating point comparisons for integer results for
19921 now. */
19922 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
19923 return 0;
19924
19925 /* First, work out if the hardware can do this at all, or
19926 if it's too slow.... */
19927 if (!FLOAT_MODE_P (compare_mode))
19928 {
19929 if (TARGET_ISEL)
19930 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
19931 return 0;
19932 }
19933 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
19934 && SCALAR_FLOAT_MODE_P (compare_mode))
19935 return 0;
19936
19937 is_against_zero = op1 == CONST0_RTX (compare_mode);
19938
19939 /* A floating-point subtract might overflow, underflow, or produce
19940 an inexact result, thus changing the floating-point flags, so it
19941 can't be generated if we care about that. It's safe if one side
19942 of the construct is zero, since then no subtract will be
19943 generated. */
19944 if (SCALAR_FLOAT_MODE_P (compare_mode)
19945 && flag_trapping_math && ! is_against_zero)
19946 return 0;
19947
19948 /* Eliminate half of the comparisons by switching operands; this
19949 makes the remaining code simpler. */
19950 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
19951 || code == LTGT || code == LT || code == UNLE)
19952 {
19953 code = reverse_condition_maybe_unordered (code);
19954 temp = true_cond;
19955 true_cond = false_cond;
19956 false_cond = temp;
19957 }
19958
19959 /* UNEQ and LTGT take four instructions for a comparison with zero,
19960 so it'll probably be faster to use a branch here too. */
19961 if (code == UNEQ && HONOR_NANS (compare_mode))
19962 return 0;
19963
19964 if (GET_CODE (op1) == CONST_DOUBLE)
19965 REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);
19966
19967 /* We're going to try to implement comparisons by performing
19968 a subtract, then comparing against zero. Unfortunately,
19969 Inf - Inf is NaN which is not zero, and so if we don't
19970 know that the operand is finite and the comparison
19971 would treat EQ differently from UNORDERED, we can't do it. */
19972 if (HONOR_INFINITIES (compare_mode)
19973 && code != GT && code != UNGE
19974 && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
19975 /* Constructs of the form (a OP b ? a : b) are safe. */
19976 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
19977 || (! rtx_equal_p (op0, true_cond)
19978 && ! rtx_equal_p (op1, true_cond))))
19979 return 0;
19980
19981 /* At this point we know we can use fsel. */
19982
19983 /* Reduce the comparison to a comparison against zero. */
19984 if (! is_against_zero)
19985 {
19986 temp = gen_reg_rtx (compare_mode);
19987 emit_insn (gen_rtx_SET (VOIDmode, temp,
19988 gen_rtx_MINUS (compare_mode, op0, op1)));
19989 op0 = temp;
19990 op1 = CONST0_RTX (compare_mode);
19991 }
19992
19993 /* If we don't care about NaNs we can reduce some of the comparisons
19994 down to faster ones. */
19995 if (! HONOR_NANS (compare_mode))
19996 switch (code)
19997 {
19998 case GT:
19999 code = LE;
20000 temp = true_cond;
20001 true_cond = false_cond;
20002 false_cond = temp;
20003 break;
20004 case UNGE:
20005 code = GE;
20006 break;
20007 case UNEQ:
20008 code = EQ;
20009 break;
20010 default:
20011 break;
20012 }
20013
20014 /* Now, reduce everything down to a GE. */
20015 switch (code)
20016 {
20017 case GE:
20018 break;
20019
20020 case LE:
20021 temp = gen_reg_rtx (compare_mode);
20022 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20023 op0 = temp;
20024 break;
20025
20026 case ORDERED:
20027 temp = gen_reg_rtx (compare_mode);
20028 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_ABS (compare_mode, op0)));
20029 op0 = temp;
20030 break;
20031
20032 case EQ:
20033 temp = gen_reg_rtx (compare_mode);
20034 emit_insn (gen_rtx_SET (VOIDmode, temp,
20035 gen_rtx_NEG (compare_mode,
20036 gen_rtx_ABS (compare_mode, op0))));
20037 op0 = temp;
20038 break;
20039
20040 case UNGE:
20041 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
20042 temp = gen_reg_rtx (result_mode);
20043 emit_insn (gen_rtx_SET (VOIDmode, temp,
20044 gen_rtx_IF_THEN_ELSE (result_mode,
20045 gen_rtx_GE (VOIDmode,
20046 op0, op1),
20047 true_cond, false_cond)));
20048 false_cond = true_cond;
20049 true_cond = temp;
20050
20051 temp = gen_reg_rtx (compare_mode);
20052 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20053 op0 = temp;
20054 break;
20055
20056 case GT:
20057 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
20058 temp = gen_reg_rtx (result_mode);
20059 emit_insn (gen_rtx_SET (VOIDmode, temp,
20060 gen_rtx_IF_THEN_ELSE (result_mode,
20061 gen_rtx_GE (VOIDmode,
20062 op0, op1),
20063 true_cond, false_cond)));
20064 true_cond = false_cond;
20065 false_cond = temp;
20066
20067 temp = gen_reg_rtx (compare_mode);
20068 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20069 op0 = temp;
20070 break;
20071
20072 default:
20073 gcc_unreachable ();
20074 }
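/* The reductions above rely on fsel testing "op0 >= 0":
   a <= 0 becomes -a >= 0, ORDERED becomes fabs (a) >= 0 (false only
   for NaN), and a == 0 becomes -fabs (a) >= 0. */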
20075
20076 emit_insn (gen_rtx_SET (VOIDmode, dest,
20077 gen_rtx_IF_THEN_ELSE (result_mode,
20078 gen_rtx_GE (VOIDmode,
20079 op0, op1),
20080 true_cond, false_cond)));
20081 return 1;
20082 }
20083
20084 /* Same as above, but for ints (isel). */
20085
20086 static int
20087 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
20088 {
20089 rtx condition_rtx, cr;
20090 machine_mode mode = GET_MODE (dest);
20091 enum rtx_code cond_code;
20092 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
20093 bool signedp;
20094
20095 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
20096 return 0;
20097
20098 /* We still have to do the compare, because isel doesn't do a
20099 compare; it just looks at the CRx bits set by a previous compare
20100 instruction. */
20101 condition_rtx = rs6000_generate_compare (op, mode);
20102 cond_code = GET_CODE (condition_rtx);
20103 cr = XEXP (condition_rtx, 0);
20104 signedp = GET_MODE (cr) == CCmode;
20105
20106 isel_func = (mode == SImode
20107 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
20108 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
20109
20110 switch (cond_code)
20111 {
20112 case LT: case GT: case LTU: case GTU: case EQ:
20113 /* isel handles these directly. */
20114 break;
20115
20116 default:
20117 /* We need to swap the sense of the comparison. */
20118 {
20119 rtx t = true_cond;
20120 true_cond = false_cond;
20121 false_cond = t;
20122 PUT_CODE (condition_rtx, reverse_condition (cond_code));
20123 }
20124 break;
20125 }
20126
20127 false_cond = force_reg (mode, false_cond);
20128 if (true_cond != const0_rtx)
20129 true_cond = force_reg (mode, true_cond);
20130
20131 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
20132
20133 return 1;
20134 }
20135
20136 const char *
20137 output_isel (rtx *operands)
20138 {
20139 enum rtx_code code;
20140
20141 code = GET_CODE (operands[1]);
20142
20143 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
20144 {
20145 gcc_assert (GET_CODE (operands[2]) == REG
20146 && GET_CODE (operands[3]) == REG);
20147 PUT_CODE (operands[1], reverse_condition (code));
20148 return "isel %0,%3,%2,%j1";
20149 }
20150
20151 return "isel %0,%2,%3,%j1";
20152 }
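/* The underlying instruction is "isel rD,rA,rB,BC": rD = rA if CR
   bit BC is set, else rB. Only the LT, GT and EQ bits can be tested
   directly, so inverted conditions such as GE swap the two source
   operands and test the complementary bit instead. */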
20153
20154 void
20155 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20156 {
20157 machine_mode mode = GET_MODE (op0);
20158 enum rtx_code c;
20159 rtx target;
20160
20161 /* VSX/altivec have direct min/max insns. */
20162 if ((code == SMAX || code == SMIN)
20163 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
20164 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
20165 {
20166 emit_insn (gen_rtx_SET (VOIDmode,
20167 dest,
20168 gen_rtx_fmt_ee (code, mode, op0, op1)));
20169 return;
20170 }
20171
20172 if (code == SMAX || code == SMIN)
20173 c = GE;
20174 else
20175 c = GEU;
20176
20177 if (code == SMAX || code == UMAX)
20178 target = emit_conditional_move (dest, c, op0, op1, mode,
20179 op0, op1, mode, 0);
20180 else
20181 target = emit_conditional_move (dest, c, op0, op1, mode,
20182 op1, op0, mode, 0);
20183 gcc_assert (target);
20184 if (target != dest)
20185 emit_move_insn (dest, target);
20186 }
20187
20188 /* A subroutine of the atomic operation splitters. Jump to LABEL if
20189 COND is true. Mark the jump as unlikely to be taken. */
20190
20191 static void
20192 emit_unlikely_jump (rtx cond, rtx label)
20193 {
20194 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
20195 rtx x;
20196
20197 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
20198 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
20199 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
20200 }
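/* With REG_BR_PROB_BASE of 10000, VERY_UNLIKELY above is 99, i.e. the
   jump is annotated as taken with probability just under 1%. */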
20201
20202 /* A subroutine of the atomic operation splitters. Emit a load-locked
20203 instruction in MODE. For QI/HImode, possibly use a pattern that includes
20204 the zero_extend operation. */
20205
20206 static void
20207 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
20208 {
20209 rtx (*fn) (rtx, rtx) = NULL;
20210
20211 switch (mode)
20212 {
20213 case QImode:
20214 fn = gen_load_lockedqi;
20215 break;
20216 case HImode:
20217 fn = gen_load_lockedhi;
20218 break;
20219 case SImode:
20220 if (GET_MODE (mem) == QImode)
20221 fn = gen_load_lockedqi_si;
20222 else if (GET_MODE (mem) == HImode)
20223 fn = gen_load_lockedhi_si;
20224 else
20225 fn = gen_load_lockedsi;
20226 break;
20227 case DImode:
20228 fn = gen_load_lockeddi;
20229 break;
20230 case TImode:
20231 fn = gen_load_lockedti;
20232 break;
20233 default:
20234 gcc_unreachable ();
20235 }
20236 emit_insn (fn (reg, mem));
20237 }
20238
20239 /* A subroutine of the atomic operation splitters. Emit a store-conditional
20240 instruction in MODE. */
20241
20242 static void
20243 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
20244 {
20245 rtx (*fn) (rtx, rtx, rtx) = NULL;
20246
20247 switch (mode)
20248 {
20249 case QImode:
20250 fn = gen_store_conditionalqi;
20251 break;
20252 case HImode:
20253 fn = gen_store_conditionalhi;
20254 break;
20255 case SImode:
20256 fn = gen_store_conditionalsi;
20257 break;
20258 case DImode:
20259 fn = gen_store_conditionaldi;
20260 break;
20261 case TImode:
20262 fn = gen_store_conditionalti;
20263 break;
20264 default:
20265 gcc_unreachable ();
20266 }
20267
20268 /* Emit sync before stwcx. to address PPC405 Erratum. */
20269 if (PPC405_ERRATUM77)
20270 emit_insn (gen_hwsync ());
20271
20272 emit_insn (fn (res, mem, val));
20273 }
20274
20275 /* Expand barriers before and after a load_locked/store_cond sequence. */
20276
20277 static rtx
20278 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
20279 {
20280 rtx addr = XEXP (mem, 0);
20281 int strict_p = (reload_in_progress || reload_completed);
20282
20283 if (!legitimate_indirect_address_p (addr, strict_p)
20284 && !legitimate_indexed_address_p (addr, strict_p))
20285 {
20286 addr = force_reg (Pmode, addr);
20287 mem = replace_equiv_address_nv (mem, addr);
20288 }
20289
20290 switch (model)
20291 {
20292 case MEMMODEL_RELAXED:
20293 case MEMMODEL_CONSUME:
20294 case MEMMODEL_ACQUIRE:
20295 break;
20296 case MEMMODEL_RELEASE:
20297 case MEMMODEL_ACQ_REL:
20298 emit_insn (gen_lwsync ());
20299 break;
20300 case MEMMODEL_SEQ_CST:
20301 emit_insn (gen_hwsync ());
20302 break;
20303 default:
20304 gcc_unreachable ();
20305 }
20306 return mem;
20307 }
20308
20309 static void
20310 rs6000_post_atomic_barrier (enum memmodel model)
20311 {
20312 switch (model)
20313 {
20314 case MEMMODEL_RELAXED:
20315 case MEMMODEL_CONSUME:
20316 case MEMMODEL_RELEASE:
20317 break;
20318 case MEMMODEL_ACQUIRE:
20319 case MEMMODEL_ACQ_REL:
20320 case MEMMODEL_SEQ_CST:
20321 emit_insn (gen_isync ());
20322 break;
20323 default:
20324 gcc_unreachable ();
20325 }
20326 }
20327
20328 /* A subroutine of the various atomic expanders. For sub-word operations,
20329 we must adjust things to operate on SImode. Given the original MEM,
20330 return a new aligned MEM. Also build and return the quantities by
20331 which to shift and mask. */
20332
20333 static rtx
20334 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
20335 {
20336 rtx addr, align, shift, mask, mem;
20337 HOST_WIDE_INT shift_mask;
20338 machine_mode mode = GET_MODE (orig_mem);
20339
20340 /* For smaller modes, we have to implement this via SImode. */
20341 shift_mask = (mode == QImode ? 0x18 : 0x10);
20342
20343 addr = XEXP (orig_mem, 0);
20344 addr = force_reg (GET_MODE (addr), addr);
20345
20346 /* Aligned memory containing subword. Generate a new memory. We
20347 do not want any of the existing MEM_ATTR data, as we're now
20348 accessing memory outside the original object. */
20349 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
20350 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20351 mem = gen_rtx_MEM (SImode, align);
20352 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
20353 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
20354 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
20355
20356 /* Shift amount for subword relative to aligned word. */
20357 shift = gen_reg_rtx (SImode);
20358 addr = gen_lowpart (SImode, addr);
20359 emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
20360 if (BYTES_BIG_ENDIAN)
20361 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
20362 shift, 1, OPTAB_LIB_WIDEN);
20363 *pshift = shift;
20364
20365 /* Mask for insertion. */
20366 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
20367 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
20368 *pmask = mask;
20369
20370 return mem;
20371 }
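/* Worked example: a little-endian HImode access at address 0x1002
   yields the aligned SImode MEM at 0x1000, *PSHIFT = 16 (the low
   address bits times 8) and *PMASK = 0xffff << 16; big-endian would
   additionally XOR the shift with 0x10. */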
20372
20373 /* A subroutine of the various atomic expanders. For sub-word operands,
20374 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
20375
20376 static rtx
20377 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
20378 {
20379 rtx x;
20380
20381 x = gen_reg_rtx (SImode);
20382 emit_insn (gen_rtx_SET (VOIDmode, x,
20383 gen_rtx_AND (SImode,
20384 gen_rtx_NOT (SImode, mask),
20385 oldval)));
20386
20387 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
20388
20389 return x;
20390 }
20391
20392 /* A subroutine of the various atomic expanders. For sub-word operands,
20393 extract WIDE to NARROW via SHIFT. */
20394
20395 static void
20396 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
20397 {
20398 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
20399 wide, 1, OPTAB_LIB_WIDEN);
20400 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
20401 }
20402
20403 /* Expand an atomic compare and swap operation. */
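/* For a strong SImode compare-and-swap this emits, in outline:

   .L1:    lwarx   rRET,0,rMEM
           cmpw    cr0,rRET,rOLD
           bne-    cr0,.L2
           stwcx.  rNEW,0,rMEM
           bne-    cr0,.L1
   .L2:

   bracketed by whatever barriers the memory model requires. */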
20404
20405 void
20406 rs6000_expand_atomic_compare_and_swap (rtx operands[])
20407 {
20408 rtx boolval, retval, mem, oldval, newval, cond;
20409 rtx label1, label2, x, mask, shift;
20410 machine_mode mode, orig_mode;
20411 enum memmodel mod_s, mod_f;
20412 bool is_weak;
20413
20414 boolval = operands[0];
20415 retval = operands[1];
20416 mem = operands[2];
20417 oldval = operands[3];
20418 newval = operands[4];
20419 is_weak = (INTVAL (operands[5]) != 0);
20420 mod_s = (enum memmodel) INTVAL (operands[6]);
20421 mod_f = (enum memmodel) INTVAL (operands[7]);
20422 orig_mode = mode = GET_MODE (mem);
20423
20424 mask = shift = NULL_RTX;
20425 if (mode == QImode || mode == HImode)
20426 {
20427 /* Before power8, we didn't have access to lbarx/lharx, so generate
20428 lwarx and shift/mask operations. With power8, we need to do the
20429 comparison in SImode, but the store is still done in QI/HImode. */
20430 oldval = convert_modes (SImode, mode, oldval, 1);
20431
20432 if (!TARGET_SYNC_HI_QI)
20433 {
20434 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20435
20436 /* Shift and mask OLDVAL into position within the word. */
20437 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
20438 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20439
20440 /* Shift and mask NEWVAL into position within the word. */
20441 newval = convert_modes (SImode, mode, newval, 1);
20442 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
20443 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20444 }
20445
20446 /* Prepare to adjust the return value. */
20447 retval = gen_reg_rtx (SImode);
20448 mode = SImode;
20449 }
20450 else if (reg_overlap_mentioned_p (retval, oldval))
20451 oldval = copy_to_reg (oldval);
20452
20453 mem = rs6000_pre_atomic_barrier (mem, mod_s);
20454
20455 label1 = NULL_RTX;
20456 if (!is_weak)
20457 {
20458 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20459 emit_label (XEXP (label1, 0));
20460 }
20461 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20462
20463 emit_load_locked (mode, retval, mem);
20464
20465 x = retval;
20466 if (mask)
20467 {
20468 x = expand_simple_binop (SImode, AND, retval, mask,
20469 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20470 }
20471
20472 cond = gen_reg_rtx (CCmode);
20473 /* If we have TImode, synthesize a comparison. */
20474 if (mode != TImode)
20475 x = gen_rtx_COMPARE (CCmode, x, oldval);
20476 else
20477 {
20478 rtx xor1_result = gen_reg_rtx (DImode);
20479 rtx xor2_result = gen_reg_rtx (DImode);
20480 rtx or_result = gen_reg_rtx (DImode);
20481 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
20482 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
20483 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
20484 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
20485
20486 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
20487 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
20488 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
20489 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
20490 }
20491
20492 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
20493
20494 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20495 emit_unlikely_jump (x, label2);
20496
20497 x = newval;
20498 if (mask)
20499 x = rs6000_mask_atomic_subword (retval, newval, mask);
20500
20501 emit_store_conditional (orig_mode, cond, mem, x);
20502
20503 if (!is_weak)
20504 {
20505 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20506 emit_unlikely_jump (x, label1);
20507 }
20508
20509 if (mod_f != MEMMODEL_RELAXED)
20510 emit_label (XEXP (label2, 0));
20511
20512 rs6000_post_atomic_barrier (mod_s);
20513
20514 if (mod_f == MEMMODEL_RELAXED)
20515 emit_label (XEXP (label2, 0));
20516
20517 if (shift)
20518 rs6000_finish_atomic_subword (operands[1], retval, shift);
20519 else if (mode != GET_MODE (operands[1]))
20520 convert_move (operands[1], retval, 1);
20521
20522 /* In all cases, CR0 contains EQ on success, and NE on failure. */
20523 x = gen_rtx_EQ (SImode, cond, const0_rtx);
20524 emit_insn (gen_rtx_SET (VOIDmode, boolval, x));
20525 }
20526
20527 /* Expand an atomic exchange operation. */
20528
20529 void
20530 rs6000_expand_atomic_exchange (rtx operands[])
20531 {
20532 rtx retval, mem, val, cond;
20533 machine_mode mode;
20534 enum memmodel model;
20535 rtx label, x, mask, shift;
20536
20537 retval = operands[0];
20538 mem = operands[1];
20539 val = operands[2];
20540 model = (enum memmodel) INTVAL (operands[3]);
20541 mode = GET_MODE (mem);
20542
20543 mask = shift = NULL_RTX;
20544 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
20545 {
20546 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20547
20548 /* Shift and mask VAL into position within the word. */
20549 val = convert_modes (SImode, mode, val, 1);
20550 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20551 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20552
20553 /* Prepare to adjust the return value. */
20554 retval = gen_reg_rtx (SImode);
20555 mode = SImode;
20556 }
20557
20558 mem = rs6000_pre_atomic_barrier (mem, model);
20559
20560 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20561 emit_label (XEXP (label, 0));
20562
20563 emit_load_locked (mode, retval, mem);
20564
20565 x = val;
20566 if (mask)
20567 x = rs6000_mask_atomic_subword (retval, val, mask);
20568
20569 cond = gen_reg_rtx (CCmode);
20570 emit_store_conditional (mode, cond, mem, x);
20571
20572 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20573 emit_unlikely_jump (x, label);
20574
20575 rs6000_post_atomic_barrier (model);
20576
20577 if (shift)
20578 rs6000_finish_atomic_subword (operands[0], retval, shift);
20579 }
20580
20581 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
20582 to perform. MEM is the memory on which to operate. VAL is the second
20583 operand of the binary operator. BEFORE and AFTER are optional locations to
20584 return the value of MEM either before or after the operation. MODEL_RTX
20585 is a CONST_INT containing the memory model to use. */
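/* For a full-word PLUS this emits, in outline:

   .L1:    lwarx   rBEFORE,0,rMEM
           add     rAFTER,rBEFORE,rVAL
           stwcx.  rAFTER,0,rMEM
           bne-    cr0,.L1

   again bracketed by the barriers the memory model requires. */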
20586
20587 void
20588 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
20589 rtx orig_before, rtx orig_after, rtx model_rtx)
20590 {
20591 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
20592 machine_mode mode = GET_MODE (mem);
20593 machine_mode store_mode = mode;
20594 rtx label, x, cond, mask, shift;
20595 rtx before = orig_before, after = orig_after;
20596
20597 mask = shift = NULL_RTX;
20598 /* On power8, we want to use SImode for the operation. On previous
20599 systems, do the operation in a full word and shift/mask to get the
20600 proper byte or halfword. */
20601 if (mode == QImode || mode == HImode)
20602 {
20603 if (TARGET_SYNC_HI_QI)
20604 {
20605 val = convert_modes (SImode, mode, val, 1);
20606
20607 /* Prepare to adjust the return value. */
20608 before = gen_reg_rtx (SImode);
20609 if (after)
20610 after = gen_reg_rtx (SImode);
20611 mode = SImode;
20612 }
20613 else
20614 {
20615 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20616
20617 /* Shift and mask VAL into position within the word. */
20618 val = convert_modes (SImode, mode, val, 1);
20619 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20620 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20621
20622 switch (code)
20623 {
20624 case IOR:
20625 case XOR:
20626 /* We've already zero-extended VAL. That is sufficient to
20627 make certain that it does not affect other bits. */
20628 mask = NULL;
20629 break;
20630
20631 case AND:
20632 /* If we make certain that all of the other bits in VAL are
20633 set, that will be sufficient to not affect other bits. */
20634 x = gen_rtx_NOT (SImode, mask);
20635 x = gen_rtx_IOR (SImode, x, val);
20636 emit_insn (gen_rtx_SET (VOIDmode, val, x));
20637 mask = NULL;
20638 break;
20639
20640 case NOT:
20641 case PLUS:
20642 case MINUS:
20643 /* These will all affect bits outside the field and need
20644 adjustment via MASK within the loop. */
20645 break;
20646
20647 default:
20648 gcc_unreachable ();
20649 }
20650
20651 /* Prepare to adjust the return value. */
20652 before = gen_reg_rtx (SImode);
20653 if (after)
20654 after = gen_reg_rtx (SImode);
20655 store_mode = mode = SImode;
20656 }
20657 }
20658
20659 mem = rs6000_pre_atomic_barrier (mem, model);
20660
20661 label = gen_label_rtx ();
20662 emit_label (label);
20663 label = gen_rtx_LABEL_REF (VOIDmode, label);
20664
20665 if (before == NULL_RTX)
20666 before = gen_reg_rtx (mode);
20667
20668 emit_load_locked (mode, before, mem);
20669
20670 if (code == NOT)
20671 {
20672 x = expand_simple_binop (mode, AND, before, val,
20673 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20674 after = expand_simple_unop (mode, NOT, x, after, 1);
20675 }
20676 else
20677 {
20678 after = expand_simple_binop (mode, code, before, val,
20679 after, 1, OPTAB_LIB_WIDEN);
20680 }
20681
20682 x = after;
20683 if (mask)
20684 {
20685 x = expand_simple_binop (SImode, AND, after, mask,
20686 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20687 x = rs6000_mask_atomic_subword (before, x, mask);
20688 }
20689 else if (store_mode != mode)
20690 x = convert_modes (store_mode, mode, x, 1);
20691
20692 cond = gen_reg_rtx (CCmode);
20693 emit_store_conditional (store_mode, cond, mem, x);
20694
20695 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20696 emit_unlikely_jump (x, label);
20697
20698 rs6000_post_atomic_barrier (model);
20699
20700 if (shift)
20701 {
20702 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
20703 then do the calculations in a SImode register. */
20704 if (orig_before)
20705 rs6000_finish_atomic_subword (orig_before, before, shift);
20706 if (orig_after)
20707 rs6000_finish_atomic_subword (orig_after, after, shift);
20708 }
20709 else if (store_mode != mode)
20710 {
20711 /* QImode/HImode on machines with lbarx/lharx where we do the native
20712 operation and then do the calculations in a SImode register. */
20713 if (orig_before)
20714 convert_move (orig_before, before, 1);
20715 if (orig_after)
20716 convert_move (orig_after, after, 1);
20717 }
20718 else if (orig_after && after != orig_after)
20719 emit_move_insn (orig_after, after);
20720 }
20721
20722 /* Emit instructions to move SRC to DST. Called by splitters for
20723 multi-register moves. It will emit at most one instruction for
20724 each register that is accessed; that is, it won't emit li/lis pairs
20725 (or equivalent for 64-bit code). One of SRC or DST must be a hard
20726 register. */
20727
20728 void
20729 rs6000_split_multireg_move (rtx dst, rtx src)
20730 {
20731 /* The register number of the first register being moved. */
20732 int reg;
20733 /* The mode that is to be moved. */
20734 machine_mode mode;
20735 /* The mode that the move is being done in, and its size. */
20736 machine_mode reg_mode;
20737 int reg_mode_size;
20738 /* The number of registers that will be moved. */
20739 int nregs;
20740
20741 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
20742 mode = GET_MODE (dst);
20743 nregs = hard_regno_nregs[reg][mode];
20744 if (FP_REGNO_P (reg))
20745 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
20746 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
20747 else if (ALTIVEC_REGNO_P (reg))
20748 reg_mode = V16QImode;
20749 else if (TARGET_E500_DOUBLE && mode == TFmode)
20750 reg_mode = DFmode;
20751 else
20752 reg_mode = word_mode;
20753 reg_mode_size = GET_MODE_SIZE (reg_mode);
20754
20755 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
20756
20757 /* TDmode residing in FP registers is special, since the ISA requires that
20758 the lower-numbered word of a register pair is always the most significant
20759 word, even in little-endian mode. This does not match the usual subreg
20760 semantics, so we cannot use simplify_gen_subreg in those cases. Access
20761 the appropriate constituent registers "by hand" in little-endian mode.
20762
20763 Note we do not need to check for destructive overlap here since TDmode
20764 can only reside in even/odd register pairs. */
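/* Concretely: for a TDmode value in the pair (f2,f3) on a
   little-endian target, nregs == 2 and the loop below pairs subreg
   word 0 with f3 (REGNO + nregs - 1 - 0) and subreg word 1 with f2,
   reversing the usual little-endian ordering to honor the ISA's
   most-significant-word-in-the-lower-register rule.  */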
20765 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
20766 {
20767 rtx p_src, p_dst;
20768 int i;
20769
20770 for (i = 0; i < nregs; i++)
20771 {
20772 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
20773 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
20774 else
20775 p_src = simplify_gen_subreg (reg_mode, src, mode,
20776 i * reg_mode_size);
20777
20778 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
20779 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
20780 else
20781 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
20782 i * reg_mode_size);
20783
20784 emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
20785 }
20786
20787 return;
20788 }
20789
20790 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
20791 {
20792 /* Move the register range backwards if we might have destructive
20793 overlap. */
20794 int i;
20795 for (i = nregs - 1; i >= 0; i--)
20796 emit_insn (gen_rtx_SET (VOIDmode,
20797 simplify_gen_subreg (reg_mode, dst, mode,
20798 i * reg_mode_size),
20799 simplify_gen_subreg (reg_mode, src, mode,
20800 i * reg_mode_size)));
20801 }
20802 else
20803 {
20804 int i;
20805 int j = -1;
20806 bool used_update = false;
20807 rtx restore_basereg = NULL_RTX;
20808
20809 if (MEM_P (src) && INT_REGNO_P (reg))
20810 {
20811 rtx breg;
20812
20813 if (GET_CODE (XEXP (src, 0)) == PRE_INC
20814 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
20815 {
20816 rtx delta_rtx;
20817 breg = XEXP (XEXP (src, 0), 0);
20818 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
20819 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
20820 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
20821 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20822 src = replace_equiv_address (src, breg);
20823 }
20824 else if (! rs6000_offsettable_memref_p (src, reg_mode))
20825 {
20826 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
20827 {
20828 rtx basereg = XEXP (XEXP (src, 0), 0);
20829 if (TARGET_UPDATE)
20830 {
20831 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
20832 emit_insn (gen_rtx_SET (VOIDmode, ndst,
20833 gen_rtx_MEM (reg_mode, XEXP (src, 0))));
20834 used_update = true;
20835 }
20836 else
20837 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20838 XEXP (XEXP (src, 0), 1)));
20839 src = replace_equiv_address (src, basereg);
20840 }
20841 else
20842 {
20843 rtx basereg = gen_rtx_REG (Pmode, reg);
20844 emit_insn (gen_rtx_SET (VOIDmode, basereg, XEXP (src, 0)));
20845 src = replace_equiv_address (src, basereg);
20846 }
20847 }
20848
20849 breg = XEXP (src, 0);
20850 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
20851 breg = XEXP (breg, 0);
20852
20853 /* If the base register we are using to address memory is
20854 also a destination reg, then change that register last. */
20855 if (REG_P (breg)
20856 && REGNO (breg) >= REGNO (dst)
20857 && REGNO (breg) < REGNO (dst) + nregs)
20858 j = REGNO (breg) - REGNO (dst);
20859 }
20860 else if (MEM_P (dst) && INT_REGNO_P (reg))
20861 {
20862 rtx breg;
20863
20864 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
20865 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
20866 {
20867 rtx delta_rtx;
20868 breg = XEXP (XEXP (dst, 0), 0);
20869 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
20870 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
20871 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
20872
20873 /* We have to update the breg before doing the store.
20874 Use store with update, if available. */
20875
20876 if (TARGET_UPDATE)
20877 {
20878 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20879 emit_insn (TARGET_32BIT
20880 ? (TARGET_POWERPC64
20881 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
20882 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
20883 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
20884 used_update = true;
20885 }
20886 else
20887 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20888 dst = replace_equiv_address (dst, breg);
20889 }
20890 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
20891 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
20892 {
20893 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
20894 {
20895 rtx basereg = XEXP (XEXP (dst, 0), 0);
20896 if (TARGET_UPDATE)
20897 {
20898 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20899 emit_insn (gen_rtx_SET (VOIDmode,
20900 gen_rtx_MEM (reg_mode, XEXP (dst, 0)), nsrc));
20901 used_update = true;
20902 }
20903 else
20904 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20905 XEXP (XEXP (dst, 0), 1)));
20906 dst = replace_equiv_address (dst, basereg);
20907 }
20908 else
20909 {
20910 rtx basereg = XEXP (XEXP (dst, 0), 0);
20911 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
20912 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
20913 && REG_P (basereg)
20914 && REG_P (offsetreg)
20915 && REGNO (basereg) != REGNO (offsetreg));
20916 if (REGNO (basereg) == 0)
20917 {
20918 rtx tmp = offsetreg;
20919 offsetreg = basereg;
20920 basereg = tmp;
20921 }
20922 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
20923 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
20924 dst = replace_equiv_address (dst, basereg);
20925 }
20926 }
20927 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
20928 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
20929 }
20930
20931 for (i = 0; i < nregs; i++)
20932 {
20933 /* Calculate index to next subword. */
20934 ++j;
20935 if (j == nregs)
20936 j = 0;
20937
20938 /* If the compiler already emitted the move of the first word via
20939 a store with update, there is no need to do anything more. */
20940 if (j == 0 && used_update)
20941 continue;
20942
20943 emit_insn (gen_rtx_SET (VOIDmode,
20944 simplify_gen_subreg (reg_mode, dst, mode,
20945 j * reg_mode_size),
20946 simplify_gen_subreg (reg_mode, src, mode,
20947 j * reg_mode_size)));
20948 }
20949 if (restore_basereg != NULL_RTX)
20950 emit_insn (restore_basereg);
20951 }
20952 }
20953
20954 \f
20955 /* This page contains routines that are used to determine what the
20956 function prologue and epilogue code will do and write them out. */
20957
20958 static inline bool
20959 save_reg_p (int r)
20960 {
20961 return !call_used_regs[r] && df_regs_ever_live_p (r);
20962 }
20963
20964 /* Return the first fixed-point register that is required to be
20965 saved. 32 if none. */
20966
20967 int
20968 first_reg_to_save (void)
20969 {
20970 int first_reg;
20971
20972 /* Find lowest numbered live register. */
20973 for (first_reg = 13; first_reg <= 31; first_reg++)
20974 if (save_reg_p (first_reg))
20975 break;
20976
20977 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
20978 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
20979 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
20980 || (TARGET_TOC && TARGET_MINIMAL_TOC))
20981 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20982 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
20983
20984 #if TARGET_MACHO
20985 if (flag_pic
20986 && crtl->uses_pic_offset_table
20987 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
20988 return RS6000_PIC_OFFSET_TABLE_REGNUM;
20989 #endif
20990
20991 return first_reg;
20992 }
20993
20994 /* Similar, for FP regs. */
20995
20996 int
20997 first_fp_reg_to_save (void)
20998 {
20999 int first_reg;
21000
21001 /* Find lowest numbered live register. */
21002 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
21003 if (save_reg_p (first_reg))
21004 break;
21005
21006 return first_reg;
21007 }
21008
21009 /* Similar, for AltiVec regs. */
21010
21011 static int
21012 first_altivec_reg_to_save (void)
21013 {
21014 int i;
21015
21016 /* The stack frame remains as is unless we are using the AltiVec ABI. */
21017 if (! TARGET_ALTIVEC_ABI)
21018 return LAST_ALTIVEC_REGNO + 1;
21019
21020 /* On Darwin, the unwind routines are compiled without
21021 TARGET_ALTIVEC, and use save_world to save/restore the
21022 altivec registers when necessary. */
21023 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21024 && ! TARGET_ALTIVEC)
21025 return FIRST_ALTIVEC_REGNO + 20;
21026
21027 /* Find lowest numbered live register. */
21028 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
21029 if (save_reg_p (i))
21030 break;
21031
21032 return i;
21033 }
21034
21035 /* Return a 32-bit mask of the AltiVec registers we need to set in
21036 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
21037 the 32-bit word is 0. */
21038
21039 static unsigned int
21040 compute_vrsave_mask (void)
21041 {
21042 unsigned int i, mask = 0;
21043
21044 /* On Darwin, the unwind routines are compiled without
21045 TARGET_ALTIVEC, and use save_world to save/restore the
21046 call-saved altivec registers when necessary. */
21047 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21048 && ! TARGET_ALTIVEC)
21049 mask |= 0xFFF;
21050
21051 /* First, find out if we use _any_ altivec registers. */
21052 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
21053 if (df_regs_ever_live_p (i))
21054 mask |= ALTIVEC_REG_BIT (i);
21055
21056 if (mask == 0)
21057 return mask;
21058
21059 /* Next, remove the argument registers from the set. These must
21060 be in the VRSAVE mask set by the caller, so we don't need to add
21061 them in again. More importantly, the mask we compute here is
21062 used to generate CLOBBERs in the set_vrsave insn, and we do not
21063 wish the argument registers to die. */
21064 for (i = crtl->args.info.vregno - 1; i >= ALTIVEC_ARG_MIN_REG; --i)
21065 mask &= ~ALTIVEC_REG_BIT (i);
21066
21067 /* Similarly, remove the return value from the set. */
21068 {
21069 bool yes = false;
21070 diddle_return_value (is_altivec_return_reg, &yes);
21071 if (yes)
21072 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
21073 }
21074
21075 return mask;
21076 }
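/* As an illustration of the bit layout: ALTIVEC_REG_BIT uses
   big-endian bit numbering, so V0 maps to the most significant bit
   and V31 to the least significant.  That is why the Darwin
   save_world case above ORs in 0xFFF: those twelve low-order bits
   are exactly the call-saved registers V20..V31.  */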
21077
21078 /* For a very restricted set of circumstances, we can cut down the
21079 size of prologues/epilogues by calling our own save/restore-the-world
21080 routines. */
21081
21082 static void
21083 compute_save_world_info (rs6000_stack_t *info_ptr)
21084 {
21085 info_ptr->world_save_p = 1;
21086 info_ptr->world_save_p
21087 = (WORLD_SAVE_P (info_ptr)
21088 && DEFAULT_ABI == ABI_DARWIN
21089 && !cfun->has_nonlocal_label
21090 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
21091 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
21092 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
21093 && info_ptr->cr_save_p);
21094
21095 /* This will not work in conjunction with sibcalls. Make sure there
21096 are none. (This check is expensive, but seldom executed.) */
21097 if (WORLD_SAVE_P (info_ptr))
21098 {
21099 rtx_insn *insn;
21100 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
21101 if (CALL_P (insn) && SIBLING_CALL_P (insn))
21102 {
21103 info_ptr->world_save_p = 0;
21104 break;
21105 }
21106 }
21107
21108 if (WORLD_SAVE_P (info_ptr))
21109 {
21110 /* Even if we're not touching VRsave, make sure there's room on the
21111 stack for it, if it looks like we're calling SAVE_WORLD, which
21112 will attempt to save it. */
21113 info_ptr->vrsave_size = 4;
21114
21115 /* If we are going to save the world, we need to save the link register too. */
21116 info_ptr->lr_save_p = 1;
21117
21118 /* "Save" the VRsave register too if we're saving the world. */
21119 if (info_ptr->vrsave_mask == 0)
21120 info_ptr->vrsave_mask = compute_vrsave_mask ();
21121
21122 /* Because the Darwin register save/restore routines only handle
21123 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
21124 check. */
21125 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
21126 && (info_ptr->first_altivec_reg_save
21127 >= FIRST_SAVED_ALTIVEC_REGNO));
21128 }
21129 return;
21130 }
21131
21132
21133 static void
21134 is_altivec_return_reg (rtx reg, void *xyes)
21135 {
21136 bool *yes = (bool *) xyes;
21137 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
21138 *yes = true;
21139 }
21140
21141 \f
21142 /* Look for user-defined global regs in the range FIRST to LAST-1.
21143 We should not restore these, and so cannot use lmw or out-of-line
21144 restore functions if there are any. We also can't save them
21145 (well, emit frame notes for them), because frame unwinding during
21146 exception handling will restore saved registers. */
21147
21148 static bool
21149 global_regs_p (unsigned first, unsigned last)
21150 {
21151 while (first < last)
21152 if (global_regs[first++])
21153 return true;
21154 return false;
21155 }
21156
21157 /* Determine the strategy for saving/restoring registers. */
21158
21159 enum {
21160 SAVRES_MULTIPLE = 0x1,
21161 SAVE_INLINE_FPRS = 0x2,
21162 SAVE_INLINE_GPRS = 0x4,
21163 REST_INLINE_FPRS = 0x8,
21164 REST_INLINE_GPRS = 0x10,
21165 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
21166 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
21167 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
21168 SAVE_INLINE_VRS = 0x100,
21169 REST_INLINE_VRS = 0x200
21170 };
21171
21172 static int
21173 rs6000_savres_strategy (rs6000_stack_t *info,
21174 bool using_static_chain_p)
21175 {
21176 int strategy = 0;
21177 bool lr_save_p;
21178
21179 if (TARGET_MULTIPLE
21180 && !TARGET_POWERPC64
21181 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
21182 && info->first_gp_reg_save < 31
21183 && !global_regs_p (info->first_gp_reg_save, 32))
21184 strategy |= SAVRES_MULTIPLE;
21185
21186 if (crtl->calls_eh_return
21187 || cfun->machine->ra_need_lr)
21188 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
21189 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
21190 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21191
21192 if (info->first_fp_reg_save == 64
21193 /* The out-of-line FP routines use double-precision stores;
21194 we can't use those routines if we don't have such stores. */
21195 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
21196 || global_regs_p (info->first_fp_reg_save, 64))
21197 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21198
21199 if (info->first_gp_reg_save == 32
21200 || (!(strategy & SAVRES_MULTIPLE)
21201 && global_regs_p (info->first_gp_reg_save, 32)))
21202 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21203
21204 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
21205 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
21206 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21207
21208 /* Define cutoff for using out-of-line functions to save registers. */
21209 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
21210 {
21211 if (!optimize_size)
21212 {
21213 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21214 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21215 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21216 }
21217 else
21218 {
21219 /* Prefer out-of-line restore if it will exit. */
21220 if (info->first_fp_reg_save > 61)
21221 strategy |= SAVE_INLINE_FPRS;
21222 if (info->first_gp_reg_save > 29)
21223 {
21224 if (info->first_fp_reg_save == 64)
21225 strategy |= SAVE_INLINE_GPRS;
21226 else
21227 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21228 }
21229 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
21230 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21231 }
21232 }
21233 else if (DEFAULT_ABI == ABI_DARWIN)
21234 {
21235 if (info->first_fp_reg_save > 60)
21236 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21237 if (info->first_gp_reg_save > 29)
21238 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21239 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21240 }
21241 else
21242 {
21243 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
21244 if (info->first_fp_reg_save > 61)
21245 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21246 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21247 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21248 }
21249
21250 /* Don't bother to try to save things out-of-line if r11 is occupied
21251 by the static chain. It would require too much fiddling and the
21252 static chain is rarely used anyway. FPRs are saved w.r.t the stack
21253 pointer on Darwin, and AIX uses r1 or r12. */
21254 if (using_static_chain_p
21255 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
21256 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
21257 | SAVE_INLINE_GPRS
21258 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21259
21260 /* We can only use the out-of-line routines to restore if we've
21261 saved all the registers from first_fp_reg_save in the prologue.
21262 Otherwise, we risk loading garbage. */
21263 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
21264 {
21265 int i;
21266
21267 for (i = info->first_fp_reg_save; i < 64; i++)
21268 if (!save_reg_p (i))
21269 {
21270 strategy |= REST_INLINE_FPRS;
21271 break;
21272 }
21273 }
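/* For instance, if first_fp_reg_save names f14 but f20 is never live
   in the function, the inline prologue code leaves f20's stack slot
   unwritten; an out-of-line "restore f14..f31" routine would then
   reload garbage into f20, which is why REST_INLINE_FPRS is forced
   above.  */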
21274
21275 /* If we are going to use store multiple, then don't even bother
21276 with the out-of-line routines, since the store-multiple
21277 instruction will always be smaller. */
21278 if ((strategy & SAVRES_MULTIPLE))
21279 strategy |= SAVE_INLINE_GPRS;
21280
21281 /* info->lr_save_p isn't yet set if the only reason lr needs to be
21282 saved is an out-of-line save or restore. Set up the value for
21283 the next test (excluding out-of-line gpr restore). */
21284 lr_save_p = (info->lr_save_p
21285 || !(strategy & SAVE_INLINE_GPRS)
21286 || !(strategy & SAVE_INLINE_FPRS)
21287 || !(strategy & SAVE_INLINE_VRS)
21288 || !(strategy & REST_INLINE_FPRS)
21289 || !(strategy & REST_INLINE_VRS));
21290
21291 /* The situation is more complicated with load multiple. We'd
21292 prefer to use the out-of-line routines for restores, since the
21293 "exit" out-of-line routines can handle the restore of LR and the
21294 frame teardown.  However, it doesn't make sense to use the
21295 out-of-line routine if that is the only reason we'd need to save
21296 LR, and we can't use the "exit" out-of-line gpr restore if we
21297 have saved some fprs; in those cases it is advantageous to use
21298 load multiple when available. */
21299 if ((strategy & SAVRES_MULTIPLE)
21300 && (!lr_save_p
21301 || info->first_fp_reg_save != 64))
21302 strategy |= REST_INLINE_GPRS;
21303
21304 /* Saving CR interferes with the exit routines used on the SPE, so
21305 just punt here. */
21306 if (TARGET_SPE_ABI
21307 && info->spe_64bit_regs_used
21308 && info->cr_save_p)
21309 strategy |= REST_INLINE_GPRS;
21310
21311 /* We can only use load multiple or the out-of-line routines to
21312 restore if we've used store multiple or out-of-line routines
21313 in the prologue, i.e. if we've saved all the registers from
21314 first_gp_reg_save. Otherwise, we risk loading garbage. */
21315 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
21316 == SAVE_INLINE_GPRS)
21317 {
21318 int i;
21319
21320 for (i = info->first_gp_reg_save; i < 32; i++)
21321 if (!save_reg_p (i))
21322 {
21323 strategy |= REST_INLINE_GPRS;
21324 break;
21325 }
21326 }
21327
21328 if (TARGET_ELF && TARGET_64BIT)
21329 {
21330 if (!(strategy & SAVE_INLINE_FPRS))
21331 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21332 else if (!(strategy & SAVE_INLINE_GPRS)
21333 && info->first_fp_reg_save == 64)
21334 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
21335 }
21336 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
21337 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
21338
21339 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
21340 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21341
21342 return strategy;
21343 }
21344
21345 /* Calculate the stack information for the current function. This is
21346 complicated by having two separate calling sequences, the AIX calling
21347 sequence and the V.4 calling sequence.
21348
21349 AIX (and Darwin/Mac OS X) stack frames look like:
21350 32-bit 64-bit
21351 SP----> +---------------------------------------+
21352 | back chain to caller | 0 0
21353 +---------------------------------------+
21354 | saved CR | 4 8 (8-11)
21355 +---------------------------------------+
21356 | saved LR | 8 16
21357 +---------------------------------------+
21358 | reserved for compilers | 12 24
21359 +---------------------------------------+
21360 | reserved for binders | 16 32
21361 +---------------------------------------+
21362 | saved TOC pointer | 20 40
21363 +---------------------------------------+
21364 | Parameter save area (P) | 24 48
21365 +---------------------------------------+
21366 | Alloca space (A) | 24+P etc.
21367 +---------------------------------------+
21368 | Local variable space (L) | 24+P+A
21369 +---------------------------------------+
21370 | Float/int conversion temporary (X) | 24+P+A+L
21371 +---------------------------------------+
21372 | Save area for AltiVec registers (W) | 24+P+A+L+X
21373 +---------------------------------------+
21374 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
21375 +---------------------------------------+
21376 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
21377 +---------------------------------------+
21378 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
21379 +---------------------------------------+
21380 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
21381 +---------------------------------------+
21382 old SP->| back chain to caller's caller |
21383 +---------------------------------------+
21384
21385 The required alignment for AIX configurations is two words (i.e., 8
21386 or 16 bytes).
21387
21388 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
21389
21390 SP----> +---------------------------------------+
21391 | Back chain to caller | 0
21392 +---------------------------------------+
21393 | Save area for CR | 8
21394 +---------------------------------------+
21395 | Saved LR | 16
21396 +---------------------------------------+
21397 | Saved TOC pointer | 24
21398 +---------------------------------------+
21399 | Parameter save area (P) | 32
21400 +---------------------------------------+
21401 | Alloca space (A) | 32+P
21402 +---------------------------------------+
21403 | Local variable space (L) | 32+P+A
21404 +---------------------------------------+
21405 | Save area for AltiVec registers (W) | 32+P+A+L
21406 +---------------------------------------+
21407 | AltiVec alignment padding (Y) | 32+P+A+L+W
21408 +---------------------------------------+
21409 | Save area for GP registers (G) | 32+P+A+L+W+Y
21410 +---------------------------------------+
21411 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
21412 +---------------------------------------+
21413 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
21414 +---------------------------------------+
21415
21416
21417 V.4 stack frames look like:
21418
21419 SP----> +---------------------------------------+
21420 | back chain to caller | 0
21421 +---------------------------------------+
21422 | caller's saved LR | 4
21423 +---------------------------------------+
21424 | Parameter save area (P) | 8
21425 +---------------------------------------+
21426 | Alloca space (A) | 8+P
21427 +---------------------------------------+
21428 | Varargs save area (V) | 8+P+A
21429 +---------------------------------------+
21430 | Local variable space (L) | 8+P+A+V
21431 +---------------------------------------+
21432 | Float/int conversion temporary (X) | 8+P+A+V+L
21433 +---------------------------------------+
21434 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
21435 +---------------------------------------+
21436 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
21437 +---------------------------------------+
21438 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
21439 +---------------------------------------+
21440 | SPE: area for 64-bit GP registers |
21441 +---------------------------------------+
21442 | SPE alignment padding |
21443 +---------------------------------------+
21444 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
21445 +---------------------------------------+
21446 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
21447 +---------------------------------------+
21448 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
21449 +---------------------------------------+
21450 old SP->| back chain to caller's caller |
21451 +---------------------------------------+
21452
21453 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
21454 given. (But note below and in sysv4.h that we require only 8 and
21455 may round up the size of our stack frame anyway. The historical
21456 reason is early versions of powerpc-linux which didn't properly
21457 align the stack at program startup. A happy side-effect is that
21458 -mno-eabi libraries can be used with -meabi programs.)
21459
21460 The EABI configuration defaults to the V.4 layout. However,
21461 the stack alignment requirements may differ. If -mno-eabi is not
21462 given, the required stack alignment is 8 bytes; if -mno-eabi is
21463 given, the required alignment is 16 bytes. (But see V.4 comment
21464 above.) */
21465
21466 #ifndef ABI_STACK_BOUNDARY
21467 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
21468 #endif
21469
21470 static rs6000_stack_t *
21471 rs6000_stack_info (void)
21472 {
21473 rs6000_stack_t *info_ptr = &stack_info;
21474 int reg_size = TARGET_32BIT ? 4 : 8;
21475 int ehrd_size;
21476 int ehcr_size;
21477 int save_align;
21478 int first_gp;
21479 HOST_WIDE_INT non_fixed_size;
21480 bool using_static_chain_p;
21481
21482 if (reload_completed && info_ptr->reload_completed)
21483 return info_ptr;
21484
21485 memset (info_ptr, 0, sizeof (*info_ptr));
21486 info_ptr->reload_completed = reload_completed;
21487
21488 if (TARGET_SPE)
21489 {
21490 /* Cache value so we don't rescan instruction chain over and over. */
21491 if (cfun->machine->insn_chain_scanned_p == 0)
21492 cfun->machine->insn_chain_scanned_p
21493 = spe_func_has_64bit_regs_p () + 1;
21494 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
21495 }
21496
21497 /* Select which calling sequence. */
21498 info_ptr->abi = DEFAULT_ABI;
21499
21500 /* Calculate which registers need to be saved & save area size. */
21501 info_ptr->first_gp_reg_save = first_reg_to_save ();
21502 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
21503 even if it currently looks like we won't. Reload may need it to
21504 get at a constant; if so, it will have already created a constant
21505 pool entry for it. */
21506 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
21507 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
21508 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
21509 && crtl->uses_const_pool
21510 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
21511 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
21512 else
21513 first_gp = info_ptr->first_gp_reg_save;
21514
21515 info_ptr->gp_size = reg_size * (32 - first_gp);
21516
21517 /* For the SPE, we have an additional upper 32-bits on each GPR.
21518 Ideally we should save the entire 64-bits only when the upper
21519 half is used in SIMD instructions. Since we only record
21520 registers live (not the size they are used in), this proves
21521 difficult because we'd have to traverse the instruction chain at
21522 the right time, taking reload into account. This is a real pain,
21523 so we opt to always save the GPRs in 64 bits if even one register
21524 is used in 64 bits.  Otherwise, all the registers in the frame
21525 get saved in 32 bits.
21526
21527 So, whenever we do save all GPRs (except the SP) in 64 bits, the
21528 traditional GP save area will be empty.
21529 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21530 info_ptr->gp_size = 0;
21531
21532 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
21533 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
21534
21535 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
21536 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
21537 - info_ptr->first_altivec_reg_save);
21538
21539 /* Does this function call anything? */
21540 info_ptr->calls_p = (! crtl->is_leaf
21541 || cfun->machine->ra_needs_full_frame);
21542
21543 /* Determine if we need to save the condition code registers. */
21544 if (df_regs_ever_live_p (CR2_REGNO)
21545 || df_regs_ever_live_p (CR3_REGNO)
21546 || df_regs_ever_live_p (CR4_REGNO))
21547 {
21548 info_ptr->cr_save_p = 1;
21549 if (DEFAULT_ABI == ABI_V4)
21550 info_ptr->cr_size = reg_size;
21551 }
21552
21553 /* If the current function calls __builtin_eh_return, then we need
21554 to allocate stack space for registers that will hold data for
21555 the exception handler. */
21556 if (crtl->calls_eh_return)
21557 {
21558 unsigned int i;
21559 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
21560 continue;
21561
21562 /* SPE saves EH registers in 64-bits. */
21563 ehrd_size = i * (TARGET_SPE_ABI
21564 && info_ptr->spe_64bit_regs_used != 0
21565 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
21566 }
21567 else
21568 ehrd_size = 0;
21569
21570 /* In the ELFv2 ABI, we also need to allocate space for separate
21571 CR field save areas if the function calls __builtin_eh_return. */
21572 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
21573 {
21574 /* This hard-codes that we have three call-saved CR fields. */
21575 ehcr_size = 3 * reg_size;
21576 /* We do *not* use the regular CR save mechanism. */
21577 info_ptr->cr_save_p = 0;
21578 }
21579 else
21580 ehcr_size = 0;
21581
21582 /* Determine various sizes. */
21583 info_ptr->reg_size = reg_size;
21584 info_ptr->fixed_size = RS6000_SAVE_AREA;
21585 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
21586 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
21587 TARGET_ALTIVEC ? 16 : 8);
21588 if (FRAME_GROWS_DOWNWARD)
21589 info_ptr->vars_size
21590 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
21591 + info_ptr->parm_size,
21592 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
21593 - (info_ptr->fixed_size + info_ptr->vars_size
21594 + info_ptr->parm_size);
21595
21596 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21597 info_ptr->spe_gp_size = 8 * (32 - first_gp);
21598 else
21599 info_ptr->spe_gp_size = 0;
21600
21601 if (TARGET_ALTIVEC_ABI)
21602 info_ptr->vrsave_mask = compute_vrsave_mask ();
21603 else
21604 info_ptr->vrsave_mask = 0;
21605
21606 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
21607 info_ptr->vrsave_size = 4;
21608 else
21609 info_ptr->vrsave_size = 0;
21610
21611 compute_save_world_info (info_ptr);
21612
21613 /* Calculate the offsets. */
21614 switch (DEFAULT_ABI)
21615 {
21616 case ABI_NONE:
21617 default:
21618 gcc_unreachable ();
21619
21620 case ABI_AIX:
21621 case ABI_ELFv2:
21622 case ABI_DARWIN:
21623 info_ptr->fp_save_offset = - info_ptr->fp_size;
21624 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21625
21626 if (TARGET_ALTIVEC_ABI)
21627 {
21628 info_ptr->vrsave_save_offset
21629 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
21630
21631 /* Align stack so vector save area is on a quadword boundary.
21632 The padding goes above the vectors. */
21633 if (info_ptr->altivec_size != 0)
21634 info_ptr->altivec_padding_size
21635 = info_ptr->vrsave_save_offset & 0xF;
21636 else
21637 info_ptr->altivec_padding_size = 0;
21638
21639 info_ptr->altivec_save_offset
21640 = info_ptr->vrsave_save_offset
21641 - info_ptr->altivec_padding_size
21642 - info_ptr->altivec_size;
21643 gcc_assert (info_ptr->altivec_size == 0
21644 || info_ptr->altivec_save_offset % 16 == 0);
21645
21646 /* Adjust for AltiVec case. */
21647 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
21648 }
21649 else
21650 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
21651
21652 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
21653 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
21654 info_ptr->lr_save_offset = 2*reg_size;
21655 break;
21656
21657 case ABI_V4:
21658 info_ptr->fp_save_offset = - info_ptr->fp_size;
21659 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21660 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
21661
21662 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21663 {
21664 /* Align stack so SPE GPR save area is aligned on a
21665 double-word boundary. */
21666 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
21667 info_ptr->spe_padding_size
21668 = 8 - (-info_ptr->cr_save_offset % 8);
21669 else
21670 info_ptr->spe_padding_size = 0;
21671
21672 info_ptr->spe_gp_save_offset
21673 = info_ptr->cr_save_offset
21674 - info_ptr->spe_padding_size
21675 - info_ptr->spe_gp_size;
21676
21677 /* Adjust for SPE case. */
21678 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
21679 }
21680 else if (TARGET_ALTIVEC_ABI)
21681 {
21682 info_ptr->vrsave_save_offset
21683 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
21684
21685 /* Align stack so vector save area is on a quadword boundary. */
21686 if (info_ptr->altivec_size != 0)
21687 info_ptr->altivec_padding_size
21688 = 16 - (-info_ptr->vrsave_save_offset % 16);
21689 else
21690 info_ptr->altivec_padding_size = 0;
21691
21692 info_ptr->altivec_save_offset
21693 = info_ptr->vrsave_save_offset
21694 - info_ptr->altivec_padding_size
21695 - info_ptr->altivec_size;
21696
21697 /* Adjust for AltiVec case. */
21698 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
21699 }
21700 else
21701 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
21702 info_ptr->ehrd_offset -= ehrd_size;
21703 info_ptr->lr_save_offset = reg_size;
21704 break;
21705 }
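/* A worked example for the AIX/ELFv2 64-bit case: saving only
   f30/f31 and r30/r31 gives fp_size = 16 and gp_size = 16, so
   fp_save_offset = -16 and gp_save_offset = -32 (relative to the
   incoming stack pointer), while cr_save_offset = 8 and
   lr_save_offset = 16 land in the frame header, matching the
   diagrams above.  (Illustrative numbers only.)  */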
21706
21707 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
21708 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
21709 + info_ptr->gp_size
21710 + info_ptr->altivec_size
21711 + info_ptr->altivec_padding_size
21712 + info_ptr->spe_gp_size
21713 + info_ptr->spe_padding_size
21714 + ehrd_size
21715 + ehcr_size
21716 + info_ptr->cr_size
21717 + info_ptr->vrsave_size,
21718 save_align);
21719
21720 non_fixed_size = (info_ptr->vars_size
21721 + info_ptr->parm_size
21722 + info_ptr->save_size);
21723
21724 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
21725 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
21726
21727 /* Determine if we need to save the link register. */
21728 if (info_ptr->calls_p
21729 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21730 && crtl->profile
21731 && !TARGET_PROFILE_KERNEL)
21732 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
21733 #ifdef TARGET_RELOCATABLE
21734 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
21735 #endif
21736 || rs6000_ra_ever_killed ())
21737 info_ptr->lr_save_p = 1;
21738
21739 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
21740 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
21741 && call_used_regs[STATIC_CHAIN_REGNUM]);
21742 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
21743 using_static_chain_p);
21744
21745 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
21746 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
21747 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
21748 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
21749 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
21750 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
21751 info_ptr->lr_save_p = 1;
21752
21753 if (info_ptr->lr_save_p)
21754 df_set_regs_ever_live (LR_REGNO, true);
21755
21756 /* Determine if we need to allocate any stack frame:
21757
21758 For AIX we need to push the stack if a frame pointer is needed
21759 (because the stack might be dynamically adjusted), if we are
21760 debugging, if we make calls, or if the sum of fp_save, gp_save,
21761 and local variables are more than the space needed to save all
21762 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
21763 + 18*8 = 288 (GPR13 reserved).
21764
21765 For V.4 we don't have the stack cushion that AIX uses, but assume
21766 that the debugger can handle stackless frames. */
21767
21768 if (info_ptr->calls_p)
21769 info_ptr->push_p = 1;
21770
21771 else if (DEFAULT_ABI == ABI_V4)
21772 info_ptr->push_p = non_fixed_size != 0;
21773
21774 else if (frame_pointer_needed)
21775 info_ptr->push_p = 1;
21776
21777 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
21778 info_ptr->push_p = 1;
21779
21780 else
21781 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
21782
21783 /* Zero offsets if we're not saving those registers. */
21784 if (info_ptr->fp_size == 0)
21785 info_ptr->fp_save_offset = 0;
21786
21787 if (info_ptr->gp_size == 0)
21788 info_ptr->gp_save_offset = 0;
21789
21790 if (! TARGET_ALTIVEC_ABI || info_ptr->altivec_size == 0)
21791 info_ptr->altivec_save_offset = 0;
21792
21793 /* Zero VRSAVE offset if not saved and restored. */
21794 if (! TARGET_ALTIVEC_VRSAVE || info_ptr->vrsave_mask == 0)
21795 info_ptr->vrsave_save_offset = 0;
21796
21797 if (! TARGET_SPE_ABI
21798 || info_ptr->spe_64bit_regs_used == 0
21799 || info_ptr->spe_gp_size == 0)
21800 info_ptr->spe_gp_save_offset = 0;
21801
21802 if (! info_ptr->lr_save_p)
21803 info_ptr->lr_save_offset = 0;
21804
21805 if (! info_ptr->cr_save_p)
21806 info_ptr->cr_save_offset = 0;
21807
21808 return info_ptr;
21809 }
21810
21811 /* Return true if the current function uses any GPRs in 64-bit SIMD
21812 mode. */
21813
21814 static bool
21815 spe_func_has_64bit_regs_p (void)
21816 {
21817 rtx_insn *insns, *insn;
21818
21819 /* Functions that save and restore all the call-saved registers will
21820 need to save/restore the registers in 64-bits. */
21821 if (crtl->calls_eh_return
21822 || cfun->calls_setjmp
21823 || crtl->has_nonlocal_goto)
21824 return true;
21825
21826 insns = get_insns ();
21827
21828 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
21829 {
21830 if (INSN_P (insn))
21831 {
21832 rtx i;
21833
21834 /* FIXME: This should be implemented with attributes...
21835
21836 (set_attr "spe64" "true")....then,
21837 if (get_spe64(insn)) return true;
21838
21839 It's the only reliable way to do the stuff below. */
21840
21841 i = PATTERN (insn);
21842 if (GET_CODE (i) == SET)
21843 {
21844 machine_mode mode = GET_MODE (SET_SRC (i));
21845
21846 if (SPE_VECTOR_MODE (mode))
21847 return true;
21848 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
21849 return true;
21850 }
21851 }
21852 }
21853
21854 return false;
21855 }
21856
21857 static void
21858 debug_stack_info (rs6000_stack_t *info)
21859 {
21860 const char *abi_string;
21861
21862 if (! info)
21863 info = rs6000_stack_info ();
21864
21865 fprintf (stderr, "\nStack information for function %s:\n",
21866 ((current_function_decl && DECL_NAME (current_function_decl))
21867 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
21868 : "<unknown>"));
21869
21870 switch (info->abi)
21871 {
21872 default: abi_string = "Unknown"; break;
21873 case ABI_NONE: abi_string = "NONE"; break;
21874 case ABI_AIX: abi_string = "AIX"; break;
21875 case ABI_ELFv2: abi_string = "ELFv2"; break;
21876 case ABI_DARWIN: abi_string = "Darwin"; break;
21877 case ABI_V4: abi_string = "V.4"; break;
21878 }
21879
21880 fprintf (stderr, "\tABI = %5s\n", abi_string);
21881
21882 if (TARGET_ALTIVEC_ABI)
21883 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
21884
21885 if (TARGET_SPE_ABI)
21886 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
21887
21888 if (info->first_gp_reg_save != 32)
21889 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
21890
21891 if (info->first_fp_reg_save != 64)
21892 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
21893
21894 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
21895 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
21896 info->first_altivec_reg_save);
21897
21898 if (info->lr_save_p)
21899 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
21900
21901 if (info->cr_save_p)
21902 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
21903
21904 if (info->vrsave_mask)
21905 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
21906
21907 if (info->push_p)
21908 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
21909
21910 if (info->calls_p)
21911 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
21912
21913 if (info->gp_save_offset)
21914 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
21915
21916 if (info->fp_save_offset)
21917 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
21918
21919 if (info->altivec_save_offset)
21920 fprintf (stderr, "\taltivec_save_offset = %5d\n",
21921 info->altivec_save_offset);
21922
21923 if (info->spe_gp_save_offset)
21924 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
21925 info->spe_gp_save_offset);
21926
21927 if (info->vrsave_save_offset)
21928 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
21929 info->vrsave_save_offset);
21930
21931 if (info->lr_save_offset)
21932 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
21933
21934 if (info->cr_save_offset)
21935 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
21936
21937 if (info->varargs_save_offset)
21938 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
21939
21940 if (info->total_size)
21941 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC "\n",
21942 info->total_size);
21943
21944 if (info->vars_size)
21945 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC "\n",
21946 info->vars_size);
21947
21948 if (info->parm_size)
21949 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
21950
21951 if (info->fixed_size)
21952 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
21953
21954 if (info->gp_size)
21955 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
21956
21957 if (info->spe_gp_size)
21958 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
21959
21960 if (info->fp_size)
21961 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
21962
21963 if (info->altivec_size)
21964 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
21965
21966 if (info->vrsave_size)
21967 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
21968
21969 if (info->altivec_padding_size)
21970 fprintf (stderr, "\taltivec_padding_size= %5d\n",
21971 info->altivec_padding_size);
21972
21973 if (info->spe_padding_size)
21974 fprintf (stderr, "\tspe_padding_size = %5d\n",
21975 info->spe_padding_size);
21976
21977 if (info->cr_size)
21978 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
21979
21980 if (info->save_size)
21981 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
21982
21983 if (info->reg_size != 4)
21984 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
21985
21986 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
21987
21988 fprintf (stderr, "\n");
21989 }
21990
21991 rtx
21992 rs6000_return_addr (int count, rtx frame)
21993 {
21994 /* Currently we don't optimize very well between prologue and body
21995 code, and for PIC code the generated code can actually be quite
21996 bad, so don't try to be too clever here. */
21997 if (count != 0
21998 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
21999 {
22000 cfun->machine->ra_needs_full_frame = 1;
22001
22002 return
22003 gen_rtx_MEM
22004 (Pmode,
22005 memory_address
22006 (Pmode,
22007 plus_constant (Pmode,
22008 copy_to_reg
22009 (gen_rtx_MEM (Pmode,
22010 memory_address (Pmode, frame))),
22011 RETURN_ADDRESS_OFFSET)));
22012 }
22013
22014 cfun->machine->ra_need_lr = 1;
22015 return get_hard_reg_initial_val (Pmode, LR_REGNO);
22016 }
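/* For example, __builtin_return_address (0) in non-PIC code reduces
   to the LR value on entry.  With COUNT != 0 (or under PIC) the RTX
   built above amounts to two dereferences, sketched in C as

	temp   = *(void **) frame;
	result = *(void **) ((char *) temp + RETURN_ADDRESS_OFFSET);

   i.e. follow one back-chain link, then read the LR save slot of
   that frame.  */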
22017
22018 /* Say whether a function is a candidate for sibcall handling or not. */
22019
22020 static bool
22021 rs6000_function_ok_for_sibcall (tree decl, tree exp)
22022 {
22023 tree fntype;
22024
22025 if (decl)
22026 fntype = TREE_TYPE (decl);
22027 else
22028 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
22029
22030 /* We can't do it if the called function has more vector parameters
22031 than the current function; there's nowhere to put the VRsave code. */
22032 if (TARGET_ALTIVEC_ABI
22033 && TARGET_ALTIVEC_VRSAVE
22034 && !(decl && decl == current_function_decl))
22035 {
22036 function_args_iterator args_iter;
22037 tree type;
22038 int nvreg = 0;
22039
22040 /* Functions with vector parameters are required to have a
22041 prototype, so the argument type info must be available
22042 here. */
22043 FOREACH_FUNCTION_ARGS (fntype, type, args_iter)
22044 if (TREE_CODE (type) == VECTOR_TYPE
22045 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22046 nvreg++;
22047
22048 FOREACH_FUNCTION_ARGS (TREE_TYPE (current_function_decl), type, args_iter)
22049 if (TREE_CODE (type) == VECTOR_TYPE
22050 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22051 nvreg--;
22052
22053 if (nvreg > 0)
22054 return false;
22055 }
22056
22057 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
22058 functions, because the callee may have a different TOC pointer from
22059 the caller's, and there's no way to ensure we restore the TOC when
22060 we return. With the secure-plt SYSV ABI we can't make non-local
22061 calls when -fpic/PIC because the plt call stubs use r30. */
22062 if (DEFAULT_ABI == ABI_DARWIN
22063 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22064 && decl
22065 && !DECL_EXTERNAL (decl)
22066 && (*targetm.binds_local_p) (decl))
22067 || (DEFAULT_ABI == ABI_V4
22068 && (!TARGET_SECURE_PLT
22069 || !flag_pic
22070 || (decl
22071 && (*targetm.binds_local_p) (decl)))))
22072 {
22073 tree attr_list = TYPE_ATTRIBUTES (fntype);
22074
22075 if (!lookup_attribute ("longcall", attr_list)
22076 || lookup_attribute ("shortcall", attr_list))
22077 return true;
22078 }
22079
22080 return false;
22081 }
22082
22083 static int
22084 rs6000_ra_ever_killed (void)
22085 {
22086 rtx_insn *top;
22087 rtx reg;
22088 rtx_insn *insn;
22089
22090 if (cfun->is_thunk)
22091 return 0;
22092
22093 if (cfun->machine->lr_save_state)
22094 return cfun->machine->lr_save_state - 1;
22095
22096 /* regs_ever_live has LR marked as used if any sibcalls are present,
22097 but this should not force saving and restoring in the
22098 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
22099 clobbers LR, so that is inappropriate. */
22100
22101 /* Also, the prologue can generate a store into LR that
22102 doesn't really count, like this:
22103
22104 move LR->R0
22105 bcl to set PIC register
22106 move LR->R31
22107 move R0->LR
22108
22109 When we're called from the epilogue, we need to avoid counting
22110 this as a store. */
22111
22112 push_topmost_sequence ();
22113 top = get_insns ();
22114 pop_topmost_sequence ();
22115 reg = gen_rtx_REG (Pmode, LR_REGNO);
22116
22117 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
22118 {
22119 if (INSN_P (insn))
22120 {
22121 if (CALL_P (insn))
22122 {
22123 if (!SIBLING_CALL_P (insn))
22124 return 1;
22125 }
22126 else if (find_regno_note (insn, REG_INC, LR_REGNO))
22127 return 1;
22128 else if (set_of (reg, insn) != NULL_RTX
22129 && !prologue_epilogue_contains (insn))
22130 return 1;
22131 }
22132 }
22133 return 0;
22134 }
22135 \f
22136 /* Emit instructions needed to load the TOC register.
22137 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
22138 and there is a constant pool, or for SVR4 -fpic. */
22139
22140 void
22141 rs6000_emit_load_toc_table (int fromprolog)
22142 {
22143 rtx dest;
22144 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
22145
22146 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
22147 {
22148 char buf[30];
22149 rtx lab, tmp1, tmp2, got;
22150
22151 lab = gen_label_rtx ();
22152 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
22153 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22154 if (flag_pic == 2)
22155 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22156 else
22157 got = rs6000_got_sym ();
22158 tmp1 = tmp2 = dest;
22159 if (!fromprolog)
22160 {
22161 tmp1 = gen_reg_rtx (Pmode);
22162 tmp2 = gen_reg_rtx (Pmode);
22163 }
22164 emit_insn (gen_load_toc_v4_PIC_1 (lab));
22165 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
22166 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
22167 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
22168 }
22169 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
22170 {
22171 emit_insn (gen_load_toc_v4_pic_si ());
22172 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22173 }
22174 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
22175 {
22176 char buf[30];
22177 rtx temp0 = (fromprolog
22178 ? gen_rtx_REG (Pmode, 0)
22179 : gen_reg_rtx (Pmode));
22180
22181 if (fromprolog)
22182 {
22183 rtx symF, symL;
22184
22185 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
22186 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22187
22188 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
22189 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22190
22191 emit_insn (gen_load_toc_v4_PIC_1 (symF));
22192 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22193 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
22194 }
22195 else
22196 {
22197 rtx tocsym, lab;
22198
22199 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22200 lab = gen_label_rtx ();
22201 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
22202 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22203 if (TARGET_LINK_STACK)
22204 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
22205 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
22206 }
22207 emit_insn (gen_addsi3 (dest, temp0, dest));
22208 }
22209 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
22210 {
22211 /* This is for AIX code running in non-PIC ELF32. */
22212 char buf[30];
22213 rtx realsym;
22214 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
22215 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22216
22217 emit_insn (gen_elf_high (dest, realsym));
22218 emit_insn (gen_elf_low (dest, dest, realsym));
22219 }
22220 else
22221 {
22222 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22223
22224 if (TARGET_32BIT)
22225 emit_insn (gen_load_toc_aix_si (dest));
22226 else
22227 emit_insn (gen_load_toc_aix_di (dest));
22228 }
22229 }
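/* As a concrete illustration, the flag_pic == 1 SVR4 case above
   (gen_load_toc_v4_pic_si followed by the move from LR) assembles to
   roughly

	bl      _GLOBAL_OFFSET_TABLE_@local-4
	mflr    30

   leaving the GOT address in the PIC register.  */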
22230
22231 /* Emit instructions to restore the link register after determining where
22232 its value has been stored. */
22233
22234 void
22235 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
22236 {
22237 rs6000_stack_t *info = rs6000_stack_info ();
22238 rtx operands[2];
22239
22240 operands[0] = source;
22241 operands[1] = scratch;
22242
22243 if (info->lr_save_p)
22244 {
22245 rtx frame_rtx = stack_pointer_rtx;
22246 HOST_WIDE_INT sp_offset = 0;
22247 rtx tmp;
22248
22249 if (frame_pointer_needed
22250 || cfun->calls_alloca
22251 || info->total_size > 32767)
22252 {
22253 tmp = gen_frame_mem (Pmode, frame_rtx);
22254 emit_move_insn (operands[1], tmp);
22255 frame_rtx = operands[1];
22256 }
22257 else if (info->push_p)
22258 sp_offset = info->total_size;
22259
22260 tmp = plus_constant (Pmode, frame_rtx,
22261 info->lr_save_offset + sp_offset);
22262 tmp = gen_frame_mem (Pmode, tmp);
22263 emit_move_insn (tmp, operands[0]);
22264 }
22265 else
22266 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
22267
22268 /* Freeze lr_save_p. We've just emitted rtl that depends on the
22269 state of lr_save_p so any change from here on would be a bug. In
22270 particular, stop rs6000_ra_ever_killed from considering the SET
22271 of lr we may have added just above. */
22272 cfun->machine->lr_save_state = info->lr_save_p + 1;
22273 }
22274
22275 static GTY(()) alias_set_type set = -1;
22276
22277 alias_set_type
22278 get_TOC_alias_set (void)
22279 {
22280 if (set == -1)
22281 set = new_alias_set ();
22282 return set;
22283 }
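/* A typical use, as a sketch: MEMs that reference the TOC are tagged
   with this alias set so the alias oracle knows TOC entries never
   alias ordinary stores, e.g.

	set_mem_alias_set (mem, get_TOC_alias_set ());
*/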
22284
22285 /* This returns nonzero if the current function uses the TOC. This is
22286 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
22287 is generated by the ABI_V4 load_toc_* patterns. */
22288 #if TARGET_ELF
22289 static int
22290 uses_TOC (void)
22291 {
22292 rtx_insn *insn;
22293
22294 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22295 if (INSN_P (insn))
22296 {
22297 rtx pat = PATTERN (insn);
22298 int i;
22299
22300 if (GET_CODE (pat) == PARALLEL)
22301 for (i = 0; i < XVECLEN (pat, 0); i++)
22302 {
22303 rtx sub = XVECEXP (pat, 0, i);
22304 if (GET_CODE (sub) == USE)
22305 {
22306 sub = XEXP (sub, 0);
22307 if (GET_CODE (sub) == UNSPEC
22308 && XINT (sub, 1) == UNSPEC_TOC)
22309 return 1;
22310 }
22311 }
22312 }
22313 return 0;
22314 }
22315 #endif
22316
22317 rtx
22318 create_TOC_reference (rtx symbol, rtx largetoc_reg)
22319 {
22320 rtx tocrel, tocreg, hi;
22321
22322 if (TARGET_DEBUG_ADDR)
22323 {
22324 if (GET_CODE (symbol) == SYMBOL_REF)
22325 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
22326 XSTR (symbol, 0));
22327 else
22328 {
22329 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
22330 GET_RTX_NAME (GET_CODE (symbol)));
22331 debug_rtx (symbol);
22332 }
22333 }
22334
22335 if (!can_create_pseudo_p ())
22336 df_set_regs_ever_live (TOC_REGISTER, true);
22337
22338 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
22339 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
22340 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
22341 return tocrel;
22342
22343 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
22344 if (largetoc_reg != NULL)
22345 {
22346 emit_move_insn (largetoc_reg, hi);
22347 hi = largetoc_reg;
22348 }
22349 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
22350 }
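/* Under -mcmodel=medium or -mcmodel=large the HIGH/LO_SUM pair built
   above typically assembles to something like

	addis   9,2,sym@toc@ha
	ld      3,sym@toc@l(9)

   (a sketch; register numbers are illustrative), whereas small-model
   code folds the whole reference into one TOC-relative access.  */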
22351
22352 /* Issue assembly directives that create a reference to the given DWARF
22353 FRAME_TABLE_LABEL from the current function section. */
22354 void
22355 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
22356 {
22357 fprintf (asm_out_file, "\t.ref %s\n",
22358 (* targetm.strip_name_encoding) (frame_table_label));
22359 }
22360 \f
22361 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
22362 and the change to the stack pointer. */
22363
22364 static void
22365 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
22366 {
22367 rtvec p;
22368 int i;
22369 rtx regs[3];
22370
22371 i = 0;
22372 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22373 if (hard_frame_needed)
22374 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
22375 if (!(REGNO (fp) == STACK_POINTER_REGNUM
22376 || (hard_frame_needed
22377 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
22378 regs[i++] = fp;
22379
22380 p = rtvec_alloc (i);
22381 while (--i >= 0)
22382 {
22383 rtx mem = gen_frame_mem (BLKmode, regs[i]);
22384 RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, mem, const0_rtx);
22385 }
22386
22387 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
22388 }
22389
22390 /* Emit the correct code for allocating stack space, as insns.
22391 If COPY_REG, leave a copy of the old stack pointer (offset by
22392 COPY_OFF) in that register.  The generated code may use hard register 0 as a temporary. */
22393
22394 static void
22395 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
22396 {
22397 rtx_insn *insn;
22398 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22399 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
22400 rtx todec = gen_int_mode (-size, Pmode);
22401 rtx par, set, mem;
22402
22403 if (INTVAL (todec) != -size)
22404 {
22405 warning (0, "stack frame too large");
22406 emit_insn (gen_trap ());
22407 return;
22408 }
22409
22410 if (crtl->limit_stack)
22411 {
22412 if (REG_P (stack_limit_rtx)
22413 && REGNO (stack_limit_rtx) > 1
22414 && REGNO (stack_limit_rtx) <= 31)
22415 {
22416 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
22417 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22418 const0_rtx));
22419 }
22420 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
22421 && TARGET_32BIT
22422 && DEFAULT_ABI == ABI_V4)
22423 {
22424 rtx toload = gen_rtx_CONST (VOIDmode,
22425 gen_rtx_PLUS (Pmode,
22426 stack_limit_rtx,
22427 GEN_INT (size)));
22428
22429 emit_insn (gen_elf_high (tmp_reg, toload));
22430 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
22431 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22432 const0_rtx));
22433 }
22434 else
22435 warning (0, "stack limit expression is not supported");
22436 }
22437
22438 if (copy_reg)
22439 {
22440 if (copy_off != 0)
22441 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
22442 else
22443 emit_move_insn (copy_reg, stack_reg);
22444 }
22445
22446 if (size > 32767)
22447 {
22448 /* Need a note here so that try_split doesn't get confused. */
22449 if (get_last_insn () == NULL_RTX)
22450 emit_note (NOTE_INSN_DELETED);
22451 insn = emit_move_insn (tmp_reg, todec);
22452 try_split (PATTERN (insn), insn, 0);
22453 todec = tmp_reg;
22454 }
22455
22456 insn = emit_insn (TARGET_32BIT
22457 ? gen_movsi_update_stack (stack_reg, stack_reg,
22458 todec, stack_reg)
22459 : gen_movdi_di_update_stack (stack_reg, stack_reg,
22460 todec, stack_reg));
22461 /* Since we didn't use gen_frame_mem to generate the MEM, grab
22462 it now and set the alias set/attributes. The above gen_*_update
22463 calls will generate a PARALLEL with the MEM set being the first
22464 operation. */
22465 par = PATTERN (insn);
22466 gcc_assert (GET_CODE (par) == PARALLEL);
22467 set = XVECEXP (par, 0, 0);
22468 gcc_assert (GET_CODE (set) == SET);
22469 mem = SET_DEST (set);
22470 gcc_assert (MEM_P (mem));
22471 MEM_NOTRAP_P (mem) = 1;
22472 set_mem_alias_set (mem, get_frame_alias_set ());
22473
22474 RTX_FRAME_RELATED_P (insn) = 1;
22475 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
22476 gen_rtx_SET (VOIDmode, stack_reg,
22477 gen_rtx_PLUS (Pmode, stack_reg,
22478 GEN_INT (-size))));
22479 }
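/* A rough sketch of the code this emits on a 32-bit target:

	stwu r1,-SIZE(r1)		# SIZE <= 32767

   When SIZE does not fit in a 16-bit displacement, -SIZE is first
   materialized in r0 (tmp_reg above, via the split move) and an
   indexed update is used instead:

	stwux r1,r1,r0

   64-bit targets use stdu/stdux. Either way the store writes the old
   stack pointer to the new stack top, forming the ABI back chain, and
   the pointer decrement happens atomically in the same instruction. */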
22480
22481 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22482
22483 #if PROBE_INTERVAL > 32768
22484 #error Cannot use indexed addressing mode for stack probing
22485 #endif
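/* STACK_CHECK_PROBE_INTERVAL_EXP defaults to 12, so the interval is
   normally 4096 bytes. The guard above reflects the fact that the
   probe offsets must fit in the signed 16-bit displacement of a
   D-form store, which tops out at -32768. */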
22486
22487 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22488 inclusive. These are offsets from the current stack pointer. */
22489
22490 static void
22491 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
22492 {
22493 /* See if we have a constant small number of probes to generate. If so,
22494 that's the easy case. */
22495 if (first + size <= 32768)
22496 {
22497 HOST_WIDE_INT i;
22498
22499 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
22500 it exceeds SIZE. If only one probe is needed, this will not
22501 generate any code. Then probe at FIRST + SIZE. */
22502 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
22503 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22504 -(first + i)));
22505
22506 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22507 -(first + size)));
22508 }
22509
22510 /* Otherwise, do the same as above, but in a loop. Note that we must be
22511 extra careful with variables wrapping around because we might be at
22512 the very top (or the very bottom) of the address space and we have
22513 to be able to handle this case properly; in particular, we use an
22514 equality test for the loop condition. */
22515 else
22516 {
22517 HOST_WIDE_INT rounded_size;
22518 rtx r12 = gen_rtx_REG (Pmode, 12);
22519 rtx r0 = gen_rtx_REG (Pmode, 0);
22520
22521 /* Sanity check for the addressing mode we're going to use. */
22522 gcc_assert (first <= 32768);
22523
22524 /* Step 1: round SIZE to the previous multiple of the interval. */
22525
22526 rounded_size = size & -PROBE_INTERVAL;
22527
22528
22529 /* Step 2: compute initial and final value of the loop counter. */
22530
22531 /* TEST_ADDR = SP + FIRST. */
22532 emit_insn (gen_rtx_SET (VOIDmode, r12,
22533 plus_constant (Pmode, stack_pointer_rtx,
22534 -first)));
22535
22536 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22537 if (rounded_size > 32768)
22538 {
22539 emit_move_insn (r0, GEN_INT (-rounded_size));
22540 emit_insn (gen_rtx_SET (VOIDmode, r0,
22541 gen_rtx_PLUS (Pmode, r12, r0)));
22542 }
22543 else
22544 emit_insn (gen_rtx_SET (VOIDmode, r0,
22545 plus_constant (Pmode, r12, -rounded_size)));
22546
22547
22548 /* Step 3: the loop
22549
22550 while (TEST_ADDR != LAST_ADDR)
22551 {
22552 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
22553 probe at TEST_ADDR
22554 }
22555
22556 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
22557 until it is equal to ROUNDED_SIZE. */
22558
22559 if (TARGET_64BIT)
22560 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
22561 else
22562 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
22563
22564
22565 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22566 that SIZE is equal to ROUNDED_SIZE. */
22567
22568 if (size != rounded_size)
22569 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
22570 }
22571 }
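/* A worked example of the inline case: with FIRST == 16384 and
   SIZE == 8192 (and the default 4096-byte PROBE_INTERVAL),
   FIRST + SIZE == 24576 is within the 32768 limit, so two probes
   are emitted:

	probe at sp - 20480	(FIRST + PROBE_INTERVAL)
	probe at sp - 24576	(FIRST + SIZE)

   Anything larger falls through to the r12/r0 loop above. */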
22572
22573 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22574 absolute addresses. */
22575
22576 const char *
22577 output_probe_stack_range (rtx reg1, rtx reg2)
22578 {
22579 static int labelno = 0;
22580 char loop_lab[32], end_lab[32];
22581 rtx xops[2];
22582
22583 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
22584 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
22585
22586 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22587
22588 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
22589 xops[0] = reg1;
22590 xops[1] = reg2;
22591 if (TARGET_64BIT)
22592 output_asm_insn ("cmpd 0,%0,%1", xops);
22593 else
22594 output_asm_insn ("cmpw 0,%0,%1", xops);
22595
22596 fputs ("\tbeq 0,", asm_out_file);
22597 assemble_name_raw (asm_out_file, end_lab);
22598 fputc ('\n', asm_out_file);
22599
22600 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22601 xops[1] = GEN_INT (-PROBE_INTERVAL);
22602 output_asm_insn ("addi %0,%0,%1", xops);
22603
22604 /* Probe at TEST_ADDR and branch. */
22605 xops[1] = gen_rtx_REG (Pmode, 0);
22606 output_asm_insn ("stw %1,0(%0)", xops);
22607 fprintf (asm_out_file, "\tb ");
22608 assemble_name_raw (asm_out_file, loop_lab);
22609 fputc ('\n', asm_out_file);
22610
22611 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
22612
22613 return "";
22614 }
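/* A sketch of the resulting assembly for a 64-bit target, assuming
   REG1 is r12, REG2 is r0 and the default 4096-byte interval (the
   exact register syntax depends on the assembler dialect):

   .LPSRL0:
	cmpd 0,r12,r0
	beq 0,.LPSRE0
	addi r12,r12,-4096
	stw r0,0(r12)
	b .LPSRL0
   .LPSRE0:
  */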
22615
22616 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
22617 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
22618 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
22619 deduce these equivalences by itself so it wasn't necessary to hold
22620 its hand so much. Don't be tempted to always supply d2_f_d_e with
22621 the actual CFA register, i.e. r31 when we are using a hard frame
22622 pointer. That fails when saving regs off r1, and sched moves the
22623 r31 setup past the reg saves. */
22624
22625 static rtx
22626 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
22627 rtx reg2, rtx rreg, rtx split_reg)
22628 {
22629 rtx real, temp;
22630
22631 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
22632 {
22633 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
22634 int i;
22635
22636 gcc_checking_assert (val == 0);
22637 real = PATTERN (insn);
22638 if (GET_CODE (real) == PARALLEL)
22639 for (i = 0; i < XVECLEN (real, 0); i++)
22640 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22641 {
22642 rtx set = XVECEXP (real, 0, i);
22643
22644 RTX_FRAME_RELATED_P (set) = 1;
22645 }
22646 RTX_FRAME_RELATED_P (insn) = 1;
22647 return insn;
22648 }
22649
22650 /* copy_rtx will not make unique copies of registers, so we need to
22651 ensure we don't have unwanted sharing here. */
22652 if (reg == reg2)
22653 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22654
22655 if (reg == rreg)
22656 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22657
22658 real = copy_rtx (PATTERN (insn));
22659
22660 if (reg2 != NULL_RTX)
22661 real = replace_rtx (real, reg2, rreg);
22662
22663 if (REGNO (reg) == STACK_POINTER_REGNUM)
22664 gcc_checking_assert (val == 0);
22665 else
22666 real = replace_rtx (real, reg,
22667 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
22668 STACK_POINTER_REGNUM),
22669 GEN_INT (val)));
22670
22671 /* We expect that 'real' is either a SET or a PARALLEL containing
22672 SETs (and possibly other stuff). In a PARALLEL, all the SETs
22673 are important so they all have to be marked RTX_FRAME_RELATED_P. */
22674
22675 if (GET_CODE (real) == SET)
22676 {
22677 rtx set = real;
22678
22679 temp = simplify_rtx (SET_SRC (set));
22680 if (temp)
22681 SET_SRC (set) = temp;
22682 temp = simplify_rtx (SET_DEST (set));
22683 if (temp)
22684 SET_DEST (set) = temp;
22685 if (GET_CODE (SET_DEST (set)) == MEM)
22686 {
22687 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22688 if (temp)
22689 XEXP (SET_DEST (set), 0) = temp;
22690 }
22691 }
22692 else
22693 {
22694 int i;
22695
22696 gcc_assert (GET_CODE (real) == PARALLEL);
22697 for (i = 0; i < XVECLEN (real, 0); i++)
22698 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22699 {
22700 rtx set = XVECEXP (real, 0, i);
22701
22702 temp = simplify_rtx (SET_SRC (set));
22703 if (temp)
22704 SET_SRC (set) = temp;
22705 temp = simplify_rtx (SET_DEST (set));
22706 if (temp)
22707 SET_DEST (set) = temp;
22708 if (GET_CODE (SET_DEST (set)) == MEM)
22709 {
22710 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22711 if (temp)
22712 XEXP (SET_DEST (set), 0) = temp;
22713 }
22714 RTX_FRAME_RELATED_P (set) = 1;
22715 }
22716 }
22717
22718 /* If a store insn has been split into multiple insns, the
22719 true source register is given by split_reg. */
22720 if (split_reg != NULL_RTX)
22721 real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
22722
22723 RTX_FRAME_RELATED_P (insn) = 1;
22724 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
22725
22726 return insn;
22727 }
22728
22729 /* Returns an insn that has a vrsave set operation with the
22730 appropriate CLOBBERs. */
22731
22732 static rtx
22733 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
22734 {
22735 int nclobs, i;
22736 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
22737 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
22738
22739 clobs[0]
22740 = gen_rtx_SET (VOIDmode,
22741 vrsave,
22742 gen_rtx_UNSPEC_VOLATILE (SImode,
22743 gen_rtvec (2, reg, vrsave),
22744 UNSPECV_SET_VRSAVE));
22745
22746 nclobs = 1;
22747
22748 /* We need to clobber the registers in the mask so the scheduler
22749 does not move sets to VRSAVE before sets of AltiVec registers.
22750
22751 However, if the function receives nonlocal gotos, reload will set
22752 all call saved registers live. We will end up with:
22753
22754 (set (reg 999) (mem))
22755 (parallel [ (set (reg vrsave) (unspec blah))
22756 (clobber (reg 999))])
22757
22758 The clobber will cause the store into reg 999 to be dead, and
22759 flow will attempt to delete an epilogue insn. In this case, we
22760 need an unspec use/set of the register. */
22761
22762 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
22763 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22764 {
22765 if (!epiloguep || call_used_regs [i])
22766 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
22767 gen_rtx_REG (V4SImode, i));
22768 else
22769 {
22770 rtx reg = gen_rtx_REG (V4SImode, i);
22771
22772 clobs[nclobs++]
22773 = gen_rtx_SET (VOIDmode,
22774 reg,
22775 gen_rtx_UNSPEC (V4SImode,
22776 gen_rtvec (1, reg), 27));
22777 }
22778 }
22779
22780 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
22781
22782 for (i = 0; i < nclobs; ++i)
22783 XVECEXP (insn, 0, i) = clobs[i];
22784
22785 return insn;
22786 }
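/* The insn built above looks roughly like:

	(parallel [(set (reg:SI VRSAVE)
			(unspec_volatile [(reg) (reg:SI VRSAVE)]
					 UNSPECV_SET_VRSAVE))
		   (clobber (reg:V4SI <vN>))
		   ...])

   with one clobber (or, in the epilogue case described above, a
   dummy use/set) for each AltiVec register in INFO->vrsave_mask. */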
22787
22788 static rtx
22789 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
22790 {
22791 rtx addr, mem;
22792
22793 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
22794 mem = gen_frame_mem (GET_MODE (reg), addr);
22795 return gen_rtx_SET (VOIDmode, store ? mem : reg, store ? reg : mem);
22796 }
22797
22798 static rtx
22799 gen_frame_load (rtx reg, rtx frame_reg, int offset)
22800 {
22801 return gen_frame_set (reg, frame_reg, offset, false);
22802 }
22803
22804 static rtx
22805 gen_frame_store (rtx reg, rtx frame_reg, int offset)
22806 {
22807 return gen_frame_set (reg, frame_reg, offset, true);
22808 }
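/* For example, with SP the stack-pointer rtx,
   gen_frame_store (gen_rtx_REG (DFmode, 63), SP, 8) builds

	(set (mem:DF (plus (reg 1) (const_int 8))) (reg:DF 63))

   i.e. a frame store of f31 at sp+8, while gen_frame_load builds the
   mirror-image load. */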
22809
22810 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
22811 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
22812
22813 static rtx
22814 emit_frame_save (rtx frame_reg, machine_mode mode,
22815 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
22816 {
22817 rtx reg, insn;
22818
22819 /* Some cases that need register indexed addressing. */
22820 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
22821 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22822 || (TARGET_E500_DOUBLE && mode == DFmode)
22823 || (TARGET_SPE_ABI
22824 && SPE_VECTOR_MODE (mode)
22825 && !SPE_CONST_OFFSET_OK (offset))));
22826
22827 reg = gen_rtx_REG (mode, regno);
22828 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
22829 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
22830 NULL_RTX, NULL_RTX, NULL_RTX);
22831 }
22832
22833 /* Emit an offset memory reference suitable for a frame store, while
22834 converting to a valid addressing mode. */
22835
22836 static rtx
22837 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
22838 {
22839 rtx int_rtx, offset_rtx;
22840
22841 int_rtx = GEN_INT (offset);
22842
22843 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
22844 || (TARGET_E500_DOUBLE && mode == DFmode))
22845 {
22846 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
22847 emit_move_insn (offset_rtx, int_rtx);
22848 }
22849 else
22850 offset_rtx = int_rtx;
22851
22852 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
22853 }
22854
22855 #ifndef TARGET_FIX_AND_CONTINUE
22856 #define TARGET_FIX_AND_CONTINUE 0
22857 #endif
22858
22859 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
22860 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
22861 #define LAST_SAVRES_REGISTER 31
22862 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
22863
22864 enum {
22865 SAVRES_LR = 0x1,
22866 SAVRES_SAVE = 0x2,
22867 SAVRES_REG = 0x0c,
22868 SAVRES_GPR = 0,
22869 SAVRES_FPR = 4,
22870 SAVRES_VR = 8
22871 };
22872
22873 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
22874
22875 /* Temporary holding space for an out-of-line register save/restore
22876 routine name. */
22877 static char savres_routine_name[30];
22878
22879 /* Return the name for an out-of-line register save/restore routine.
22880 We are saving/restoring GPRs if GPR is true. */
22881
22882 static char *
22883 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
22884 {
22885 const char *prefix = "";
22886 const char *suffix = "";
22887
22888 /* Different targets are supposed to define
22889 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
22890 routine name could be defined with:
22891
22892 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
22893
22894 This is a nice idea in theory, but in reality, things are
22895 complicated in several ways:
22896
22897 - ELF targets have save/restore routines for GPRs.
22898
22899 - SPE targets use different prefixes for 32/64-bit registers, and
22900 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
22901
22902 - PPC64 ELF targets have routines for save/restore of GPRs that
22903 differ in what they do with the link register, so having a set
22904 prefix doesn't work. (We only use one of the save routines at
22905 the moment, though.)
22906
22907 - PPC32 ELF targets have "exit" versions of the restore routines
22908 that restore the link register and can save some extra space.
22909 These require an extra suffix. (There are also "tail" versions
22910 of the restore routines and "GOT" versions of the save routines,
22911 but we don't generate those at present. Same problems apply,
22912 though.)
22913
22914 We deal with all this by synthesizing our own prefix/suffix and
22915 using that for the simple sprintf call shown above. */
22916 if (TARGET_SPE)
22917 {
22918 /* No floating point saves on the SPE. */
22919 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
22920
22921 if ((sel & SAVRES_SAVE))
22922 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
22923 else
22924 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
22925
22926 if ((sel & SAVRES_LR))
22927 suffix = "_x";
22928 }
22929 else if (DEFAULT_ABI == ABI_V4)
22930 {
22931 if (TARGET_64BIT)
22932 goto aix_names;
22933
22934 if ((sel & SAVRES_REG) == SAVRES_GPR)
22935 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
22936 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22937 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
22938 else if ((sel & SAVRES_REG) == SAVRES_VR)
22939 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
22940 else
22941 abort ();
22942
22943 if ((sel & SAVRES_LR))
22944 suffix = "_x";
22945 }
22946 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22947 {
22948 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
22949 /* No out-of-line save/restore routines for GPRs on AIX. */
22950 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
22951 #endif
22952
22953 aix_names:
22954 if ((sel & SAVRES_REG) == SAVRES_GPR)
22955 prefix = ((sel & SAVRES_SAVE)
22956 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
22957 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
22958 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22959 {
22960 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
22961 if ((sel & SAVRES_LR))
22962 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
22963 else
22964 #endif
22965 {
22966 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
22967 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
22968 }
22969 }
22970 else if ((sel & SAVRES_REG) == SAVRES_VR)
22971 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
22972 else
22973 abort ();
22974 }
22975
22976 if (DEFAULT_ABI == ABI_DARWIN)
22977 {
22978 /* The Darwin approach is (slightly) different, in order to be
22979 compatible with code generated by the system toolchain. There is a
22980 single symbol for the start of save sequence, and the code here
22981 embeds an offset into that code on the basis of the first register
22982 to be saved. */
22983 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
22984 if ((sel & SAVRES_REG) == SAVRES_GPR)
22985 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
22986 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
22987 (regno - 13) * 4, prefix, regno);
22988 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22989 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
22990 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
22991 else if ((sel & SAVRES_REG) == SAVRES_VR)
22992 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
22993 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
22994 else
22995 abort ();
22996 }
22997 else
22998 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
22999
23000 return savres_routine_name;
23001 }
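/* For illustration, some of the names this produces (taking r29/f29/v29
   as the first saved register):

	_savegpr_29	32-bit SVR4 GPR save
	_restgpr_29_x	32-bit SVR4 "exit" GPR restore (also restores LR)
	_savegpr0_29	AIX/ELF64 GPR save that also saves LR
	_savefpr_29	FPR save
	_restvr_29	vector restore  */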
23002
23003 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
23004 SEL selects the register class and save/restore variant, as above. */
23005
23006 static rtx
23007 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
23008 {
23009 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
23010 ? info->first_gp_reg_save
23011 : (sel & SAVRES_REG) == SAVRES_FPR
23012 ? info->first_fp_reg_save - 32
23013 : (sel & SAVRES_REG) == SAVRES_VR
23014 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
23015 : -1);
23016 rtx sym;
23017 int select = sel;
23018
23019 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
23020 versions of the gpr routines. */
23021 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
23022 && info->spe_64bit_regs_used)
23023 select ^= SAVRES_FPR ^ SAVRES_GPR;
23024
23025 /* Don't generate bogus routine names. */
23026 gcc_assert (FIRST_SAVRES_REGISTER <= regno
23027 && regno <= LAST_SAVRES_REGISTER
23028 && select >= 0 && select <= 12);
23029
23030 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
23031
23032 if (sym == NULL)
23033 {
23034 char *name;
23035
23036 name = rs6000_savres_routine_name (info, regno, sel);
23037
23038 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
23039 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
23040 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
23041 }
23042
23043 return sym;
23044 }
23045
23046 /* Emit a sequence of insns, including a stack tie if needed, for
23047 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
23048 reset the stack pointer, but move the base of the frame into
23049 reg UPDT_REGNO for use by out-of-line register restore routines. */
23050
23051 static rtx
23052 rs6000_emit_stack_reset (rs6000_stack_t *info,
23053 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
23054 unsigned updt_regno)
23055 {
23056 rtx updt_reg_rtx;
23057
23058 /* This blockage is needed so that sched doesn't decide to move
23059 the sp change before the register restores. */
23060 if (DEFAULT_ABI == ABI_V4
23061 || (TARGET_SPE_ABI
23062 && info->spe_64bit_regs_used != 0
23063 && info->first_gp_reg_save != 32))
23064 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
23065
23066 /* If we are restoring registers out-of-line, we will be using the
23067 "exit" variants of the restore routines, which will reset the
23068 stack for us. But we do need to point updt_reg into the
23069 right place for those routines. */
23070 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
23071
23072 if (frame_off != 0)
23073 return emit_insn (gen_add3_insn (updt_reg_rtx,
23074 frame_reg_rtx, GEN_INT (frame_off)));
23075 else if (REGNO (frame_reg_rtx) != updt_regno)
23076 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
23077
23078 return NULL_RTX;
23079 }
23080
23081 /* Return the register number used as a pointer by out-of-line
23082 save/restore functions. */
23083
23084 static inline unsigned
23085 ptr_regno_for_savres (int sel)
23086 {
23087 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23088 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
23089 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
23090 }
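/* Summarizing the logic above:

	ABI		GPR	FPR	VR
	AIX/ELFv2	r12(*)	r1	r12(*)
	Darwin		r11	r1	r11
	V.4		r11	r11	r11

   (*) r1 instead when the routine also handles the link register. */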
23091
23092 /* Construct a parallel rtx describing the effect of a call to an
23093 out-of-line register save/restore routine, and emit the insn
23094 or jump_insn as appropriate. */
23095
23096 static rtx
23097 rs6000_emit_savres_rtx (rs6000_stack_t *info,
23098 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
23099 machine_mode reg_mode, int sel)
23100 {
23101 int i;
23102 int offset, start_reg, end_reg, n_regs, use_reg;
23103 int reg_size = GET_MODE_SIZE (reg_mode);
23104 rtx sym;
23105 rtvec p;
23106 rtx par, insn;
23107
23108 offset = 0;
23109 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23110 ? info->first_gp_reg_save
23111 : (sel & SAVRES_REG) == SAVRES_FPR
23112 ? info->first_fp_reg_save
23113 : (sel & SAVRES_REG) == SAVRES_VR
23114 ? info->first_altivec_reg_save
23115 : -1);
23116 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23117 ? 32
23118 : (sel & SAVRES_REG) == SAVRES_FPR
23119 ? 64
23120 : (sel & SAVRES_REG) == SAVRES_VR
23121 ? LAST_ALTIVEC_REGNO + 1
23122 : -1);
23123 n_regs = end_reg - start_reg;
23124 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
23125 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
23126 + n_regs);
23127
23128 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23129 RTVEC_ELT (p, offset++) = ret_rtx;
23130
23131 RTVEC_ELT (p, offset++)
23132 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
23133
23134 sym = rs6000_savres_routine_sym (info, sel);
23135 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
23136
23137 use_reg = ptr_regno_for_savres (sel);
23138 if ((sel & SAVRES_REG) == SAVRES_VR)
23139 {
23140 /* Vector regs are saved/restored using [reg+reg] addressing. */
23141 RTVEC_ELT (p, offset++)
23142 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23143 RTVEC_ELT (p, offset++)
23144 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
23145 }
23146 else
23147 RTVEC_ELT (p, offset++)
23148 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23149
23150 for (i = 0; i < end_reg - start_reg; i++)
23151 RTVEC_ELT (p, i + offset)
23152 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
23153 frame_reg_rtx, save_area_offset + reg_size * i,
23154 (sel & SAVRES_SAVE) != 0);
23155
23156 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23157 RTVEC_ELT (p, i + offset)
23158 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
23159
23160 par = gen_rtx_PARALLEL (VOIDmode, p);
23161
23162 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23163 {
23164 insn = emit_jump_insn (par);
23165 JUMP_LABEL (insn) = ret_rtx;
23166 }
23167 else
23168 insn = emit_insn (par);
23169 return insn;
23170 }
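/* For an "exit"-flavoured GPR restore (SAVRES_LR set, SAVRES_SAVE
   clear) the PARALLEL built above has roughly this shape:

	(parallel [(return)
		   (clobber (reg:P LR_REGNO))
		   (use (symbol_ref "_restgpr_29_x"))
		   (use (reg:P 11))
		   (set (reg 29) (mem (plus (reg 11) (const_int ...))))
		   ...])

   and is emitted as a jump_insn, since the out-of-line routine
   returns straight to our caller. */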
23171
23172 /* Emit code to store CR fields that need to be saved into REG. */
23173
23174 static void
23175 rs6000_emit_move_from_cr (rtx reg)
23176 {
23177 /* Only the ELFv2 ABI allows storing only selected fields. */
23178 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
23179 {
23180 int i, cr_reg[8], count = 0;
23181
23182 /* Collect CR fields that must be saved. */
23183 for (i = 0; i < 8; i++)
23184 if (save_reg_p (CR0_REGNO + i))
23185 cr_reg[count++] = i;
23186
23187 /* If it's just a single one, use mfcrf. */
23188 if (count == 1)
23189 {
23190 rtvec p = rtvec_alloc (1);
23191 rtvec r = rtvec_alloc (2);
23192 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
23193 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
23194 RTVEC_ELT (p, 0)
23195 = gen_rtx_SET (VOIDmode, reg,
23196 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
23197
23198 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23199 return;
23200 }
23201
23202 /* ??? It might be better to handle count == 2 / 3 cases here
23203 as well, using logical operations to combine the values. */
23204 }
23205
23206 emit_insn (gen_movesi_from_cr (reg));
23207 }
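/* For instance, if only CR2 needs saving, the field mask is
   1 << (7 - 2) == 0x20, so the insn above assembles to roughly
   "mfcrf rN,0x20" (a single-field move), whereas the fallback
   gen_movesi_from_cr below copies all eight fields with mfcr. */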
23208
23209 /* Determine whether GP register REG is really used. */
23210
23211 static bool
23212 rs6000_reg_live_or_pic_offset_p (int reg)
23213 {
23214 /* If the function calls eh_return, claim that all registers that would
23215 otherwise be checked for liveness are used. This is required for the PIC offset
23216 register with -mminimal-toc on AIX, as it is advertised as "fixed" for
23217 register allocation purposes in this case. */
23218
23219 return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
23220 && (!call_used_regs[reg]
23221 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23222 && !TARGET_SINGLE_PIC_BASE
23223 && TARGET_TOC && TARGET_MINIMAL_TOC)))
23224 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23225 && !TARGET_SINGLE_PIC_BASE
23226 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
23227 || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
23228 }
23229
23230 /* Emit function prologue as insns. */
23231
23232 void
23233 rs6000_emit_prologue (void)
23234 {
23235 rs6000_stack_t *info = rs6000_stack_info ();
23236 machine_mode reg_mode = Pmode;
23237 int reg_size = TARGET_32BIT ? 4 : 8;
23238 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
23239 rtx frame_reg_rtx = sp_reg_rtx;
23240 unsigned int cr_save_regno;
23241 rtx cr_save_rtx = NULL_RTX;
23242 rtx insn;
23243 int strategy;
23244 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23245 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23246 && call_used_regs[STATIC_CHAIN_REGNUM]);
23247 /* Offset to top of frame for frame_reg and sp respectively. */
23248 HOST_WIDE_INT frame_off = 0;
23249 HOST_WIDE_INT sp_off = 0;
23250
23251 #ifdef ENABLE_CHECKING
23252 /* Track and check usage of r0, r11, r12. */
23253 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
23254 #define START_USE(R) do \
23255 { \
23256 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23257 reg_inuse |= 1 << (R); \
23258 } while (0)
23259 #define END_USE(R) do \
23260 { \
23261 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
23262 reg_inuse &= ~(1 << (R)); \
23263 } while (0)
23264 #define NOT_INUSE(R) do \
23265 { \
23266 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23267 } while (0)
23268 #else
23269 #define START_USE(R) do {} while (0)
23270 #define END_USE(R) do {} while (0)
23271 #define NOT_INUSE(R) do {} while (0)
23272 #endif
23273
23274 if (DEFAULT_ABI == ABI_ELFv2)
23275 {
23276 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
23277
23278 /* With -mminimal-toc we may generate an extra use of r2 below. */
23279 if (!TARGET_SINGLE_PIC_BASE
23280 && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
23281 cfun->machine->r2_setup_needed = true;
23282 }
23283
23284
23285 if (flag_stack_usage_info)
23286 current_function_static_stack_size = info->total_size;
23287
23288 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
23289 {
23290 HOST_WIDE_INT size = info->total_size;
23291
23292 if (crtl->is_leaf && !cfun->calls_alloca)
23293 {
23294 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
23295 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
23296 size - STACK_CHECK_PROTECT);
23297 }
23298 else if (size > 0)
23299 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
23300 }
23301
23302 if (TARGET_FIX_AND_CONTINUE)
23303 {
23304 /* GDB on Darwin arranges to forward a function from the old
23305 address by modifying the first 5 instructions of the function
23306 to branch to the overriding function. This is necessary to
23307 permit function pointers that point to the old function to
23308 actually forward to the new function. */
23309 emit_insn (gen_nop ());
23310 emit_insn (gen_nop ());
23311 emit_insn (gen_nop ());
23312 emit_insn (gen_nop ());
23313 emit_insn (gen_nop ());
23314 }
23315
23316 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23317 {
23318 reg_mode = V2SImode;
23319 reg_size = 8;
23320 }
23321
23322 /* Handle world saves specially here. */
23323 if (WORLD_SAVE_P (info))
23324 {
23325 int i, j, sz;
23326 rtx treg;
23327 rtvec p;
23328 rtx reg0;
23329
23330 /* save_world expects lr in r0. */
23331 reg0 = gen_rtx_REG (Pmode, 0);
23332 if (info->lr_save_p)
23333 {
23334 insn = emit_move_insn (reg0,
23335 gen_rtx_REG (Pmode, LR_REGNO));
23336 RTX_FRAME_RELATED_P (insn) = 1;
23337 }
23338
23339 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
23340 assumptions about the offsets of various bits of the stack
23341 frame. */
23342 gcc_assert (info->gp_save_offset == -220
23343 && info->fp_save_offset == -144
23344 && info->lr_save_offset == 8
23345 && info->cr_save_offset == 4
23346 && info->push_p
23347 && info->lr_save_p
23348 && (!crtl->calls_eh_return
23349 || info->ehrd_offset == -432)
23350 && info->vrsave_save_offset == -224
23351 && info->altivec_save_offset == -416);
23352
23353 treg = gen_rtx_REG (SImode, 11);
23354 emit_move_insn (treg, GEN_INT (-info->total_size));
23355
23356 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
23357 in R11. It also clobbers R12, so beware! */
23358
23359 /* Preserve CR2 for save_world prologues. */
23360 sz = 5;
23361 sz += 32 - info->first_gp_reg_save;
23362 sz += 64 - info->first_fp_reg_save;
23363 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
23364 p = rtvec_alloc (sz);
23365 j = 0;
23366 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
23367 gen_rtx_REG (SImode,
23368 LR_REGNO));
23369 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
23370 gen_rtx_SYMBOL_REF (Pmode,
23371 "*save_world"));
23372 /* We do floats first so that the instruction pattern matches
23373 properly. */
23374 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23375 RTVEC_ELT (p, j++)
23376 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23377 ? DFmode : SFmode,
23378 info->first_fp_reg_save + i),
23379 frame_reg_rtx,
23380 info->fp_save_offset + frame_off + 8 * i);
23381 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
23382 RTVEC_ELT (p, j++)
23383 = gen_frame_store (gen_rtx_REG (V4SImode,
23384 info->first_altivec_reg_save + i),
23385 frame_reg_rtx,
23386 info->altivec_save_offset + frame_off + 16 * i);
23387 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23388 RTVEC_ELT (p, j++)
23389 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23390 frame_reg_rtx,
23391 info->gp_save_offset + frame_off + reg_size * i);
23392
23393 /* CR register traditionally saved as CR2. */
23394 RTVEC_ELT (p, j++)
23395 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
23396 frame_reg_rtx, info->cr_save_offset + frame_off);
23397 /* Explain the use of R0 (the saved LR). */
23398 if (info->lr_save_p)
23399 RTVEC_ELT (p, j++)
23400 = gen_frame_store (reg0,
23401 frame_reg_rtx, info->lr_save_offset + frame_off);
23402 /* Explain what happens to the stack pointer. */
23403 {
23404 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
23405 RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval);
23406 }
23407
23408 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23409 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23410 treg, GEN_INT (-info->total_size), NULL_RTX);
23411 sp_off = frame_off = info->total_size;
23412 }
23413
23414 strategy = info->savres_strategy;
23415
23416 /* For V.4, update the stack before we do any saving and set the back pointer. */
23417 if (! WORLD_SAVE_P (info)
23418 && info->push_p
23419 && (DEFAULT_ABI == ABI_V4
23420 || crtl->calls_eh_return))
23421 {
23422 bool need_r11 = (TARGET_SPE
23423 ? (!(strategy & SAVE_INLINE_GPRS)
23424 && info->spe_64bit_regs_used == 0)
23425 : (!(strategy & SAVE_INLINE_FPRS)
23426 || !(strategy & SAVE_INLINE_GPRS)
23427 || !(strategy & SAVE_INLINE_VRS)));
23428 int ptr_regno = -1;
23429 rtx ptr_reg = NULL_RTX;
23430 int ptr_off = 0;
23431
23432 if (info->total_size < 32767)
23433 frame_off = info->total_size;
23434 else if (need_r11)
23435 ptr_regno = 11;
23436 else if (info->cr_save_p
23437 || info->lr_save_p
23438 || info->first_fp_reg_save < 64
23439 || info->first_gp_reg_save < 32
23440 || info->altivec_size != 0
23441 || info->vrsave_mask != 0
23442 || crtl->calls_eh_return)
23443 ptr_regno = 12;
23444 else
23445 {
23446 /* The prologue won't be saving any regs so there is no need
23447 to set up a frame register to access any frame save area.
23448 We also won't be using frame_off anywhere below, but set
23449 the correct value anyway to protect against future
23450 changes to this function. */
23451 frame_off = info->total_size;
23452 }
23453 if (ptr_regno != -1)
23454 {
23455 /* Set up the frame offset to that needed by the first
23456 out-of-line save function. */
23457 START_USE (ptr_regno);
23458 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23459 frame_reg_rtx = ptr_reg;
23460 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
23461 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
23462 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
23463 ptr_off = info->gp_save_offset + info->gp_size;
23464 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
23465 ptr_off = info->altivec_save_offset + info->altivec_size;
23466 frame_off = -ptr_off;
23467 }
23468 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23469 sp_off = info->total_size;
23470 if (frame_reg_rtx != sp_reg_rtx)
23471 rs6000_emit_stack_tie (frame_reg_rtx, false);
23472 }
23473
23474 /* If we use the link register, get it into r0. */
23475 if (!WORLD_SAVE_P (info) && info->lr_save_p)
23476 {
23477 rtx addr, reg, mem;
23478
23479 reg = gen_rtx_REG (Pmode, 0);
23480 START_USE (0);
23481 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
23482 RTX_FRAME_RELATED_P (insn) = 1;
23483
23484 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
23485 | SAVE_NOINLINE_FPRS_SAVES_LR)))
23486 {
23487 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23488 GEN_INT (info->lr_save_offset + frame_off));
23489 mem = gen_rtx_MEM (Pmode, addr);
23490 /* Deliberately not gen_frame_mem: __builtin_return_address can
23491 read this slot, so it must not get the frame alias set. */
23492
23493 insn = emit_move_insn (mem, reg);
23494 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23495 NULL_RTX, NULL_RTX, NULL_RTX);
23496 END_USE (0);
23497 }
23498 }
23499
23500 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
23501 r12 will be needed by the out-of-line gpr save. */
23502 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23503 && !(strategy & (SAVE_INLINE_GPRS
23504 | SAVE_NOINLINE_GPRS_SAVES_LR))
23505 ? 11 : 12);
23506 if (!WORLD_SAVE_P (info)
23507 && info->cr_save_p
23508 && REGNO (frame_reg_rtx) != cr_save_regno
23509 && !(using_static_chain_p && cr_save_regno == 11))
23510 {
23511 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
23512 START_USE (cr_save_regno);
23513 rs6000_emit_move_from_cr (cr_save_rtx);
23514 }
23515
23516 /* Do any required saving of FPRs. If only one or two to save, do
23517 it ourselves. Otherwise, call an out-of-line save function. */
23518 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
23519 {
23520 int i;
23521 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23522 if (save_reg_p (info->first_fp_reg_save + i))
23523 emit_frame_save (frame_reg_rtx,
23524 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23525 ? DFmode : SFmode),
23526 info->first_fp_reg_save + i,
23527 info->fp_save_offset + frame_off + 8 * i,
23528 sp_off - frame_off);
23529 }
23530 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
23531 {
23532 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
23533 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
23534 unsigned ptr_regno = ptr_regno_for_savres (sel);
23535 rtx ptr_reg = frame_reg_rtx;
23536
23537 if (REGNO (frame_reg_rtx) == ptr_regno)
23538 gcc_checking_assert (frame_off == 0);
23539 else
23540 {
23541 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23542 NOT_INUSE (ptr_regno);
23543 emit_insn (gen_add3_insn (ptr_reg,
23544 frame_reg_rtx, GEN_INT (frame_off)));
23545 }
23546 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23547 info->fp_save_offset,
23548 info->lr_save_offset,
23549 DFmode, sel);
23550 rs6000_frame_related (insn, ptr_reg, sp_off,
23551 NULL_RTX, NULL_RTX, NULL_RTX);
23552 if (lr)
23553 END_USE (0);
23554 }
23555
23556 /* Save GPRs. This is done as a PARALLEL if we are using
23557 the store-multiple instructions. */
23558 if (!WORLD_SAVE_P (info)
23559 && TARGET_SPE_ABI
23560 && info->spe_64bit_regs_used != 0
23561 && info->first_gp_reg_save != 32)
23562 {
23563 int i;
23564 rtx spe_save_area_ptr;
23565 HOST_WIDE_INT save_off;
23566 int ool_adjust = 0;
23567
23568 /* Determine whether we can address all of the registers that need
23569 to be saved with an offset from frame_reg_rtx that fits in
23570 the small const field for SPE memory instructions. */
23571 int spe_regs_addressable
23572 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
23573 + reg_size * (32 - info->first_gp_reg_save - 1))
23574 && (strategy & SAVE_INLINE_GPRS));
23575
23576 if (spe_regs_addressable)
23577 {
23578 spe_save_area_ptr = frame_reg_rtx;
23579 save_off = frame_off;
23580 }
23581 else
23582 {
23583 /* Make r11 point to the start of the SPE save area. We need
23584 to be careful here if r11 is holding the static chain. If
23585 it is, then temporarily save it in r0. */
23586 HOST_WIDE_INT offset;
23587
23588 if (!(strategy & SAVE_INLINE_GPRS))
23589 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
23590 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
23591 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
23592 save_off = frame_off - offset;
23593
23594 if (using_static_chain_p)
23595 {
23596 rtx r0 = gen_rtx_REG (Pmode, 0);
23597
23598 START_USE (0);
23599 gcc_assert (info->first_gp_reg_save > 11);
23600
23601 emit_move_insn (r0, spe_save_area_ptr);
23602 }
23603 else if (REGNO (frame_reg_rtx) != 11)
23604 START_USE (11);
23605
23606 emit_insn (gen_addsi3 (spe_save_area_ptr,
23607 frame_reg_rtx, GEN_INT (offset)));
23608 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
23609 frame_off = -info->spe_gp_save_offset + ool_adjust;
23610 }
23611
23612 if ((strategy & SAVE_INLINE_GPRS))
23613 {
23614 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23615 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23616 emit_frame_save (spe_save_area_ptr, reg_mode,
23617 info->first_gp_reg_save + i,
23618 (info->spe_gp_save_offset + save_off
23619 + reg_size * i),
23620 sp_off - save_off);
23621 }
23622 else
23623 {
23624 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
23625 info->spe_gp_save_offset + save_off,
23626 0, reg_mode,
23627 SAVRES_SAVE | SAVRES_GPR);
23628
23629 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
23630 NULL_RTX, NULL_RTX, NULL_RTX);
23631 }
23632
23633 /* Move the static chain pointer back. */
23634 if (!spe_regs_addressable)
23635 {
23636 if (using_static_chain_p)
23637 {
23638 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
23639 END_USE (0);
23640 }
23641 else if (REGNO (frame_reg_rtx) != 11)
23642 END_USE (11);
23643 }
23644 }
23645 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
23646 {
23647 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
23648 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
23649 unsigned ptr_regno = ptr_regno_for_savres (sel);
23650 rtx ptr_reg = frame_reg_rtx;
23651 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
23652 int end_save = info->gp_save_offset + info->gp_size;
23653 int ptr_off;
23654
23655 if (!ptr_set_up)
23656 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23657
23658 /* Need to adjust r11 (r12) if we saved any FPRs. */
23659 if (end_save + frame_off != 0)
23660 {
23661 rtx offset = GEN_INT (end_save + frame_off);
23662
23663 if (ptr_set_up)
23664 frame_off = -end_save;
23665 else
23666 NOT_INUSE (ptr_regno);
23667 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23668 }
23669 else if (!ptr_set_up)
23670 {
23671 NOT_INUSE (ptr_regno);
23672 emit_move_insn (ptr_reg, frame_reg_rtx);
23673 }
23674 ptr_off = -end_save;
23675 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23676 info->gp_save_offset + ptr_off,
23677 info->lr_save_offset + ptr_off,
23678 reg_mode, sel);
23679 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
23680 NULL_RTX, NULL_RTX, NULL_RTX);
23681 if (lr)
23682 END_USE (0);
23683 }
23684 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
23685 {
23686 rtvec p;
23687 int i;
23688 p = rtvec_alloc (32 - info->first_gp_reg_save);
23689 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23690 RTVEC_ELT (p, i)
23691 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23692 frame_reg_rtx,
23693 info->gp_save_offset + frame_off + reg_size * i);
23694 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23695 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23696 NULL_RTX, NULL_RTX, NULL_RTX);
23697 }
23698 else if (!WORLD_SAVE_P (info))
23699 {
23700 int i;
23701 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23702 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23703 emit_frame_save (frame_reg_rtx, reg_mode,
23704 info->first_gp_reg_save + i,
23705 info->gp_save_offset + frame_off + reg_size * i,
23706 sp_off - frame_off);
23707 }
23708
23709 if (crtl->calls_eh_return)
23710 {
23711 unsigned int i;
23712 rtvec p;
23713
23714 for (i = 0; ; ++i)
23715 {
23716 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23717 if (regno == INVALID_REGNUM)
23718 break;
23719 }
23720
23721 p = rtvec_alloc (i);
23722
23723 for (i = 0; ; ++i)
23724 {
23725 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23726 if (regno == INVALID_REGNUM)
23727 break;
23728
23729 insn
23730 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
23731 sp_reg_rtx,
23732 info->ehrd_offset + sp_off + reg_size * (int) i);
23733 RTVEC_ELT (p, i) = insn;
23734 RTX_FRAME_RELATED_P (insn) = 1;
23735 }
23736
23737 insn = emit_insn (gen_blockage ());
23738 RTX_FRAME_RELATED_P (insn) = 1;
23739 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
23740 }
23741
23742 /* In the AIX ABI we need to make sure r2 is really saved. */
23743 if (TARGET_AIX && crtl->calls_eh_return)
23744 {
23745 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
23746 rtx save_insn, join_insn, note;
23747 long toc_restore_insn;
23748
23749 tmp_reg = gen_rtx_REG (Pmode, 11);
23750 tmp_reg_si = gen_rtx_REG (SImode, 11);
23751 if (using_static_chain_p)
23752 {
23753 START_USE (0);
23754 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
23755 }
23756 else
23757 START_USE (11);
23758 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
23759 /* Peek at instruction to which this function returns. If it's
23760 restoring r2, then we know we've already saved r2. We can't
23761 unconditionally save r2 because the value we have will already
23762 be updated if we arrived at this function via a plt call or
23763 toc adjusting stub. */
23764 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
23765 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
23766 + RS6000_TOC_SAVE_SLOT);
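      /* The two constants are the encodings of "lwz r2,0(r1)" (0x80410000)
	 and "ld r2,0(r1)" (0xE8410000); adding RS6000_TOC_SAVE_SLOT fills
	 in the displacement of the TOC save slot. */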
23767 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
23768 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
23769 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
23770 validate_condition_mode (EQ, CCUNSmode);
23771 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
23772 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
23773 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
23774 toc_save_done = gen_label_rtx ();
23775 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
23776 gen_rtx_EQ (VOIDmode, compare_result,
23777 const0_rtx),
23778 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
23779 pc_rtx);
23780 jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
23781 JUMP_LABEL (jump) = toc_save_done;
23782 LABEL_NUSES (toc_save_done) += 1;
23783
23784 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
23785 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
23786 sp_off - frame_off);
23787
23788 emit_label (toc_save_done);
23789
23790 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
23791 have a CFG that has different saves along different paths.
23792 Move the note to a dummy blockage insn, which describes that
23793 R2 is unconditionally saved after the label. */
23794 /* ??? An alternate representation might be a special insn pattern
23795 containing both the branch and the store. That might give the
23796 code that minimizes the number of DW_CFA_advance opcodes more
23797 freedom in placing the annotations. */
23798 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
23799 if (note)
23800 remove_note (save_insn, note);
23801 else
23802 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
23803 copy_rtx (PATTERN (save_insn)), NULL_RTX);
23804 RTX_FRAME_RELATED_P (save_insn) = 0;
23805
23806 join_insn = emit_insn (gen_blockage ());
23807 REG_NOTES (join_insn) = note;
23808 RTX_FRAME_RELATED_P (join_insn) = 1;
23809
23810 if (using_static_chain_p)
23811 {
23812 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
23813 END_USE (0);
23814 }
23815 else
23816 END_USE (11);
23817 }
23818
23819 /* Save CR if we use any that must be preserved. */
23820 if (!WORLD_SAVE_P (info) && info->cr_save_p)
23821 {
23822 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23823 GEN_INT (info->cr_save_offset + frame_off));
23824 rtx mem = gen_frame_mem (SImode, addr);
23825
23826 /* If we didn't copy CR before, do so now using r0. */
23827 if (cr_save_rtx == NULL_RTX)
23828 {
23829 START_USE (0);
23830 cr_save_rtx = gen_rtx_REG (SImode, 0);
23831 rs6000_emit_move_from_cr (cr_save_rtx);
23832 }
23833
23834 /* Saving CR requires a two-instruction sequence: one instruction
23835 to move the CR to a general-purpose register, and a second
23836 instruction that stores the GPR to memory.
23837
23838 We do not emit any DWARF CFI records for the first of these,
23839 because we cannot properly represent the fact that CR is saved in
23840 a register. One reason is that we cannot express that multiple
23841 CR fields are saved; another reason is that on 64-bit, the size
23842 of the CR register in DWARF (4 bytes) differs from the size of
23843 a general-purpose register.
23844
23845 This means if any intervening instruction were to clobber one of
23846 the call-saved CR fields, we'd have incorrect CFI. To prevent
23847 this from happening, we mark the store to memory as a use of
23848 those CR fields, which prevents any such instruction from being
23849 scheduled in between the two instructions. */
23850 rtx crsave_v[9];
23851 int n_crsave = 0;
23852 int i;
23853
23854 crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
23855 for (i = 0; i < 8; i++)
23856 if (save_reg_p (CR0_REGNO + i))
23857 crsave_v[n_crsave++]
23858 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23859
23860 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
23861 gen_rtvec_v (n_crsave, crsave_v)));
23862 END_USE (REGNO (cr_save_rtx));
23863
23864 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
23865 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
23866 so we need to construct a frame expression manually. */
23867 RTX_FRAME_RELATED_P (insn) = 1;
23868
23869 /* Update address to be stack-pointer relative, like
23870 rs6000_frame_related would do. */
23871 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
23872 GEN_INT (info->cr_save_offset + sp_off));
23873 mem = gen_frame_mem (SImode, addr);
23874
23875 if (DEFAULT_ABI == ABI_ELFv2)
23876 {
23877 /* In the ELFv2 ABI we generate separate CFI records for each
23878 CR field that was actually saved. They all point to the
23879 same 32-bit stack slot. */
23880 rtx crframe[8];
23881 int n_crframe = 0;
23882
23883 for (i = 0; i < 8; i++)
23884 if (save_reg_p (CR0_REGNO + i))
23885 {
23886 crframe[n_crframe]
23887 = gen_rtx_SET (VOIDmode, mem,
23888 gen_rtx_REG (SImode, CR0_REGNO + i));
23889
23890 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
23891 n_crframe++;
23892 }
23893
23894 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23895 gen_rtx_PARALLEL (VOIDmode,
23896 gen_rtvec_v (n_crframe, crframe)));
23897 }
23898 else
23899 {
23900 /* In other ABIs, by convention, we use a single CR regnum to
23901 represent the fact that all call-saved CR fields are saved.
23902 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
23903 rtx set = gen_rtx_SET (VOIDmode, mem,
23904 gen_rtx_REG (SImode, CR2_REGNO));
23905 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
23906 }
23907 }
23908
23909 /* In the ELFv2 ABI we need to save all call-saved CR fields into
23910 *separate* slots if the routine calls __builtin_eh_return, so
23911 that they can be independently restored by the unwinder. */
23912 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
23913 {
23914 int i, cr_off = info->ehcr_offset;
23915 rtx crsave;
23916
23917 /* ??? We might get better performance by using multiple mfocrf
23918 instructions. */
23919 crsave = gen_rtx_REG (SImode, 0);
23920 emit_insn (gen_movesi_from_cr (crsave));
23921
23922 for (i = 0; i < 8; i++)
23923 if (!call_used_regs[CR0_REGNO + i])
23924 {
23925 rtvec p = rtvec_alloc (2);
23926 RTVEC_ELT (p, 0)
23927 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
23928 RTVEC_ELT (p, 1)
23929 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23930
23931 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23932
23933 RTX_FRAME_RELATED_P (insn) = 1;
23934 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23935 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
23936 sp_reg_rtx, cr_off + sp_off));
23937
23938 cr_off += reg_size;
23939 }
23940 }
23941
23942 /* Update the stack and set the back pointer unless this is V.4,
23943 for which it was done previously. */
23944 if (!WORLD_SAVE_P (info) && info->push_p
23945 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
23946 {
23947 rtx ptr_reg = NULL;
23948 int ptr_off = 0;
23949
23950 /* If saving altivec regs we need to be able to address all save
23951 locations using a 16-bit offset. */
23952 if ((strategy & SAVE_INLINE_VRS) == 0
23953 || (info->altivec_size != 0
23954 && (info->altivec_save_offset + info->altivec_size - 16
23955 + info->total_size - frame_off) > 32767)
23956 || (info->vrsave_size != 0
23957 && (info->vrsave_save_offset
23958 + info->total_size - frame_off) > 32767))
23959 {
23960 int sel = SAVRES_SAVE | SAVRES_VR;
23961 unsigned ptr_regno = ptr_regno_for_savres (sel);
23962
23963 if (using_static_chain_p
23964 && ptr_regno == STATIC_CHAIN_REGNUM)
23965 ptr_regno = 12;
23966 if (REGNO (frame_reg_rtx) != ptr_regno)
23967 START_USE (ptr_regno);
23968 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23969 frame_reg_rtx = ptr_reg;
23970 ptr_off = info->altivec_save_offset + info->altivec_size;
23971 frame_off = -ptr_off;
23972 }
23973 else if (REGNO (frame_reg_rtx) == 1)
23974 frame_off = info->total_size;
23975 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23976 sp_off = info->total_size;
23977 if (frame_reg_rtx != sp_reg_rtx)
23978 rs6000_emit_stack_tie (frame_reg_rtx, false);
23979 }
23980
23981 /* Set frame pointer, if needed. */
23982 if (frame_pointer_needed)
23983 {
23984 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
23985 sp_reg_rtx);
23986 RTX_FRAME_RELATED_P (insn) = 1;
23987 }
23988
23989 /* Save AltiVec registers if needed. Save here because the red zone does
23990 not always include AltiVec registers. */
23991 if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
23992 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
23993 {
23994 int end_save = info->altivec_save_offset + info->altivec_size;
23995 int ptr_off;
23996 /* Oddly, the vector save/restore functions point r0 at the end
23997 of the save area, then use r11 or r12 to load offsets for
23998 [reg+reg] addressing. */
23999 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24000 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
24001 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24002
24003 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24004 NOT_INUSE (0);
24005 if (end_save + frame_off != 0)
24006 {
24007 rtx offset = GEN_INT (end_save + frame_off);
24008
24009 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24010 }
24011 else
24012 emit_move_insn (ptr_reg, frame_reg_rtx);
24013
24014 ptr_off = -end_save;
24015 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24016 info->altivec_save_offset + ptr_off,
24017 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
24018 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
24019 NULL_RTX, NULL_RTX, NULL_RTX);
24020 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24021 {
24022 /* The oddity mentioned above clobbered our frame reg. */
24023 emit_move_insn (frame_reg_rtx, ptr_reg);
24024 frame_off = ptr_off;
24025 }
24026 }
24027 else if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
24028 && info->altivec_size != 0)
24029 {
24030 int i;
24031
24032 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24033 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24034 {
24035 rtx areg, savereg, mem, split_reg;
24036 int offset;
24037
24038 offset = (info->altivec_save_offset + frame_off
24039 + 16 * (i - info->first_altivec_reg_save));
24040
24041 savereg = gen_rtx_REG (V4SImode, i);
24042
24043 NOT_INUSE (0);
24044 areg = gen_rtx_REG (Pmode, 0);
24045 emit_move_insn (areg, GEN_INT (offset));
24046
24047 /* AltiVec addressing mode is [reg+reg]. */
24048 mem = gen_frame_mem (V4SImode,
24049 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
24050
24051 insn = emit_move_insn (mem, savereg);
24052
24053 /* When we split a VSX store into two insns, we need to make
24054 sure the DWARF info knows which register we are storing.
24055 Pass it in to be used on the appropriate note. */
24056 if (!BYTES_BIG_ENDIAN
24057 && GET_CODE (PATTERN (insn)) == SET
24058 && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
24059 split_reg = savereg;
24060 else
24061 split_reg = NULL_RTX;
24062
24063 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24064 areg, GEN_INT (offset), split_reg);
24065 }
24066 }
24067
24068 /* VRSAVE is a bit vector representing which AltiVec registers
24069 are used. The OS uses this to determine which vector
24070 registers to save on a context switch. We need to save
24071 VRSAVE on the stack frame, add whatever AltiVec registers we
24072 used in this function, and do the corresponding magic in the
24073 epilogue. */
24074
24075 if (!WORLD_SAVE_P (info)
24076 && TARGET_ALTIVEC
24077 && TARGET_ALTIVEC_VRSAVE
24078 && info->vrsave_mask != 0)
24079 {
24080 rtx reg, vrsave;
24081 int offset;
24082 int save_regno;
24083
24084 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
24085 be using r12 as frame_reg_rtx and r11 as the static chain
24086 pointer for nested functions. */
24087 save_regno = 12;
24088 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24089 && !using_static_chain_p)
24090 save_regno = 11;
24091 else if (REGNO (frame_reg_rtx) == 12)
24092 {
24093 save_regno = 11;
24094 if (using_static_chain_p)
24095 save_regno = 0;
24096 }
24097
24098 NOT_INUSE (save_regno);
24099 reg = gen_rtx_REG (SImode, save_regno);
24100 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24101 if (TARGET_MACHO)
24102 emit_insn (gen_get_vrsave_internal (reg));
24103 else
24104 emit_insn (gen_rtx_SET (VOIDmode, reg, vrsave));
24105
24106 /* Save VRSAVE. */
24107 offset = info->vrsave_save_offset + frame_off;
24108 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
24109
24110 /* Include the registers in the mask. */
24111 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
24112
24113 insn = emit_insn (generate_set_vrsave (reg, info, 0));
24114 }
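/* Illustrative sketch of the sequence emitted above (VRSAVE is
SPR 256; the exact OR-immediate form depends on the mask value):
mfvrsave <reg>
stw <reg>,<vrsave_save_offset>(<frame_reg>)
oris/ori <reg>,<reg>,<vrsave_mask>
mtvrsave <reg>  */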
24115
24116 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
24117 if (!TARGET_SINGLE_PIC_BASE
24118 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
24119 || (DEFAULT_ABI == ABI_V4
24120 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
24121 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
24122 {
24123 /* If emit_load_toc_table will use the link register, we need to save
24124 it. We use R12 for this purpose because emit_load_toc_table
24125 can use register 0. This allows us to use a plain 'blr' to return
24126 from the procedure more often. */
24127 int save_LR_around_toc_setup = (TARGET_ELF
24128 && DEFAULT_ABI == ABI_V4
24129 && flag_pic
24130 && ! info->lr_save_p
24131 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
24132 if (save_LR_around_toc_setup)
24133 {
24134 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24135 rtx tmp = gen_rtx_REG (Pmode, 12);
24136
24137 insn = emit_move_insn (tmp, lr);
24138 RTX_FRAME_RELATED_P (insn) = 1;
24139
24140 rs6000_emit_load_toc_table (TRUE);
24141
24142 insn = emit_move_insn (lr, tmp);
24143 add_reg_note (insn, REG_CFA_RESTORE, lr);
24144 RTX_FRAME_RELATED_P (insn) = 1;
24145 }
24146 else
24147 rs6000_emit_load_toc_table (TRUE);
24148 }
24149
24150 #if TARGET_MACHO
24151 if (!TARGET_SINGLE_PIC_BASE
24152 && DEFAULT_ABI == ABI_DARWIN
24153 && flag_pic && crtl->uses_pic_offset_table)
24154 {
24155 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24156 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
24157
24158 /* Save and restore LR locally around this call (in R0). */
24159 if (!info->lr_save_p)
24160 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
24161
24162 emit_insn (gen_load_macho_picbase (src));
24163
24164 emit_move_insn (gen_rtx_REG (Pmode,
24165 RS6000_PIC_OFFSET_TABLE_REGNUM),
24166 lr);
24167
24168 if (!info->lr_save_p)
24169 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
24170 }
24171 #endif
24172
24173 /* If we need to, save the TOC register after doing the stack setup.
24174 Do not emit eh frame info for this save. The unwinder wants info,
24175 conceptually attached to instructions in this function, about
24176 register values in the caller of this function. This R2 may have
24177 already been changed from the value in the caller.
24178 We don't attempt to write accurate DWARF EH frame info for R2
24179 because code emitted by gcc for a (non-pointer) function call
24180 doesn't save and restore R2. Instead, R2 is managed out-of-line
24181 by a linker generated plt call stub when the function resides in
24182 a shared library. This behaviour is costly to describe in DWARF,
24183 both in terms of the size of DWARF info and the time taken in the
24184 unwinder to interpret it. R2 changes, apart from the
24185 calls_eh_return case earlier in this function, are handled by
24186 linux-unwind.h frob_update_context. */
24187 if (rs6000_save_toc_in_prologue_p ())
24188 {
24189 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
24190 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
24191 }
24192 }
24193
24194 /* Write function prologue. */
24195
24196 static void
24197 rs6000_output_function_prologue (FILE *file,
24198 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
24199 {
24200 rs6000_stack_t *info = rs6000_stack_info ();
24201
24202 if (TARGET_DEBUG_STACK)
24203 debug_stack_info (info);
24204
24205 /* Write .extern for any function we will call to save and restore
24206 fp values. */
24207 if (info->first_fp_reg_save < 64
24208 && !TARGET_MACHO
24209 && !TARGET_ELF)
24210 {
24211 char *name;
24212 int regno = info->first_fp_reg_save - 32;
24213
24214 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
24215 {
24216 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
24217 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
24218 name = rs6000_savres_routine_name (info, regno, sel);
24219 fprintf (file, "\t.extern %s\n", name);
24220 }
24221 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
24222 {
24223 bool lr = (info->savres_strategy
24224 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
24225 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24226 name = rs6000_savres_routine_name (info, regno, sel);
24227 fprintf (file, "\t.extern %s\n", name);
24228 }
24229 }
24230
24231 /* ELFv2 ABI r2 setup code and local entry point. This must follow
24232 immediately after the global entry point label. */
24233 if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
24234 {
24235 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
24236
24237 fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
24238 fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
24239
24240 fputs ("\t.localentry\t", file);
24241 assemble_name (file, name);
24242 fputs (",.-", file);
24243 assemble_name (file, name);
24244 fputs ("\n", file);
24245 }
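/* For a function "foo" the code above emits, e.g.:
0:	addis 2,12,.TOC.-0b@ha
	addi 2,2,.TOC.-0b@l
	.localentry	foo,.-foo
i.e. the global entry point sets up r2 from the entry address
in r12, and .localentry marks the local entry point.  */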
24246
24247 /* Output -mprofile-kernel code. This needs to be done here instead of
24248 in output_function_profile since it must go after the ELFv2 ABI
24249 local entry point. */
24250 if (TARGET_PROFILE_KERNEL && crtl->profile)
24251 {
24252 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24253 gcc_assert (!TARGET_32BIT);
24254
24255 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
24256 asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
24257
24258 /* In the ELFv2 ABI we have no compiler stack word. It must be
24259 the responsibility of _mcount to preserve the static chain
24260 register if required. */
24261 if (DEFAULT_ABI != ABI_ELFv2
24262 && cfun->static_chain_decl != NULL)
24263 {
24264 asm_fprintf (file, "\tstd %s,24(%s)\n",
24265 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24266 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24267 asm_fprintf (file, "\tld %s,24(%s)\n",
24268 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24269 }
24270 else
24271 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24272 }
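/* E.g. under ELFv1 with a static chain the code above emits
(assuming for illustration that RS6000_MCOUNT expands to
"._mcount"; the actual symbol is target-defined):
mflr 0
std 0,16(1)
std 11,24(1)
bl ._mcount
ld 11,24(1)  */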
24273
24274 rs6000_pic_labelno++;
24275 }
24276
24277 /* Non-zero if vmx regs are restored before the frame pop, zero if
24278 we restore after the pop when possible. */
24279 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
24280
24281 /* Restoring cr is a two-step process: loading a reg from the frame
24282 save, then moving the reg to cr. For ABI_V4 we must let the
24283 unwinder know that the stack location is no longer valid at or
24284 before the stack deallocation, but we can't emit a cfa_restore for
24285 cr at the stack deallocation like we do for other registers.
24286 The trouble is that it is possible for the move to cr to be
24287 scheduled after the stack deallocation. So say exactly where cr
24288 is located on each of the two insns. */
24289
24290 static rtx
24291 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
24292 {
24293 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
24294 rtx reg = gen_rtx_REG (SImode, regno);
24295 rtx_insn *insn = emit_move_insn (reg, mem);
24296
24297 if (!exit_func && DEFAULT_ABI == ABI_V4)
24298 {
24299 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24300 rtx set = gen_rtx_SET (VOIDmode, reg, cr);
24301
24302 add_reg_note (insn, REG_CFA_REGISTER, set);
24303 RTX_FRAME_RELATED_P (insn) = 1;
24304 }
24305 return reg;
24306 }
24307
24308 /* Reload CR from REG. */
24309
24310 static void
24311 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
24312 {
24313 int count = 0;
24314 int i;
24315
24316 if (using_mfcr_multiple)
24317 {
24318 for (i = 0; i < 8; i++)
24319 if (save_reg_p (CR0_REGNO + i))
24320 count++;
24321 gcc_assert (count);
24322 }
24323
24324 if (using_mfcr_multiple && count > 1)
24325 {
24326 rtx_insn *insn;
24327 rtvec p;
24328 int ndx;
24329
24330 p = rtvec_alloc (count);
24331
24332 ndx = 0;
24333 for (i = 0; i < 8; i++)
24334 if (save_reg_p (CR0_REGNO + i))
24335 {
24336 rtvec r = rtvec_alloc (2);
24337 RTVEC_ELT (r, 0) = reg;
24338 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
24339 RTVEC_ELT (p, ndx) =
24340 gen_rtx_SET (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i),
24341 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
24342 ndx++;
24343 }
24344 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24345 gcc_assert (ndx == count);
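/* The PARALLEL built above is typically matched by a single mtcrf
insn whose field mask is the union of the 1 << (7-i) bits; e.g.
restoring CR2, CR3 and CR4 gives "mtcrf 56,<reg>" (32|16|8).
Illustrative; the actual insn chosen depends on the mtcrf
patterns in rs6000.md.  */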
24346
24347 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24348 CR field separately. */
24349 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24350 {
24351 for (i = 0; i < 8; i++)
24352 if (save_reg_p (CR0_REGNO + i))
24353 add_reg_note (insn, REG_CFA_RESTORE,
24354 gen_rtx_REG (SImode, CR0_REGNO + i));
24355
24356 RTX_FRAME_RELATED_P (insn) = 1;
24357 }
24358 }
24359 else
24360 for (i = 0; i < 8; i++)
24361 if (save_reg_p (CR0_REGNO + i))
24362 {
24363 rtx insn = emit_insn (gen_movsi_to_cr_one
24364 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24365
24366 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24367 CR field separately, attached to the insn that in fact
24368 restores this particular CR field. */
24369 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24370 {
24371 add_reg_note (insn, REG_CFA_RESTORE,
24372 gen_rtx_REG (SImode, CR0_REGNO + i));
24373
24374 RTX_FRAME_RELATED_P (insn) = 1;
24375 }
24376 }
24377
24378 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
24379 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
24380 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24381 {
24382 rtx_insn *insn = get_last_insn ();
24383 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24384
24385 add_reg_note (insn, REG_CFA_RESTORE, cr);
24386 RTX_FRAME_RELATED_P (insn) = 1;
24387 }
24388 }
24389
24390 /* Like cr, the move to lr instruction can be scheduled after the
24391 stack deallocation, but unlike cr, its stack frame save is still
24392 valid. So we only need to emit the cfa_restore on the correct
24393 instruction. */
24394
24395 static void
24396 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
24397 {
24398 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
24399 rtx reg = gen_rtx_REG (Pmode, regno);
24400
24401 emit_move_insn (reg, mem);
24402 }
24403
24404 static void
24405 restore_saved_lr (int regno, bool exit_func)
24406 {
24407 rtx reg = gen_rtx_REG (Pmode, regno);
24408 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24409 rtx_insn *insn = emit_move_insn (lr, reg);
24410
24411 if (!exit_func && flag_shrink_wrap)
24412 {
24413 add_reg_note (insn, REG_CFA_RESTORE, lr);
24414 RTX_FRAME_RELATED_P (insn) = 1;
24415 }
24416 }
24417
24418 static rtx
24419 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
24420 {
24421 if (DEFAULT_ABI == ABI_ELFv2)
24422 {
24423 int i;
24424 for (i = 0; i < 8; i++)
24425 if (save_reg_p (CR0_REGNO + i))
24426 {
24427 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
24428 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
24429 cfa_restores);
24430 }
24431 }
24432 else if (info->cr_save_p)
24433 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24434 gen_rtx_REG (SImode, CR2_REGNO),
24435 cfa_restores);
24436
24437 if (info->lr_save_p)
24438 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24439 gen_rtx_REG (Pmode, LR_REGNO),
24440 cfa_restores);
24441 return cfa_restores;
24442 }
24443
24444 /* Return true if OFFSET from stack pointer can be clobbered by signals.
24445 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
24446 below the stack pointer that are not clobbered by signals. */
24447
24448 static inline bool
24449 offset_below_red_zone_p (HOST_WIDE_INT offset)
24450 {
24451 return offset < (DEFAULT_ABI == ABI_V4
24452 ? 0
24453 : TARGET_32BIT ? -220 : -288);
24454 }
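/* Example: on the 64-bit AIX-style ABIs (288-byte cushion) an
offset of -300 is below the red zone and may be clobbered by a
signal handler, while offsets -288 through -1 are safe; on V.4
every negative offset counts as clobberable.  */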
24455
24456 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
24457
24458 static void
24459 emit_cfa_restores (rtx cfa_restores)
24460 {
24461 rtx_insn *insn = get_last_insn ();
24462 rtx *loc = &REG_NOTES (insn);
24463
24464 while (*loc)
24465 loc = &XEXP (*loc, 1);
24466 *loc = cfa_restores;
24467 RTX_FRAME_RELATED_P (insn) = 1;
24468 }
24469
24470 /* Emit function epilogue as insns. */
24471
24472 void
24473 rs6000_emit_epilogue (int sibcall)
24474 {
24475 rs6000_stack_t *info;
24476 int restoring_GPRs_inline;
24477 int restoring_FPRs_inline;
24478 int using_load_multiple;
24479 int using_mtcr_multiple;
24480 int use_backchain_to_restore_sp;
24481 int restore_lr;
24482 int strategy;
24483 HOST_WIDE_INT frame_off = 0;
24484 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
24485 rtx frame_reg_rtx = sp_reg_rtx;
24486 rtx cfa_restores = NULL_RTX;
24487 rtx insn;
24488 rtx cr_save_reg = NULL_RTX;
24489 machine_mode reg_mode = Pmode;
24490 int reg_size = TARGET_32BIT ? 4 : 8;
24491 int i;
24492 bool exit_func;
24493 unsigned ptr_regno;
24494
24495 info = rs6000_stack_info ();
24496
24497 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24498 {
24499 reg_mode = V2SImode;
24500 reg_size = 8;
24501 }
24502
24503 strategy = info->savres_strategy;
24504 using_load_multiple = strategy & SAVRES_MULTIPLE;
24505 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
24506 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
24507 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
24508 || rs6000_cpu == PROCESSOR_PPC603
24509 || rs6000_cpu == PROCESSOR_PPC750
24510 || optimize_size);
24511 /* Restore via the backchain when we have a large frame, since this
24512 is more efficient than an addis, addi pair. The second condition
24513 here will not trigger at the moment; we don't actually need a
24514 frame pointer for alloca, but the generic parts of the compiler
24515 give us one anyway. */
24516 use_backchain_to_restore_sp = (info->total_size > 32767 - info->lr_save_offset
24517 || (cfun->calls_alloca
24518 && !frame_pointer_needed));
24519 restore_lr = (info->lr_save_p
24520 && (restoring_FPRs_inline
24521 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
24522 && (restoring_GPRs_inline
24523 || info->first_fp_reg_save < 64));
24524
24525 if (WORLD_SAVE_P (info))
24526 {
24527 int i, j;
24528 char rname[30];
24529 const char *alloc_rname;
24530 rtvec p;
24531
24532 /* eh_rest_world_r10 will return to the location saved in the LR
24533 stack slot (which is not likely to be our caller).
24534 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
24535 rest_world is similar, except any R10 parameter is ignored.
24536 The exception-handling stuff that was here in 2.95 is no
24537 longer necessary. */
24538
24539 p = rtvec_alloc (9
24540 + 1
24541 + 32 - info->first_gp_reg_save
24542 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
24543 + 63 + 1 - info->first_fp_reg_save);
24544
24545 strcpy (rname, ((crtl->calls_eh_return) ?
24546 "*eh_rest_world_r10" : "*rest_world"));
24547 alloc_rname = ggc_strdup (rname);
24548
24549 j = 0;
24550 RTVEC_ELT (p, j++) = ret_rtx;
24551 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
24552 gen_rtx_REG (Pmode,
24553 LR_REGNO));
24554 RTVEC_ELT (p, j++)
24555 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
24556 /* The instruction pattern requires a clobber here;
24557 it is shared with the restVEC helper. */
24558 RTVEC_ELT (p, j++)
24559 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
24560
24561 {
24562 /* CR register traditionally saved as CR2. */
24563 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
24564 RTVEC_ELT (p, j++)
24565 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
24566 if (flag_shrink_wrap)
24567 {
24568 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24569 gen_rtx_REG (Pmode, LR_REGNO),
24570 cfa_restores);
24571 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24572 }
24573 }
24574
24575 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24576 {
24577 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
24578 RTVEC_ELT (p, j++)
24579 = gen_frame_load (reg,
24580 frame_reg_rtx, info->gp_save_offset + reg_size * i);
24581 if (flag_shrink_wrap)
24582 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24583 }
24584 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
24585 {
24586 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
24587 RTVEC_ELT (p, j++)
24588 = gen_frame_load (reg,
24589 frame_reg_rtx, info->altivec_save_offset + 16 * i);
24590 if (flag_shrink_wrap)
24591 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24592 }
24593 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
24594 {
24595 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24596 ? DFmode : SFmode),
24597 info->first_fp_reg_save + i);
24598 RTVEC_ELT (p, j++)
24599 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
24600 if (flag_shrink_wrap)
24601 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24602 }
24603 RTVEC_ELT (p, j++)
24604 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
24605 RTVEC_ELT (p, j++)
24606 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
24607 RTVEC_ELT (p, j++)
24608 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
24609 RTVEC_ELT (p, j++)
24610 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
24611 RTVEC_ELT (p, j++)
24612 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
24613 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
24614
24615 if (flag_shrink_wrap)
24616 {
24617 REG_NOTES (insn) = cfa_restores;
24618 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24619 RTX_FRAME_RELATED_P (insn) = 1;
24620 }
24621 return;
24622 }
24623
24624 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
24625 if (info->push_p)
24626 frame_off = info->total_size;
24627
24628 /* Restore AltiVec registers if we must do so before adjusting the
24629 stack. */
24630 if (TARGET_ALTIVEC_ABI
24631 && info->altivec_size != 0
24632 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24633 || (DEFAULT_ABI != ABI_V4
24634 && offset_below_red_zone_p (info->altivec_save_offset))))
24635 {
24636 int i;
24637 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24638
24639 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24640 if (use_backchain_to_restore_sp)
24641 {
24642 int frame_regno = 11;
24643
24644 if ((strategy & REST_INLINE_VRS) == 0)
24645 {
24646 /* Of r11 and r12, select the one not clobbered by an
24647 out-of-line restore function for the frame register. */
24648 frame_regno = 11 + 12 - scratch_regno;
24649 }
24650 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
24651 emit_move_insn (frame_reg_rtx,
24652 gen_rtx_MEM (Pmode, sp_reg_rtx));
24653 frame_off = 0;
24654 }
24655 else if (frame_pointer_needed)
24656 frame_reg_rtx = hard_frame_pointer_rtx;
24657
24658 if ((strategy & REST_INLINE_VRS) == 0)
24659 {
24660 int end_save = info->altivec_save_offset + info->altivec_size;
24661 int ptr_off;
24662 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24663 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24664
24665 if (end_save + frame_off != 0)
24666 {
24667 rtx offset = GEN_INT (end_save + frame_off);
24668
24669 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24670 }
24671 else
24672 emit_move_insn (ptr_reg, frame_reg_rtx);
24673
24674 ptr_off = -end_save;
24675 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24676 info->altivec_save_offset + ptr_off,
24677 0, V4SImode, SAVRES_VR);
24678 }
24679 else
24680 {
24681 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24682 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24683 {
24684 rtx addr, areg, mem, reg;
24685
24686 areg = gen_rtx_REG (Pmode, 0);
24687 emit_move_insn
24688 (areg, GEN_INT (info->altivec_save_offset
24689 + frame_off
24690 + 16 * (i - info->first_altivec_reg_save)));
24691
24692 /* AltiVec addressing mode is [reg+reg]. */
24693 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24694 mem = gen_frame_mem (V4SImode, addr);
24695
24696 reg = gen_rtx_REG (V4SImode, i);
24697 emit_move_insn (reg, mem);
24698 }
24699 }
24700
24701 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24702 if (((strategy & REST_INLINE_VRS) == 0
24703 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24704 && (flag_shrink_wrap
24705 || (offset_below_red_zone_p
24706 (info->altivec_save_offset
24707 + 16 * (i - info->first_altivec_reg_save)))))
24708 {
24709 rtx reg = gen_rtx_REG (V4SImode, i);
24710 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24711 }
24712 }
24713
24714 /* Restore VRSAVE if we must do so before adjusting the stack. */
24715 if (TARGET_ALTIVEC
24716 && TARGET_ALTIVEC_VRSAVE
24717 && info->vrsave_mask != 0
24718 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24719 || (DEFAULT_ABI != ABI_V4
24720 && offset_below_red_zone_p (info->vrsave_save_offset))))
24721 {
24722 rtx reg;
24723
24724 if (frame_reg_rtx == sp_reg_rtx)
24725 {
24726 if (use_backchain_to_restore_sp)
24727 {
24728 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24729 emit_move_insn (frame_reg_rtx,
24730 gen_rtx_MEM (Pmode, sp_reg_rtx));
24731 frame_off = 0;
24732 }
24733 else if (frame_pointer_needed)
24734 frame_reg_rtx = hard_frame_pointer_rtx;
24735 }
24736
24737 reg = gen_rtx_REG (SImode, 12);
24738 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24739 info->vrsave_save_offset + frame_off));
24740
24741 emit_insn (generate_set_vrsave (reg, info, 1));
24742 }
24743
24744 insn = NULL_RTX;
24745 /* If we have a large stack frame, restore the old stack pointer
24746 using the backchain. */
24747 if (use_backchain_to_restore_sp)
24748 {
24749 if (frame_reg_rtx == sp_reg_rtx)
24750 {
24751 /* Under V.4, don't reset the stack pointer until after we're done
24752 loading the saved registers. */
24753 if (DEFAULT_ABI == ABI_V4)
24754 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24755
24756 insn = emit_move_insn (frame_reg_rtx,
24757 gen_rtx_MEM (Pmode, sp_reg_rtx));
24758 frame_off = 0;
24759 }
24760 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24761 && DEFAULT_ABI == ABI_V4)
24762 /* frame_reg_rtx has been set up by the altivec restore. */
24763 ;
24764 else
24765 {
24766 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
24767 frame_reg_rtx = sp_reg_rtx;
24768 }
24769 }
24770 /* If we have a frame pointer, we can restore the old stack pointer
24771 from it. */
24772 else if (frame_pointer_needed)
24773 {
24774 frame_reg_rtx = sp_reg_rtx;
24775 if (DEFAULT_ABI == ABI_V4)
24776 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24777 /* Prevent reordering memory accesses against stack pointer restore. */
24778 else if (cfun->calls_alloca
24779 || offset_below_red_zone_p (-info->total_size))
24780 rs6000_emit_stack_tie (frame_reg_rtx, true);
24781
24782 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
24783 GEN_INT (info->total_size)));
24784 frame_off = 0;
24785 }
24786 else if (info->push_p
24787 && DEFAULT_ABI != ABI_V4
24788 && !crtl->calls_eh_return)
24789 {
24790 /* Prevent reordering memory accesses against stack pointer restore. */
24791 if (cfun->calls_alloca
24792 || offset_below_red_zone_p (-info->total_size))
24793 rs6000_emit_stack_tie (frame_reg_rtx, false);
24794 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
24795 GEN_INT (info->total_size)));
24796 frame_off = 0;
24797 }
24798 if (insn && frame_reg_rtx == sp_reg_rtx)
24799 {
24800 if (cfa_restores)
24801 {
24802 REG_NOTES (insn) = cfa_restores;
24803 cfa_restores = NULL_RTX;
24804 }
24805 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24806 RTX_FRAME_RELATED_P (insn) = 1;
24807 }
24808
24809 /* Restore AltiVec registers if we have not done so already. */
24810 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24811 && TARGET_ALTIVEC_ABI
24812 && info->altivec_size != 0
24813 && (DEFAULT_ABI == ABI_V4
24814 || !offset_below_red_zone_p (info->altivec_save_offset)))
24815 {
24816 int i;
24817
24818 if ((strategy & REST_INLINE_VRS) == 0)
24819 {
24820 int end_save = info->altivec_save_offset + info->altivec_size;
24821 int ptr_off;
24822 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24823 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24824 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24825
24826 if (end_save + frame_off != 0)
24827 {
24828 rtx offset = GEN_INT (end_save + frame_off);
24829
24830 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24831 }
24832 else
24833 emit_move_insn (ptr_reg, frame_reg_rtx);
24834
24835 ptr_off = -end_save;
24836 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24837 info->altivec_save_offset + ptr_off,
24838 0, V4SImode, SAVRES_VR);
24839 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24840 {
24841 /* Frame reg was clobbered by out-of-line save. Restore it
24842 from ptr_reg, and if we are calling out-of-line gpr or
24843 fpr restore set up the correct pointer and offset. */
24844 unsigned newptr_regno = 1;
24845 if (!restoring_GPRs_inline)
24846 {
24847 bool lr = info->gp_save_offset + info->gp_size == 0;
24848 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24849 newptr_regno = ptr_regno_for_savres (sel);
24850 end_save = info->gp_save_offset + info->gp_size;
24851 }
24852 else if (!restoring_FPRs_inline)
24853 {
24854 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
24855 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24856 newptr_regno = ptr_regno_for_savres (sel);
24857 end_save = info->fp_save_offset + info->fp_size;
24858 }
24859
24860 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
24861 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
24862
24863 if (end_save + ptr_off != 0)
24864 {
24865 rtx offset = GEN_INT (end_save + ptr_off);
24866
24867 frame_off = -end_save;
24868 emit_insn (gen_add3_insn (frame_reg_rtx, ptr_reg, offset));
24869 }
24870 else
24871 {
24872 frame_off = ptr_off;
24873 emit_move_insn (frame_reg_rtx, ptr_reg);
24874 }
24875 }
24876 }
24877 else
24878 {
24879 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24880 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24881 {
24882 rtx addr, areg, mem, reg;
24883
24884 areg = gen_rtx_REG (Pmode, 0);
24885 emit_move_insn
24886 (areg, GEN_INT (info->altivec_save_offset
24887 + frame_off
24888 + 16 * (i - info->first_altivec_reg_save)));
24889
24890 /* AltiVec addressing mode is [reg+reg]. */
24891 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24892 mem = gen_frame_mem (V4SImode, addr);
24893
24894 reg = gen_rtx_REG (V4SImode, i);
24895 emit_move_insn (reg, mem);
24896 }
24897 }
24898
24899 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24900 if (((strategy & REST_INLINE_VRS) == 0
24901 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24902 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24903 {
24904 rtx reg = gen_rtx_REG (V4SImode, i);
24905 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24906 }
24907 }
24908
24909 /* Restore VRSAVE if we have not done so already. */
24910 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24911 && TARGET_ALTIVEC
24912 && TARGET_ALTIVEC_VRSAVE
24913 && info->vrsave_mask != 0
24914 && (DEFAULT_ABI == ABI_V4
24915 || !offset_below_red_zone_p (info->vrsave_save_offset)))
24916 {
24917 rtx reg;
24918
24919 reg = gen_rtx_REG (SImode, 12);
24920 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24921 info->vrsave_save_offset + frame_off));
24922
24923 emit_insn (generate_set_vrsave (reg, info, 1));
24924 }
24925
24926 /* If we exit by an out-of-line restore function on ABI_V4 then that
24927 function will deallocate the stack, so we don't need to worry
24928 about the unwinder restoring cr from an invalid stack frame
24929 location. */
24930 exit_func = (!restoring_FPRs_inline
24931 || (!restoring_GPRs_inline
24932 && info->first_fp_reg_save == 64));
24933
24934 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
24935 *separate* slots if the routine calls __builtin_eh_return, so
24936 that they can be independently restored by the unwinder. */
24937 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24938 {
24939 int i, cr_off = info->ehcr_offset;
24940
24941 for (i = 0; i < 8; i++)
24942 if (!call_used_regs[CR0_REGNO + i])
24943 {
24944 rtx reg = gen_rtx_REG (SImode, 0);
24945 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24946 cr_off + frame_off));
24947
24948 insn = emit_insn (gen_movsi_to_cr_one
24949 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24950
24951 if (!exit_func && flag_shrink_wrap)
24952 {
24953 add_reg_note (insn, REG_CFA_RESTORE,
24954 gen_rtx_REG (SImode, CR0_REGNO + i));
24955
24956 RTX_FRAME_RELATED_P (insn) = 1;
24957 }
24958
24959 cr_off += reg_size;
24960 }
24961 }
24962
24963 /* Get the old lr if we saved it. If we are restoring registers
24964 out-of-line, then the out-of-line routines can do this for us. */
24965 if (restore_lr && restoring_GPRs_inline)
24966 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
24967
24968 /* Get the old cr if we saved it. */
24969 if (info->cr_save_p)
24970 {
24971 unsigned cr_save_regno = 12;
24972
24973 if (!restoring_GPRs_inline)
24974 {
24975 /* Ensure we don't use the register used by the out-of-line
24976 gpr register restore below. */
24977 bool lr = info->gp_save_offset + info->gp_size == 0;
24978 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24979 int gpr_ptr_regno = ptr_regno_for_savres (sel);
24980
24981 if (gpr_ptr_regno == 12)
24982 cr_save_regno = 11;
24983 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
24984 }
24985 else if (REGNO (frame_reg_rtx) == 12)
24986 cr_save_regno = 11;
24987
24988 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
24989 info->cr_save_offset + frame_off,
24990 exit_func);
24991 }
24992
24993 /* Set LR here to try to overlap restores below. */
24994 if (restore_lr && restoring_GPRs_inline)
24995 restore_saved_lr (0, exit_func);
24996
24997 /* Load exception handler data registers, if needed. */
24998 if (crtl->calls_eh_return)
24999 {
25000 unsigned int i, regno;
25001
25002 if (TARGET_AIX)
25003 {
25004 rtx reg = gen_rtx_REG (reg_mode, 2);
25005 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25006 frame_off + RS6000_TOC_SAVE_SLOT));
25007 }
25008
25009 for (i = 0; ; ++i)
25010 {
25011 rtx mem;
25012
25013 regno = EH_RETURN_DATA_REGNO (i);
25014 if (regno == INVALID_REGNUM)
25015 break;
25016
25017 /* Note: possible use of r0 here to address SPE regs. */
25018 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
25019 info->ehrd_offset + frame_off
25020 + reg_size * (int) i);
25021
25022 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
25023 }
25024 }
25025
25026 /* Restore GPRs. This is done as a PARALLEL if we are using
25027 the load-multiple instructions. */
25028 if (TARGET_SPE_ABI
25029 && info->spe_64bit_regs_used
25030 && info->first_gp_reg_save != 32)
25031 {
25032 /* Determine whether we can address all of the registers that need
25033 to be saved with an offset from frame_reg_rtx that fits in
25034 the small const field for SPE memory instructions. */
25035 int spe_regs_addressable
25036 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25037 + reg_size * (32 - info->first_gp_reg_save - 1))
25038 && restoring_GPRs_inline);
25039
25040 if (!spe_regs_addressable)
25041 {
25042 int ool_adjust = 0;
25043 rtx old_frame_reg_rtx = frame_reg_rtx;
25044 /* Make r11 point to the start of the SPE save area. We worried about
25045 not clobbering it when we were saving registers in the prologue.
25046 There's no need to worry here because the static chain is passed
25047 anew to every function. */
25048
25049 if (!restoring_GPRs_inline)
25050 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25051 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25052 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
25053 GEN_INT (info->spe_gp_save_offset
25054 + frame_off
25055 - ool_adjust)));
25056 /* Keep the invariant that frame_reg_rtx + frame_off points
25057 at the top of the stack frame. */
25058 frame_off = -info->spe_gp_save_offset + ool_adjust;
25059 }
25060
25061 if (restoring_GPRs_inline)
25062 {
25063 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
25064
25065 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25066 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25067 {
25068 rtx offset, addr, mem, reg;
25069
25070 /* We're doing all this to ensure that the immediate offset
25071 fits into the immediate field of 'evldd'. */
25072 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
25073
25074 offset = GEN_INT (spe_offset + reg_size * i);
25075 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
25076 mem = gen_rtx_MEM (V2SImode, addr);
25077 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
25078
25079 emit_move_insn (reg, mem);
25080 }
25081 }
25082 else
25083 rs6000_emit_savres_rtx (info, frame_reg_rtx,
25084 info->spe_gp_save_offset + frame_off,
25085 info->lr_save_offset + frame_off,
25086 reg_mode,
25087 SAVRES_GPR | SAVRES_LR);
25088 }
25089 else if (!restoring_GPRs_inline)
25090 {
25091 /* We are jumping to an out-of-line function. */
25092 rtx ptr_reg;
25093 int end_save = info->gp_save_offset + info->gp_size;
25094 bool can_use_exit = end_save == 0;
25095 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
25096 int ptr_off;
25097
25098 /* Emit stack reset code if we need it. */
25099 ptr_regno = ptr_regno_for_savres (sel);
25100 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25101 if (can_use_exit)
25102 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25103 else if (end_save + frame_off != 0)
25104 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
25105 GEN_INT (end_save + frame_off)));
25106 else if (REGNO (frame_reg_rtx) != ptr_regno)
25107 emit_move_insn (ptr_reg, frame_reg_rtx);
25108 if (REGNO (frame_reg_rtx) == ptr_regno)
25109 frame_off = -end_save;
25110
25111 if (can_use_exit && info->cr_save_p)
25112 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
25113
25114 ptr_off = -end_save;
25115 rs6000_emit_savres_rtx (info, ptr_reg,
25116 info->gp_save_offset + ptr_off,
25117 info->lr_save_offset + ptr_off,
25118 reg_mode, sel);
25119 }
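/* The out-of-line routine invoked here is one of the GPR restore
helpers, e.g. "_restgpr0_28" to restore r28..r31 on 64-bit ELF
(name shown for illustration; rs6000_savres_routine_name forms
the exact name, and an LR-restoring exit variant is selected
when can_use_exit is true).  */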
25120 else if (using_load_multiple)
25121 {
25122 rtvec p;
25123 p = rtvec_alloc (32 - info->first_gp_reg_save);
25124 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25125 RTVEC_ELT (p, i)
25126 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25127 frame_reg_rtx,
25128 info->gp_save_offset + frame_off + reg_size * i);
25129 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25130 }
25131 else
25132 {
25133 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25134 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25135 emit_insn (gen_frame_load
25136 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25137 frame_reg_rtx,
25138 info->gp_save_offset + frame_off + reg_size * i));
25139 }
25140
25141 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25142 {
25143 /* If the frame pointer was used then we can't delay emitting
25144 a REG_CFA_DEF_CFA note. This must happen on the insn that
25145 restores the frame pointer, r31. We may have already emitted
25146 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
25147 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
25148 be harmless if emitted. */
25149 if (frame_pointer_needed)
25150 {
25151 insn = get_last_insn ();
25152 add_reg_note (insn, REG_CFA_DEF_CFA,
25153 plus_constant (Pmode, frame_reg_rtx, frame_off));
25154 RTX_FRAME_RELATED_P (insn) = 1;
25155 }
25156
25157 /* Set up cfa_restores. We always need these when
25158 shrink-wrapping. If not shrink-wrapping then we only need
25159 the cfa_restore when the stack location is no longer valid.
25160 The cfa_restores must be emitted on or before the insn that
25161 invalidates the stack, and of course must not be emitted
25162 before the insn that actually does the restore. The latter
25163 is why it is a bad idea to emit the cfa_restores as a group
25164 on the last instruction here that actually does a restore:
25165 that insn may be reordered with respect to others doing
25166 restores. */
25167 if (flag_shrink_wrap
25168 && !restoring_GPRs_inline
25169 && info->first_fp_reg_save == 64)
25170 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25171
25172 for (i = info->first_gp_reg_save; i < 32; i++)
25173 if (!restoring_GPRs_inline
25174 || using_load_multiple
25175 || rs6000_reg_live_or_pic_offset_p (i))
25176 {
25177 rtx reg = gen_rtx_REG (reg_mode, i);
25178
25179 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25180 }
25181 }
25182
25183 if (!restoring_GPRs_inline
25184 && info->first_fp_reg_save == 64)
25185 {
25186 /* We are jumping to an out-of-line function. */
25187 if (cfa_restores)
25188 emit_cfa_restores (cfa_restores);
25189 return;
25190 }
25191
25192 if (restore_lr && !restoring_GPRs_inline)
25193 {
25194 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25195 restore_saved_lr (0, exit_func);
25196 }
25197
25198 /* Restore fpr's if we need to do it without calling a function. */
25199 if (restoring_FPRs_inline)
25200 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25201 if (save_reg_p (info->first_fp_reg_save + i))
25202 {
25203 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25204 ? DFmode : SFmode),
25205 info->first_fp_reg_save + i);
25206 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25207 info->fp_save_offset + frame_off + 8 * i));
25208 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25209 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25210 }
25211
25212 /* If we saved cr, restore it here. Just those that were used. */
25213 if (info->cr_save_p)
25214 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
25215
25216 /* If this is V.4, unwind the stack pointer after all of the loads
25217 have been done, or set up r11 if we are restoring fp out of line. */
25218 ptr_regno = 1;
25219 if (!restoring_FPRs_inline)
25220 {
25221 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25222 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25223 ptr_regno = ptr_regno_for_savres (sel);
25224 }
25225
25226 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25227 if (REGNO (frame_reg_rtx) == ptr_regno)
25228 frame_off = 0;
25229
25230 if (insn && restoring_FPRs_inline)
25231 {
25232 if (cfa_restores)
25233 {
25234 REG_NOTES (insn) = cfa_restores;
25235 cfa_restores = NULL_RTX;
25236 }
25237 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
25238 RTX_FRAME_RELATED_P (insn) = 1;
25239 }
25240
25241 if (crtl->calls_eh_return)
25242 {
25243 rtx sa = EH_RETURN_STACKADJ_RTX;
25244 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
25245 }
25246
25247 if (!sibcall)
25248 {
25249 rtvec p;
25250 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25251 if (! restoring_FPRs_inline)
25252 {
25253 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
25254 RTVEC_ELT (p, 0) = ret_rtx;
25255 }
25256 else
25257 {
25258 if (cfa_restores)
25259 {
25260 /* We can't hang the cfa_restores off a simple return,
25261 since the shrink-wrap code sometimes uses an existing
25262 return. This means there might be a path from
25263 pre-prologue code to this return, and dwarf2cfi code
25264 wants the eh_frame unwinder state to be the same on
25265 all paths to any point. So we need to emit the
25266 cfa_restores before the return. For -m64 we really
25267 don't need epilogue cfa_restores at all, except for
25268 this irritating dwarf2cfi-with-shrink-wrap
25269 requirement; the stack red-zone means eh_frame info
25270 from the prologue telling the unwinder to restore
25271 from the stack is perfectly good right to the end of
25272 the function. */
25273 emit_insn (gen_blockage ());
25274 emit_cfa_restores (cfa_restores);
25275 cfa_restores = NULL_RTX;
25276 }
25277 p = rtvec_alloc (2);
25278 RTVEC_ELT (p, 0) = simple_return_rtx;
25279 }
25280
25281 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
25282 ? gen_rtx_USE (VOIDmode,
25283 gen_rtx_REG (Pmode, LR_REGNO))
25284 : gen_rtx_CLOBBER (VOIDmode,
25285 gen_rtx_REG (Pmode, LR_REGNO)));
25286
25287 /* If we have to restore more than two FP registers, branch to the
25288 restore function. It will return to our caller. */
25289 if (! restoring_FPRs_inline)
25290 {
25291 int i;
25292 int reg;
25293 rtx sym;
25294
25295 if (flag_shrink_wrap)
25296 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25297
25298 sym = rs6000_savres_routine_sym (info,
25299 SAVRES_FPR | (lr ? SAVRES_LR : 0));
25300 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
25301 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) ? 1 : 11;
25302 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
25303
25304 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25305 {
25306 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
25307
25308 RTVEC_ELT (p, i + 4)
25309 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
25310 if (flag_shrink_wrap)
25311 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
25312 cfa_restores);
25313 }
25314 }
25315
25316 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
25317 }
25318
25319 if (cfa_restores)
25320 {
25321 if (sibcall)
25322 /* Ensure the cfa_restores are hung off an insn that won't
25323 be reordered above other restores. */
25324 emit_insn (gen_blockage ());
25325
25326 emit_cfa_restores (cfa_restores);
25327 }
25328 }
25329
25330 /* Write function epilogue. */
25331
25332 static void
25333 rs6000_output_function_epilogue (FILE *file,
25334 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
25335 {
25336 #if TARGET_MACHO
25337 macho_branch_islands ();
25338 /* Mach-O doesn't support labels at the end of objects, so if
25339 it looks like we might want one, insert a NOP. */
25340 {
25341 rtx_insn *insn = get_last_insn ();
25342 rtx_insn *deleted_debug_label = NULL;
25343 while (insn
25344 && NOTE_P (insn)
25345 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
25346 {
25347 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes only, don't insert
25348 a nop; instead set their CODE_LABEL_NUMBER to -1,
25349 otherwise there would be code generation differences
25350 between -g and -g0. */
25351 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25352 deleted_debug_label = insn;
25353 insn = PREV_INSN (insn);
25354 }
25355 if (insn
25356 && (LABEL_P (insn)
25357 || (NOTE_P (insn)
25358 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
25359 fputs ("\tnop\n", file);
25360 else if (deleted_debug_label)
25361 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
25362 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25363 CODE_LABEL_NUMBER (insn) = -1;
25364 }
25365 #endif
25366
25367 /* Output a traceback table here. See /usr/include/sys/debug.h for info
25368 on its format.
25369
25370 We don't output a traceback table if -finhibit-size-directive was
25371 used. The documentation for -finhibit-size-directive reads
25372 ``don't output a @code{.size} assembler directive, or anything
25373 else that would cause trouble if the function is split in the
25374 middle, and the two halves are placed at locations far apart in
25375 memory.'' The traceback table has this property, since it
25376 includes the offset from the start of the function to the
25377 traceback table itself.
25378
25379 System V.4 PowerPC (and the embedded ABI derived from it) uses a
25380 different traceback table. */
25381 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25382 && ! flag_inhibit_size_directive
25383 && rs6000_traceback != traceback_none && !cfun->is_thunk)
25384 {
25385 const char *fname = NULL;
25386 const char *language_string = lang_hooks.name;
25387 int fixed_parms = 0, float_parms = 0, parm_info = 0;
25388 int i;
25389 int optional_tbtab;
25390 rs6000_stack_t *info = rs6000_stack_info ();
25391
25392 if (rs6000_traceback == traceback_full)
25393 optional_tbtab = 1;
25394 else if (rs6000_traceback == traceback_part)
25395 optional_tbtab = 0;
25396 else
25397 optional_tbtab = !optimize_size && !TARGET_ELF;
25398
25399 if (optional_tbtab)
25400 {
25401 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25402 while (*fname == '.') /* V.4 encodes . in the name */
25403 fname++;
25404
25405 /* Need label immediately before tbtab, so we can compute
25406 its offset from the function start. */
25407 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25408 ASM_OUTPUT_LABEL (file, fname);
25409 }
25410
25411 /* The .tbtab pseudo-op can only be used for the first eight
25412 expressions, since it can't handle the possibly variable
25413 length fields that follow. However, if you omit the optional
25414 fields, the assembler outputs zeros for all optional fields
25415 anyway, giving each variable length field its minimum length
25416 (as defined in sys/debug.h). Thus we cannot use the .tbtab
25417 pseudo-op at all. */
25418
25419 /* An all-zero word flags the start of the tbtab, for debuggers
25420 that have to find it by searching forward from the entry
25421 point or from the current pc. */
25422 fputs ("\t.long 0\n", file);
25423
25424 /* Tbtab format type. Use format type 0. */
25425 fputs ("\t.byte 0,", file);
25426
25427 /* Language type. Unfortunately, there does not seem to be any
25428 official way to discover the language being compiled, so we
25429 use language_string.
25430 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
25431 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
25432 a number, so for now use 9. LTO and Go aren't assigned numbers
25433 either, so for now use 0. */
25434 if (! strcmp (language_string, "GNU C")
25435 || ! strcmp (language_string, "GNU GIMPLE")
25436 || ! strcmp (language_string, "GNU Go"))
25437 i = 0;
25438 else if (! strcmp (language_string, "GNU F77")
25439 || ! strcmp (language_string, "GNU Fortran"))
25440 i = 1;
25441 else if (! strcmp (language_string, "GNU Pascal"))
25442 i = 2;
25443 else if (! strcmp (language_string, "GNU Ada"))
25444 i = 3;
25445 else if (! strcmp (language_string, "GNU C++")
25446 || ! strcmp (language_string, "GNU Objective-C++"))
25447 i = 9;
25448 else if (! strcmp (language_string, "GNU Java"))
25449 i = 13;
25450 else if (! strcmp (language_string, "GNU Objective-C"))
25451 i = 14;
25452 else
25453 gcc_unreachable ();
25454 fprintf (file, "%d,", i);
25455
25456 /* 8 single bit fields: global linkage (not set for C extern linkage,
25457 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
25458 from start of procedure stored in tbtab, internal function, function
25459 has controlled storage, function has no toc, function uses fp,
25460 function logs/aborts fp operations. */
25461 /* Assume that fp operations are used if any fp reg must be saved. */
25462 fprintf (file, "%d,",
25463 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
25464
25465 /* 6 bitfields: function is interrupt handler, name present in
25466 proc table, function calls alloca, on condition directives
25467 (controls stack walks, 3 bits), saves condition reg, saves
25468 link reg. */
25469 /* The `function calls alloca' bit seems to be set whenever reg 31 is
25470 set up as a frame pointer, even when there is no alloca call. */
25471 fprintf (file, "%d,",
25472 ((optional_tbtab << 6)
25473 | ((optional_tbtab & frame_pointer_needed) << 5)
25474 | (info->cr_save_p << 1)
25475 | (info->lr_save_p)));
25476
25477 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
25478 (6 bits). */
25479 fprintf (file, "%d,",
25480 (info->push_p << 7) | (64 - info->first_fp_reg_save));
25481
25482 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
25483 fprintf (file, "%d,", (32 - first_reg_to_save ()));
25484
25485 if (optional_tbtab)
25486 {
25487 /* Compute the parameter info from the function decl argument
25488 list. */
25489 tree decl;
25490 int next_parm_info_bit = 31;
25491
25492 for (decl = DECL_ARGUMENTS (current_function_decl);
25493 decl; decl = DECL_CHAIN (decl))
25494 {
25495 rtx parameter = DECL_INCOMING_RTL (decl);
25496 machine_mode mode = GET_MODE (parameter);
25497
25498 if (GET_CODE (parameter) == REG)
25499 {
25500 if (SCALAR_FLOAT_MODE_P (mode))
25501 {
25502 int bits;
25503
25504 float_parms++;
25505
25506 switch (mode)
25507 {
25508 case SFmode:
25509 case SDmode:
25510 bits = 0x2;
25511 break;
25512
25513 case DFmode:
25514 case DDmode:
25515 case TFmode:
25516 case TDmode:
25517 bits = 0x3;
25518 break;
25519
25520 default:
25521 gcc_unreachable ();
25522 }
25523
25524 /* If only one bit will fit, don't OR in this entry. */
25525 if (next_parm_info_bit > 0)
25526 parm_info |= (bits << (next_parm_info_bit - 1));
25527 next_parm_info_bit -= 2;
25528 }
25529 else
25530 {
25531 fixed_parms += ((GET_MODE_SIZE (mode)
25532 + (UNITS_PER_WORD - 1))
25533 / UNITS_PER_WORD);
25534 next_parm_info_bit -= 1;
25535 }
25536 }
25537 }
25538 }
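/* Worked example (illustrative): for "int f (double d, int i)" the
loop above computes float_parms = 1, fixed_parms = 1 and
parm_info with "11" (double) in bits 31-30 and "0" (fixed word)
in bit 29, all remaining bits zero.  */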
25539
25540 /* Number of fixed point parameters. */
25541 /* This is actually the number of words of fixed point parameters; thus
25542 an 8-byte struct counts as 2, and thus the maximum value is 8. */
25543 fprintf (file, "%d,", fixed_parms);
25544
25545 /* 2 bitfields: number of floating point parameters (7 bits), parameters
25546 all on stack. */
25547 /* This is actually the number of fp registers that hold parameters;
25548 and thus the maximum value is 13. */
25549 /* Set parameters on stack bit if parameters are not in their original
25550 registers, regardless of whether they are on the stack? Xlc
25551 seems to set the bit when not optimizing. */
25552 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
25553
25554 if (! optional_tbtab)
25555 return;
25556
25557 /* Optional fields follow. Some are variable length. */
25558
25559 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
25560 11 double float. */
25561 /* There is an entry for each parameter in a register, in the order that
25562 they occur in the parameter list. Any intervening arguments on the
25563 stack are ignored. If the list overflows a long (max possible length
25564 34 bits) then completely leave off all elements that don't fit. */
25565 /* Only emit this long if there was at least one parameter. */
25566 if (fixed_parms || float_parms)
25567 fprintf (file, "\t.long %d\n", parm_info);
25568
25569 /* Offset from start of code to tb table. */
25570 fputs ("\t.long ", file);
25571 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25572 RS6000_OUTPUT_BASENAME (file, fname);
25573 putc ('-', file);
25574 rs6000_output_function_entry (file, fname);
25575 putc ('\n', file);
25576
25577 /* Interrupt handler mask. */
25578 /* Omit this long, since we never set the interrupt handler bit
25579 above. */
25580
25581 /* Number of CTL (controlled storage) anchors. */
25582 /* Omit this long, since the has_ctl bit is never set above. */
25583
25584 /* Displacement into stack of each CTL anchor. */
25585 /* Omit this list of longs, because there are no CTL anchors. */
25586
25587 /* Length of function name. */
25588 if (*fname == '*')
25589 ++fname;
25590 fprintf (file, "\t.short %d\n", (int) strlen (fname));
25591
25592 /* Function name. */
25593 assemble_string (fname, strlen (fname));
25594
25595 /* Register for alloca automatic storage; this is always reg 31.
25596 Only emit this if the alloca bit was set above. */
25597 if (frame_pointer_needed)
25598 fputs ("\t.byte 31\n", file);
25599
25600 fputs ("\t.align 2\n", file);
25601 }
25602 }
25603 \f
25604 /* A C compound statement that outputs the assembler code for a thunk
25605 function, used to implement C++ virtual function calls with
25606 multiple inheritance. The thunk acts as a wrapper around a virtual
25607 function, adjusting the implicit object parameter before handing
25608 control off to the real function.
25609
25610 First, emit code to add the integer DELTA to the location that
25611 contains the incoming first argument. Assume that this argument
25612 contains a pointer, and is the one used to pass the `this' pointer
25613 in C++. This is the incoming argument *before* the function
25614 prologue, e.g. `%o0' on a sparc. The addition must preserve the
25615 values of all other incoming arguments.
25616
25617 After the addition, emit code to jump to FUNCTION, which is a
25618 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
25619 not touch the return address. Hence returning from FUNCTION will
25620 return to whoever called the current `thunk'.
25621
25622 The effect must be as if FUNCTION had been called directly with the
25623 adjusted first argument. This macro is responsible for emitting
25624 all of the code for a thunk function; output_function_prologue()
25625 and output_function_epilogue() are not invoked.
25626
25627 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
25628 been extracted from it.) It might possibly be useful on some
25629 targets, but probably not.
25630
25631 If you do not define this macro, the target-independent code in the
25632 C++ frontend will generate a less efficient heavyweight thunk that
25633 calls FUNCTION instead of jumping to it. The generic approach does
25634 not support varargs. */
25635
25636 static void
25637 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
25638 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
25639 tree function)
25640 {
25641 rtx this_rtx, funexp;
25642 rtx_insn *insn;
25643
25644 reload_completed = 1;
25645 epilogue_completed = 1;
25646
25647 /* Mark the end of the (empty) prologue. */
25648 emit_note (NOTE_INSN_PROLOGUE_END);
25649
25650 /* Find the "this" pointer. If the function returns a structure,
25651 the structure return pointer is in r3. */
25652 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
25653 this_rtx = gen_rtx_REG (Pmode, 4);
25654 else
25655 this_rtx = gen_rtx_REG (Pmode, 3);
25656
25657 /* Apply the constant offset, if required. */
25658 if (delta)
25659 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
25660
25661 /* Apply the offset from the vtable, if required. */
25662 if (vcall_offset)
25663 {
25664 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
25665 rtx tmp = gen_rtx_REG (Pmode, 12);
25666
25667 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
25668 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
25669 {
25670 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
25671 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
25672 }
25673 else
25674 {
25675 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
25676
25677 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
25678 }
25679 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
25680 }
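/* Illustrative 64-bit expansion of the small-offset case above:
ld 12,0(3)	# r12 = vtable pointer loaded from *this
ld 12,<vcall_offset>(12)	# adjustment slot in the vtable
add 3,3,12	# adjust the this pointer
(32-bit targets use lwz; the large-offset case first adds the
offset to r12 and then loads through it).  */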
25681
25682 /* Generate a tail call to the target function. */
25683 if (!TREE_USED (function))
25684 {
25685 assemble_external (function);
25686 TREE_USED (function) = 1;
25687 }
25688 funexp = XEXP (DECL_RTL (function), 0);
25689 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
25690
25691 #if TARGET_MACHO
25692 if (MACHOPIC_INDIRECT)
25693 funexp = machopic_indirect_call_target (funexp);
25694 #endif
25695
25696 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
25697 generate sibcall RTL explicitly. */
25698 insn = emit_call_insn (
25699 gen_rtx_PARALLEL (VOIDmode,
25700 gen_rtvec (4,
25701 gen_rtx_CALL (VOIDmode,
25702 funexp, const0_rtx),
25703 gen_rtx_USE (VOIDmode, const0_rtx),
25704 gen_rtx_USE (VOIDmode,
25705 gen_rtx_REG (SImode,
25706 LR_REGNO)),
25707 simple_return_rtx)));
25708 SIBLING_CALL_P (insn) = 1;
25709 emit_barrier ();
25710
25711 /* Ensure we have a global entry point for the thunk. ??? We could
25712 avoid that if the target routine doesn't need a global entry point,
25713 but we do not know whether this is the case at this point. */
25714 if (DEFAULT_ABI == ABI_ELFv2)
25715 cfun->machine->r2_setup_needed = true;
25716
25717 /* Run just enough of rest_of_compilation to get the insns emitted.
25718 There's not really enough bulk here to make other passes such as
25719 instruction scheduling worthwhile. Note that use_thunk calls
25720 assemble_start_function and assemble_end_function. */
25721 insn = get_insns ();
25722 shorten_branches (insn);
25723 final_start_function (insn, file, 1);
25724 final (insn, file, 1);
25725 final_end_function ();
25726
25727 reload_completed = 0;
25728 epilogue_completed = 0;
25729 }
25730 \f
25731 /* A quick summary of the various types of 'constant-pool tables'
25732 under PowerPC:
25733
25734 Target           Flags              Name              One table per
25735 AIX              (none)             AIX TOC           object file
25736 AIX              -mfull-toc         AIX TOC           object file
25737 AIX              -mminimal-toc      AIX minimal TOC   translation unit
25738 SVR4/EABI        (none)             SVR4 SDATA        object file
25739 SVR4/EABI        -fpic              SVR4 pic          object file
25740 SVR4/EABI        -fPIC              SVR4 PIC          translation unit
25741 SVR4/EABI        -mrelocatable      EABI TOC          function
25742 SVR4/EABI        -maix              AIX TOC           object file
25743 SVR4/EABI        -maix -mminimal-toc
25744                                     AIX minimal TOC   translation unit
25745 
25746 Name              Reg.  Set by   entries   contains:
25747                                  made by   addrs?    fp?      sum?
25748 
25749 AIX TOC            2    crt0     as        Y         option   option
25750 AIX minimal TOC   30    prolog   gcc       Y         Y        option
25751 SVR4 SDATA        13    crt0     gcc       N         Y        N
25752 SVR4 pic          30    prolog   ld        Y         not yet  N
25753 SVR4 PIC          30    prolog   gcc       Y         option   option
25754 EABI TOC          30    prolog   gcc       Y         option   option
25755
25756 */
25757
25758 /* Hash functions for the hash table. */
25759
25760 static unsigned
25761 rs6000_hash_constant (rtx k)
25762 {
25763 enum rtx_code code = GET_CODE (k);
25764 machine_mode mode = GET_MODE (k);
25765 unsigned result = (code << 3) ^ mode;
25766 const char *format;
25767 int flen, fidx;
25768
25769 format = GET_RTX_FORMAT (code);
25770 flen = strlen (format);
25771 fidx = 0;
25772
25773 switch (code)
25774 {
25775 case LABEL_REF:
25776 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
25777
25778 case CONST_WIDE_INT:
25779 {
25780 int i;
25781 flen = CONST_WIDE_INT_NUNITS (k);
25782 for (i = 0; i < flen; i++)
25783 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
25784 return result;
25785 }
25786
25787 case CONST_DOUBLE:
25788 if (mode != VOIDmode)
25789 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
25790 flen = 2;
25791 break;
25792
25793 case CODE_LABEL:
25794 fidx = 3;
25795 break;
25796
25797 default:
25798 break;
25799 }
25800
25801 for (; fidx < flen; fidx++)
25802 switch (format[fidx])
25803 {
25804 case 's':
25805 {
25806 unsigned i, len;
25807 const char *str = XSTR (k, fidx);
25808 len = strlen (str);
25809 result = result * 613 + len;
25810 for (i = 0; i < len; i++)
25811 result = result * 613 + (unsigned) str[i];
25812 break;
25813 }
25814 case 'u':
25815 case 'e':
25816 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
25817 break;
25818 case 'i':
25819 case 'n':
25820 result = result * 613 + (unsigned) XINT (k, fidx);
25821 break;
25822 case 'w':
25823 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
25824 result = result * 613 + (unsigned) XWINT (k, fidx);
25825 else
25826 {
25827 size_t i;
25828 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
25829 result = result * 613 + (unsigned) (XWINT (k, fidx)
25830 >> CHAR_BIT * i);
25831 }
25832 break;
25833 case '0':
25834 break;
25835 default:
25836 gcc_unreachable ();
25837 }
25838
25839 return result;
25840 }
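
/* Editor's sketch (illustrative, not used by GCC): the multiplicative
   mixing rs6000_hash_constant applies to string operands above, as a
   standalone helper:

	static unsigned
	mix_string (unsigned result, const char *str)
	{
	  size_t i, len = strlen (str);
	  result = result * 613 + (unsigned) len;      // fold in the length
	  for (i = 0; i < len; i++)                    // then each character
	    result = result * 613 + (unsigned) str[i];
	  return result;
	}

   613 and 1231 are just odd constants used to spread bits; nothing here
   depends on their exact values.  */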
25841
25842 hashval_t
25843 toc_hasher::hash (toc_hash_struct *thc)
25844 {
25845 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
25846 }
25847
25848 /* Compare H1 and H2 for equivalence. */
25849
25850 bool
25851 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
25852 {
25853 rtx r1 = h1->key;
25854 rtx r2 = h2->key;
25855
25856 if (h1->key_mode != h2->key_mode)
25857 return 0;
25858
25859 return rtx_equal_p (r1, r2);
25860 }
25861
25862 /* These are the names given by the C++ front-end to vtables, and
25863 vtable-like objects. Ideally, this logic should not be here;
25864 instead, there should be some programmatic way of inquiring as
25865 to whether or not an object is a vtable. */
25866
25867 #define VTABLE_NAME_P(NAME) \
25868 (strncmp ("_vt.", (NAME), strlen ("_vt.")) == 0 \
25869 || strncmp ("_ZTV", (NAME), strlen ("_ZTV")) == 0 \
25870 || strncmp ("_ZTT", (NAME), strlen ("_ZTT")) == 0 \
25871 || strncmp ("_ZTI", (NAME), strlen ("_ZTI")) == 0 \
25872 || strncmp ("_ZTC", (NAME), strlen ("_ZTC")) == 0)
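
/* Illustrative examples (Itanium C++ ABI mangling for a hypothetical
   class "Node"): _ZTV4Node names its vtable, _ZTT4Node its VTT,
   _ZTI4Node its type_info object, and _ZTC-prefixed symbols its
   construction vtables; "_vt." is the older GNU C++ vtable prefix.  */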
25873
25874 #ifdef NO_DOLLAR_IN_LABEL
25875 /* Return a GGC-allocated character string translating dollar signs in
25876 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
25877
25878 const char *
25879 rs6000_xcoff_strip_dollar (const char *name)
25880 {
25881 char *strip, *p;
25882 const char *q;
25883 size_t len;
25884
25885 q = (const char *) strchr (name, '$');
25886
25887 if (q == 0 || q == name)
25888 return name;
25889
25890 len = strlen (name);
25891 strip = XALLOCAVEC (char, len + 1);
25892 strcpy (strip, name);
25893 p = strip + (q - name);
25894 while (p)
25895 {
25896 *p = '_';
25897 p = strchr (p + 1, '$');
25898 }
25899
25900 return ggc_alloc_string (strip, len);
25901 }
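
/* Usage sketch (hypothetical input):
   rs6000_xcoff_strip_dollar ("foo$bar$baz") returns "foo_bar_baz";
   names with no '$', or with '$' only in the first position, are
   returned unchanged.  */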
25902 #endif
25903
25904 void
25905 rs6000_output_symbol_ref (FILE *file, rtx x)
25906 {
25907 /* Currently C++ TOC references to vtables can be emitted before it
25908 is decided whether the vtable is public or private. If this is
25909 the case, then the linker will eventually complain that there is
25910 a reference to an unknown section. Thus, for vtables only,
25911 we emit the TOC reference to the symbol and not to the
25912 section. */
25913 const char *name = XSTR (x, 0);
25914
25915 if (VTABLE_NAME_P (name))
25916 {
25917 RS6000_OUTPUT_BASENAME (file, name);
25918 }
25919 else
25920 assemble_name (file, name);
25921 }
25922
25923 /* Output a TOC entry. We derive the entry name from what is being
25924 written. */
25925
25926 void
25927 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
25928 {
25929 char buf[256];
25930 const char *name = buf;
25931 rtx base = x;
25932 HOST_WIDE_INT offset = 0;
25933
25934 gcc_assert (!TARGET_NO_TOC);
25935
25936 /* When the linker won't eliminate them, don't output duplicate
25937 TOC entries (this happens on AIX if there is any kind of TOC,
25938 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
25939 CODE_LABELs. */
25940 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
25941 {
25942 struct toc_hash_struct *h;
25943
25944 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
25945 time because GGC is not initialized at that point. */
25946 if (toc_hash_table == NULL)
25947 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
25948
25949 h = ggc_alloc<toc_hash_struct> ();
25950 h->key = x;
25951 h->key_mode = mode;
25952 h->labelno = labelno;
25953
25954 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
25955 if (*found == NULL)
25956 *found = h;
25957 else /* This is indeed a duplicate.
25958 Set this label equal to that label. */
25959 {
25960 fputs ("\t.set ", file);
25961 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
25962 fprintf (file, "%d,", labelno);
25963 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
25964 fprintf (file, "%d\n", ((*found)->labelno));
25965
25966 #ifdef HAVE_AS_TLS
25967 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
25968 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
25969 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
25970 {
25971 fputs ("\t.set ", file);
25972 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
25973 fprintf (file, "%d,", labelno);
25974 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
25975 fprintf (file, "%d\n", ((*found)->labelno));
25976 }
25977 #endif
25978 return;
25979 }
25980 }
25981
25982 /* If we're going to put a double constant in the TOC, make sure it's
25983 aligned properly when strict alignment is on. */
25984 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
25985 && STRICT_ALIGNMENT
25986 && GET_MODE_BITSIZE (mode) >= 64
25987 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
25988 ASM_OUTPUT_ALIGN (file, 3);
25990
25991 (*targetm.asm_out.internal_label) (file, "LC", labelno);
25992
25993 /* Handle FP constants specially. Note that if we have a minimal
25994 TOC, things we put here aren't actually in the TOC, so we can allow
25995 FP constants. */
25996 if (GET_CODE (x) == CONST_DOUBLE
25997 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode))
25998 {
25999 REAL_VALUE_TYPE rv;
26000 long k[4];
26001
26002 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26003 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26004 REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
26005 else
26006 REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
26007
26008 if (TARGET_64BIT)
26009 {
26010 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26011 fputs (DOUBLE_INT_ASM_OP, file);
26012 else
26013 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26014 k[0] & 0xffffffff, k[1] & 0xffffffff,
26015 k[2] & 0xffffffff, k[3] & 0xffffffff);
26016 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
26017 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26018 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
26019 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
26020 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
26021 return;
26022 }
26023 else
26024 {
26025 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26026 fputs ("\t.long ", file);
26027 else
26028 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26029 k[0] & 0xffffffff, k[1] & 0xffffffff,
26030 k[2] & 0xffffffff, k[3] & 0xffffffff);
26031 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
26032 k[0] & 0xffffffff, k[1] & 0xffffffff,
26033 k[2] & 0xffffffff, k[3] & 0xffffffff);
26034 return;
26035 }
26036 }
26037 else if (GET_CODE (x) == CONST_DOUBLE
26038 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
26039 {
26040 REAL_VALUE_TYPE rv;
26041 long k[2];
26042
26043 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26044
26045 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26046 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
26047 else
26048 REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
26049
26050 if (TARGET_64BIT)
26051 {
26052 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26053 fputs (DOUBLE_INT_ASM_OP, file);
26054 else
26055 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26056 k[0] & 0xffffffff, k[1] & 0xffffffff);
26057 fprintf (file, "0x%lx%08lx\n",
26058 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26059 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
26060 return;
26061 }
26062 else
26063 {
26064 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26065 fputs ("\t.long ", file);
26066 else
26067 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26068 k[0] & 0xffffffff, k[1] & 0xffffffff);
26069 fprintf (file, "0x%lx,0x%lx\n",
26070 k[0] & 0xffffffff, k[1] & 0xffffffff);
26071 return;
26072 }
26073 }
26074 else if (GET_CODE (x) == CONST_DOUBLE
26075 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
26076 {
26077 REAL_VALUE_TYPE rv;
26078 long l;
26079
26080 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26081 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26082 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
26083 else
26084 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
26085
26086 if (TARGET_64BIT)
26087 {
26088 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26089 fputs (DOUBLE_INT_ASM_OP, file);
26090 else
26091 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26092 if (WORDS_BIG_ENDIAN)
26093 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
26094 else
26095 fprintf (file, "0x%lx\n", l & 0xffffffff);
26096 return;
26097 }
26098 else
26099 {
26100 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26101 fputs ("\t.long ", file);
26102 else
26103 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26104 fprintf (file, "0x%lx\n", l & 0xffffffff);
26105 return;
26106 }
26107 }
26108 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
26109 {
26110 unsigned HOST_WIDE_INT low;
26111 HOST_WIDE_INT high;
26112
26113 low = INTVAL (x) & 0xffffffff;
26114 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
26115
26116 /* TOC entries are always Pmode-sized, so on big-endian targets
26117 smaller integer constants in the TOC need to be padded.
26118 (This is still a win over putting the constants in
26119 a separate constant pool, because then we'd have
26120 to have both a TOC entry _and_ the actual constant.)
26121
26122 For a 32-bit target, CONST_INT values are loaded and shifted
26123 entirely within `low' and can be stored in one TOC entry. */
26124
26125 /* Handling constants wider than POINTER_SIZE on 64-bit targets would be easy, but it isn't implemented now. */
26126 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
26127
26128 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
26129 {
26130 low |= high << 32;
26131 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
26132 high = (HOST_WIDE_INT) low >> 32;
26133 low &= 0xffffffff;
26134 }
26135
26136 if (TARGET_64BIT)
26137 {
26138 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26139 fputs (DOUBLE_INT_ASM_OP, file);
26140 else
26141 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26142 (long) high & 0xffffffff, (long) low & 0xffffffff);
26143 fprintf (file, "0x%lx%08lx\n",
26144 (long) high & 0xffffffff, (long) low & 0xffffffff);
26145 return;
26146 }
26147 else
26148 {
26149 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
26150 {
26151 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26152 fputs ("\t.long ", file);
26153 else
26154 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26155 (long) high & 0xffffffff, (long) low & 0xffffffff);
26156 fprintf (file, "0x%lx,0x%lx\n",
26157 (long) high & 0xffffffff, (long) low & 0xffffffff);
26158 }
26159 else
26160 {
26161 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26162 fputs ("\t.long ", file);
26163 else
26164 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
26165 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
26166 }
26167 return;
26168 }
26169 }
26170
26171 if (GET_CODE (x) == CONST)
26172 {
26173 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
26174 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
26175
26176 base = XEXP (XEXP (x, 0), 0);
26177 offset = INTVAL (XEXP (XEXP (x, 0), 1));
26178 }
26179
26180 switch (GET_CODE (base))
26181 {
26182 case SYMBOL_REF:
26183 name = XSTR (base, 0);
26184 break;
26185
26186 case LABEL_REF:
26187 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
26188 CODE_LABEL_NUMBER (XEXP (base, 0)));
26189 break;
26190
26191 case CODE_LABEL:
26192 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
26193 break;
26194
26195 default:
26196 gcc_unreachable ();
26197 }
26198
26199 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26200 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
26201 else
26202 {
26203 fputs ("\t.tc ", file);
26204 RS6000_OUTPUT_BASENAME (file, name);
26205
26206 if (offset < 0)
26207 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
26208 else if (offset)
26209 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
26210
26211 /* Mark large TOC symbols on AIX with [TE] so they are mapped
26212 after other TOC symbols, reducing overflow of small TOC access
26213 to [TC] symbols. */
26214 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
26215 ? "[TE]," : "[TC],", file);
26216 }
26217
26218 /* Currently C++ TOC references to vtables can be emitted before it
26219 is decided whether the vtable is public or private. If this is
26220 the case, then the linker will eventually complain that there is
26221 a TOC reference to an unknown section. Thus, for vtables only,
26222 we emit the TOC reference to the symbol and not to the
26223 section. */
26224 if (VTABLE_NAME_P (name))
26225 {
26226 RS6000_OUTPUT_BASENAME (file, name);
26227 if (offset < 0)
26228 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
26229 else if (offset > 0)
26230 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
26231 }
26232 else
26233 output_addr_const (file, x);
26234
26235 #if HAVE_AS_TLS
26236 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF
26237 && SYMBOL_REF_TLS_MODEL (base) != 0)
26238 {
26239 if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
26240 fputs ("@le", file);
26241 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_INITIAL_EXEC)
26242 fputs ("@ie", file);
26243 /* Use global-dynamic for local-dynamic. */
26244 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_GLOBAL_DYNAMIC
26245 || SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_DYNAMIC)
26246 {
26247 putc ('\n', file);
26248 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
26249 fputs ("\t.tc .", file);
26250 RS6000_OUTPUT_BASENAME (file, name);
26251 fputs ("[TC],", file);
26252 output_addr_const (file, x);
26253 fputs ("@m", file);
26254 }
26255 }
26256 #endif
26257
26258 putc ('\n', file);
26259 }
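
/* Editor's worked example (illustrative; the exact label syntax is
   target-dependent, and the label number 7 is hypothetical): for the
   DFmode constant 1.0, whose image is 0x3ff0000000000000, the code
   above emits on a 64-bit AIX-style target roughly

	LC..7:
		.tc FD_3ff00000_0[TC],0x3ff0000000000000

   while on 64-bit ELF or with -mminimal-toc the ".tc" directive is
   replaced by DOUBLE_INT_ASM_OP followed by the same value.  */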
26260 \f
26261 /* Output an assembler pseudo-op to write an ASCII string of N characters
26262 starting at P to FILE.
26263
26264 On the RS/6000, we have to do this using the .byte operation and
26265 write out special characters outside the quoted string.
26266 Also, the assembler is broken; very long strings are truncated,
26267 so we must artificially break them up early. */
26268
26269 void
26270 output_ascii (FILE *file, const char *p, int n)
26271 {
26272 char c;
26273 int i, count_string;
26274 const char *for_string = "\t.byte \"";
26275 const char *for_decimal = "\t.byte ";
26276 const char *to_close = NULL;
26277
26278 count_string = 0;
26279 for (i = 0; i < n; i++)
26280 {
26281 c = *p++;
26282 if (c >= ' ' && c < 0177)
26283 {
26284 if (for_string)
26285 fputs (for_string, file);
26286 putc (c, file);
26287
26288 /* Write two quotes to get one. */
26289 if (c == '"')
26290 {
26291 putc (c, file);
26292 ++count_string;
26293 }
26294
26295 for_string = NULL;
26296 for_decimal = "\"\n\t.byte ";
26297 to_close = "\"\n";
26298 ++count_string;
26299
26300 if (count_string >= 512)
26301 {
26302 fputs (to_close, file);
26303
26304 for_string = "\t.byte \"";
26305 for_decimal = "\t.byte ";
26306 to_close = NULL;
26307 count_string = 0;
26308 }
26309 }
26310 else
26311 {
26312 if (for_decimal)
26313 fputs (for_decimal, file);
26314 fprintf (file, "%d", c);
26315
26316 for_string = "\n\t.byte \"";
26317 for_decimal = ", ";
26318 to_close = "\n";
26319 count_string = 0;
26320 }
26321 }
26322
26323 /* Now close the string if we have written one. Then end the line. */
26324 if (to_close)
26325 fputs (to_close, file);
26326 }
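
/* Editor's worked example (illustrative): output_ascii (file, "Hi\n", 3)
   emits

	.byte "Hi"
	.byte 10

   printable runs are grouped into one quoted .byte string, and other
   bytes are written out as decimal values.  */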
26327 \f
26328 /* Generate a unique section name for FILENAME for a section type
26329 represented by SECTION_DESC. Output goes into BUF.
26330
26331 SECTION_DESC can be any string, as long as it is different for each
26332 possible section type.
26333
26334 We name the section in the same manner as xlc. The name begins with an
26335 underscore followed by the filename (after stripping any leading directory
26336 names) with the last period replaced by the string SECTION_DESC. If
26337 FILENAME does not contain a period, SECTION_DESC is appended to the end of
26338 the name. */
26339
26340 void
26341 rs6000_gen_section_name (char **buf, const char *filename,
26342 const char *section_desc)
26343 {
26344 const char *q, *after_last_slash, *last_period = 0;
26345 char *p;
26346 int len;
26347
26348 after_last_slash = filename;
26349 for (q = filename; *q; q++)
26350 {
26351 if (*q == '/')
26352 after_last_slash = q + 1;
26353 else if (*q == '.')
26354 last_period = q;
26355 }
26356
26357 len = strlen (after_last_slash) + strlen (section_desc) + 2;
26358 *buf = (char *) xmalloc (len);
26359
26360 p = *buf;
26361 *p++ = '_';
26362
26363 for (q = after_last_slash; *q; q++)
26364 {
26365 if (q == last_period)
26366 {
26367 strcpy (p, section_desc);
26368 p += strlen (section_desc);
26369 break;
26370 }
26371
26372 else if (ISALNUM (*q))
26373 *p++ = *q;
26374 }
26375
26376 if (last_period == 0)
26377 strcpy (p, section_desc);
26378 else
26379 *p = '\0';
26380 }
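
/* Usage sketch (hypothetical arguments):
   rs6000_gen_section_name (&buf, "src/hello.c", "_ro_") sets buf to
   "_hello_ro_": the directory part is dropped, '_' is prepended, and
   the final ".c" is replaced by the section descriptor.  */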
26381 \f
26382 /* Emit profile function. */
26383
26384 void
26385 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
26386 {
26387 /* Non-standard profiling for kernels, which just saves LR then calls
26388 _mcount without worrying about arg saves. The idea is to change
26389 the function prologue as little as possible as it isn't easy to
26390 account for arg save/restore code added just for _mcount. */
26391 if (TARGET_PROFILE_KERNEL)
26392 return;
26393
26394 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26395 {
26396 #ifndef NO_PROFILE_COUNTERS
26397 # define NO_PROFILE_COUNTERS 0
26398 #endif
26399 if (NO_PROFILE_COUNTERS)
26400 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26401 LCT_NORMAL, VOIDmode, 0);
26402 else
26403 {
26404 char buf[30];
26405 const char *label_name;
26406 rtx fun;
26407
26408 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26409 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
26410 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
26411
26412 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26413 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
26414 }
26415 }
26416 else if (DEFAULT_ABI == ABI_DARWIN)
26417 {
26418 const char *mcount_name = RS6000_MCOUNT;
26419 int caller_addr_regno = LR_REGNO;
26420
26421 /* Be conservative and always set this, at least for now. */
26422 crtl->uses_pic_offset_table = 1;
26423
26424 #if TARGET_MACHO
26425 /* For PIC code, set up a stub and collect the caller's address
26426 from r0, which is where the prologue puts it. */
26427 if (MACHOPIC_INDIRECT
26428 && crtl->uses_pic_offset_table)
26429 caller_addr_regno = 0;
26430 #endif
26431 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
26432 LCT_NORMAL, VOIDmode, 1,
26433 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
26434 }
26435 }
26436
26437 /* Write function profiler code. */
26438
26439 void
26440 output_function_profiler (FILE *file, int labelno)
26441 {
26442 char buf[100];
26443
26444 switch (DEFAULT_ABI)
26445 {
26446 default:
26447 gcc_unreachable ();
26448
26449 case ABI_V4:
26450 if (!TARGET_32BIT)
26451 {
26452 warning (0, "no profiling of 64-bit code for this ABI");
26453 return;
26454 }
26455 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26456 fprintf (file, "\tmflr %s\n", reg_names[0]);
26457 if (NO_PROFILE_COUNTERS)
26458 {
26459 asm_fprintf (file, "\tstw %s,4(%s)\n",
26460 reg_names[0], reg_names[1]);
26461 }
26462 else if (TARGET_SECURE_PLT && flag_pic)
26463 {
26464 if (TARGET_LINK_STACK)
26465 {
26466 char name[32];
26467 get_ppc476_thunk_name (name);
26468 asm_fprintf (file, "\tbl %s\n", name);
26469 }
26470 else
26471 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
26472 asm_fprintf (file, "\tstw %s,4(%s)\n",
26473 reg_names[0], reg_names[1]);
26474 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26475 asm_fprintf (file, "\taddis %s,%s,",
26476 reg_names[12], reg_names[12]);
26477 assemble_name (file, buf);
26478 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
26479 assemble_name (file, buf);
26480 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
26481 }
26482 else if (flag_pic == 1)
26483 {
26484 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
26485 asm_fprintf (file, "\tstw %s,4(%s)\n",
26486 reg_names[0], reg_names[1]);
26487 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26488 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
26489 assemble_name (file, buf);
26490 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
26491 }
26492 else if (flag_pic > 1)
26493 {
26494 asm_fprintf (file, "\tstw %s,4(%s)\n",
26495 reg_names[0], reg_names[1]);
26496 /* Now, we need to get the address of the label. */
26497 if (TARGET_LINK_STACK)
26498 {
26499 char name[32];
26500 get_ppc476_thunk_name (name);
26501 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
26502 assemble_name (file, buf);
26503 fputs ("-.\n1:", file);
26504 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26505 asm_fprintf (file, "\taddi %s,%s,4\n",
26506 reg_names[11], reg_names[11]);
26507 }
26508 else
26509 {
26510 fputs ("\tbcl 20,31,1f\n\t.long ", file);
26511 assemble_name (file, buf);
26512 fputs ("-.\n1:", file);
26513 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26514 }
26515 asm_fprintf (file, "\tlwz %s,0(%s)\n",
26516 reg_names[0], reg_names[11]);
26517 asm_fprintf (file, "\tadd %s,%s,%s\n",
26518 reg_names[0], reg_names[0], reg_names[11]);
26519 }
26520 else
26521 {
26522 asm_fprintf (file, "\tlis %s,", reg_names[12]);
26523 assemble_name (file, buf);
26524 fputs ("@ha\n", file);
26525 asm_fprintf (file, "\tstw %s,4(%s)\n",
26526 reg_names[0], reg_names[1]);
26527 asm_fprintf (file, "\tla %s,", reg_names[0]);
26528 assemble_name (file, buf);
26529 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
26530 }
26531
26532 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
26533 fprintf (file, "\tbl %s%s\n",
26534 RS6000_MCOUNT, flag_pic ? "@plt" : "");
26535 break;
26536
26537 case ABI_AIX:
26538 case ABI_ELFv2:
26539 case ABI_DARWIN:
26540 /* Don't do anything, done in output_profile_hook (). */
26541 break;
26542 }
26543 }
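
/* Editor's illustration (ABI_V4, non-PIC; the counter label LPn is a
   placeholder whose exact spelling is target-dependent): the final
   "else" arm above, together with the initial mflr and the closing bl,
   emits a sequence along the lines of

	mflr 0
	lis 12,LPn@ha
	stw 0,4(1)
	la 0,LPn@l(12)
	bl _mcount

   i.e. the return address is stored at 4(r1) and the address of the
   per-call-site counter label is passed to _mcount in r0.  */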
26544
26545 \f
26546
26547 /* The following variable holds the last issued insn. */
26548
26549 static rtx last_scheduled_insn;
26550
26551 /* The following variable helps to balance issuing of load and
26552 store instructions. */
26553
26554 static int load_store_pendulum;
26555
26556 /* Power4 load update and store update instructions are cracked into a
26557 load or store and an integer insn which are executed in the same cycle.
26558 Branches have their own dispatch slot which does not count against the
26559 GCC issue rate, but it changes the program flow so there are no other
26560 instructions to issue in this cycle. */
26561
26562 static int
26563 rs6000_variable_issue_1 (rtx_insn *insn, int more)
26564 {
26565 last_scheduled_insn = insn;
26566 if (GET_CODE (PATTERN (insn)) == USE
26567 || GET_CODE (PATTERN (insn)) == CLOBBER)
26568 {
26569 cached_can_issue_more = more;
26570 return cached_can_issue_more;
26571 }
26572
26573 if (insn_terminates_group_p (insn, current_group))
26574 {
26575 cached_can_issue_more = 0;
26576 return cached_can_issue_more;
26577 }
26578
26579 /* If the insn has no reservation but we reach here anyway, don't change the issue count. */
26580 if (recog_memoized (insn) < 0)
26581 return more;
26582
26583 if (rs6000_sched_groups)
26584 {
26585 if (is_microcoded_insn (insn))
26586 cached_can_issue_more = 0;
26587 else if (is_cracked_insn (insn))
26588 cached_can_issue_more = more > 2 ? more - 2 : 0;
26589 else
26590 cached_can_issue_more = more - 1;
26591
26592 return cached_can_issue_more;
26593 }
26594
26595 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
26596 return 0;
26597
26598 cached_can_issue_more = more - 1;
26599 return cached_can_issue_more;
26600 }
26601
26602 static int
26603 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
26604 {
26605 int r = rs6000_variable_issue_1 (insn, more);
26606 if (verbose)
26607 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
26608 return r;
26609 }
26610
26611 /* Adjust the cost of a scheduling dependency. Return the new cost of
26612 the dependency LINK of INSN on DEP_INSN. COST is the current cost. */
26613
26614 static int
26615 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26616 {
26617 enum attr_type attr_type;
26618
26619 if (! recog_memoized (insn))
26620 return 0;
26621
26622 switch (REG_NOTE_KIND (link))
26623 {
26624 case REG_DEP_TRUE:
26625 {
26626 /* Data dependency; DEP_INSN writes a register that INSN reads
26627 some cycles later. */
26628
26629 /* Separate a load from a narrower, dependent store. */
26630 if (rs6000_sched_groups
26631 && GET_CODE (PATTERN (insn)) == SET
26632 && GET_CODE (PATTERN (dep_insn)) == SET
26633 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
26634 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
26635 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
26636 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
26637 return cost + 14;
26638
26639 attr_type = get_attr_type (insn);
26640
26641 switch (attr_type)
26642 {
26643 case TYPE_JMPREG:
26644 /* Tell the first scheduling pass about the latency between
26645 a mtctr and bctr (and mtlr and br/blr). The first
26646 scheduling pass will not know about this latency since
26647 the mtctr instruction, which has the latency associated
26648 to it, will be generated by reload. */
26649 return 4;
26650 case TYPE_BRANCH:
26651 /* Leave some extra cycles between a compare and its
26652 dependent branch, to inhibit expensive mispredicts. */
26653 if ((rs6000_cpu_attr == CPU_PPC603
26654 || rs6000_cpu_attr == CPU_PPC604
26655 || rs6000_cpu_attr == CPU_PPC604E
26656 || rs6000_cpu_attr == CPU_PPC620
26657 || rs6000_cpu_attr == CPU_PPC630
26658 || rs6000_cpu_attr == CPU_PPC750
26659 || rs6000_cpu_attr == CPU_PPC7400
26660 || rs6000_cpu_attr == CPU_PPC7450
26661 || rs6000_cpu_attr == CPU_PPCE5500
26662 || rs6000_cpu_attr == CPU_PPCE6500
26663 || rs6000_cpu_attr == CPU_POWER4
26664 || rs6000_cpu_attr == CPU_POWER5
26665 || rs6000_cpu_attr == CPU_POWER7
26666 || rs6000_cpu_attr == CPU_POWER8
26667 || rs6000_cpu_attr == CPU_CELL)
26668 && recog_memoized (dep_insn)
26669 && (INSN_CODE (dep_insn) >= 0))
26670
26671 switch (get_attr_type (dep_insn))
26672 {
26673 case TYPE_CMP:
26674 case TYPE_COMPARE:
26675 case TYPE_FPCOMPARE:
26676 case TYPE_CR_LOGICAL:
26677 case TYPE_DELAYED_CR:
26678 return cost + 2;
26679 case TYPE_EXTS:
26680 case TYPE_MUL:
26681 if (get_attr_dot (dep_insn) == DOT_YES)
26682 return cost + 2;
26683 else
26684 break;
26685 case TYPE_SHIFT:
26686 if (get_attr_dot (dep_insn) == DOT_YES
26687 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
26688 return cost + 2;
26689 else
26690 break;
26691 default:
26692 break;
26693 }
26694 break;
26695
26696 case TYPE_STORE:
26697 case TYPE_FPSTORE:
26698 if ((rs6000_cpu == PROCESSOR_POWER6)
26699 && recog_memoized (dep_insn)
26700 && (INSN_CODE (dep_insn) >= 0))
26701 {
26702
26703 if (GET_CODE (PATTERN (insn)) != SET)
26704 /* If this happens, we have to extend this to schedule
26705 optimally. Return default for now. */
26706 return cost;
26707
26708 /* Adjust the cost for the case where the value written
26709 by a fixed point operation is used as the address
26710 gen value on a store. */
26711 switch (get_attr_type (dep_insn))
26712 {
26713 case TYPE_LOAD:
26714 case TYPE_CNTLZ:
26715 {
26716 if (! store_data_bypass_p (dep_insn, insn))
26717 return get_attr_sign_extend (dep_insn)
26718 == SIGN_EXTEND_YES ? 6 : 4;
26719 break;
26720 }
26721 case TYPE_SHIFT:
26722 {
26723 if (! store_data_bypass_p (dep_insn, insn))
26724 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26725 6 : 3;
26726 break;
26727 }
26728 case TYPE_INTEGER:
26729 case TYPE_ADD:
26730 case TYPE_LOGICAL:
26731 case TYPE_COMPARE:
26732 case TYPE_EXTS:
26733 case TYPE_INSERT:
26734 {
26735 if (! store_data_bypass_p (dep_insn, insn))
26736 return 3;
26737 break;
26738 }
26739 case TYPE_STORE:
26740 case TYPE_FPLOAD:
26741 case TYPE_FPSTORE:
26742 {
26743 if (get_attr_update (dep_insn) == UPDATE_YES
26744 && ! store_data_bypass_p (dep_insn, insn))
26745 return 3;
26746 break;
26747 }
26748 case TYPE_MUL:
26749 {
26750 if (! store_data_bypass_p (dep_insn, insn))
26751 return 17;
26752 break;
26753 }
26754 case TYPE_DIV:
26755 {
26756 if (! store_data_bypass_p (dep_insn, insn))
26757 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26758 break;
26759 }
26760 default:
26761 break;
26762 }
26763 }
26764 break;
26765
26766 case TYPE_LOAD:
26767 if ((rs6000_cpu == PROCESSOR_POWER6)
26768 && recog_memoized (dep_insn)
26769 && (INSN_CODE (dep_insn) >= 0))
26770 {
26771
26772 /* Adjust the cost for the case where the value written
26773 by a fixed point instruction is used within the address
26774 gen portion of a subsequent load(u)(x) */
26775 switch (get_attr_type (dep_insn))
26776 {
26777 case TYPE_LOAD:
26778 case TYPE_CNTLZ:
26779 {
26780 if (set_to_load_agen (dep_insn, insn))
26781 return get_attr_sign_extend (dep_insn)
26782 == SIGN_EXTEND_YES ? 6 : 4;
26783 break;
26784 }
26785 case TYPE_SHIFT:
26786 {
26787 if (set_to_load_agen (dep_insn, insn))
26788 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26789 6 : 3;
26790 break;
26791 }
26792 case TYPE_INTEGER:
26793 case TYPE_ADD:
26794 case TYPE_LOGICAL:
26795 case TYPE_COMPARE:
26796 case TYPE_EXTS:
26797 case TYPE_INSERT:
26798 {
26799 if (set_to_load_agen (dep_insn, insn))
26800 return 3;
26801 break;
26802 }
26803 case TYPE_STORE:
26804 case TYPE_FPLOAD:
26805 case TYPE_FPSTORE:
26806 {
26807 if (get_attr_update (dep_insn) == UPDATE_YES
26808 && set_to_load_agen (dep_insn, insn))
26809 return 3;
26810 break;
26811 }
26812 case TYPE_MUL:
26813 {
26814 if (set_to_load_agen (dep_insn, insn))
26815 return 17;
26816 break;
26817 }
26818 case TYPE_DIV:
26819 {
26820 if (set_to_load_agen (dep_insn, insn))
26821 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26822 break;
26823 }
26824 default:
26825 break;
26826 }
26827 }
26828 break;
26829
26830 case TYPE_FPLOAD:
26831 if ((rs6000_cpu == PROCESSOR_POWER6)
26832 && get_attr_update (insn) == UPDATE_NO
26833 && recog_memoized (dep_insn)
26834 && (INSN_CODE (dep_insn) >= 0)
26835 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
26836 return 2;
26837 break;
26838 default:
26839 break;
26840 }
26841
26842 /* Fall out to return default cost. */
26843 }
26844 break;
26845
26846 case REG_DEP_OUTPUT:
26847 /* Output dependency; DEP_INSN writes a register that INSN writes some
26848 cycles later. */
26849 if ((rs6000_cpu == PROCESSOR_POWER6)
26850 && recog_memoized (dep_insn)
26851 && (INSN_CODE (dep_insn) >= 0))
26852 {
26853 attr_type = get_attr_type (insn);
26854
26855 switch (attr_type)
26856 {
26857 case TYPE_FP:
26858 if (get_attr_type (dep_insn) == TYPE_FP)
26859 return 1;
26860 break;
26861 case TYPE_FPLOAD:
26862 if (get_attr_update (insn) == UPDATE_NO
26863 && get_attr_type (dep_insn) == TYPE_MFFGPR)
26864 return 2;
26865 break;
26866 default:
26867 break;
26868 }
26869 }
      /* Fall through. */
26870 case REG_DEP_ANTI:
26871 /* Anti dependency; DEP_INSN reads a register that INSN writes some
26872 cycles later. */
26873 return 0;
26874
26875 default:
26876 gcc_unreachable ();
26877 }
26878
26879 return cost;
26880 }
26881
26882 /* Debug version of rs6000_adjust_cost. */
26883
26884 static int
26885 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
26886 int cost)
26887 {
26888 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
26889
26890 if (ret != cost)
26891 {
26892 const char *dep;
26893
26894 switch (REG_NOTE_KIND (link))
26895 {
26896 default: dep = "unknown dependency"; break;
26897 case REG_DEP_TRUE: dep = "data dependency"; break;
26898 case REG_DEP_OUTPUT: dep = "output dependency"; break;
26899 case REG_DEP_ANTI: dep = "anti dependency"; break;
26900 }
26901
26902 fprintf (stderr,
26903 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
26904 "%s, insn:\n", ret, cost, dep);
26905
26906 debug_rtx (insn);
26907 }
26908
26909 return ret;
26910 }
26911
26912 /* Return true if INSN is microcoded, false otherwise. */
26914
26915 static bool
26916 is_microcoded_insn (rtx_insn *insn)
26917 {
26918 if (!insn || !NONDEBUG_INSN_P (insn)
26919 || GET_CODE (PATTERN (insn)) == USE
26920 || GET_CODE (PATTERN (insn)) == CLOBBER)
26921 return false;
26922
26923 if (rs6000_cpu_attr == CPU_CELL)
26924 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
26925
26926 if (rs6000_sched_groups
26927 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
26928 {
26929 enum attr_type type = get_attr_type (insn);
26930 if ((type == TYPE_LOAD
26931 && get_attr_update (insn) == UPDATE_YES
26932 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26933 || ((type == TYPE_LOAD || type == TYPE_STORE)
26934 && get_attr_update (insn) == UPDATE_YES
26935 && get_attr_indexed (insn) == INDEXED_YES)
26936 || type == TYPE_MFCR)
26937 return true;
26938 }
26939
26940 return false;
26941 }
26942
26943 /* The function returns true if INSN is cracked into 2 instructions
26944 by the processor (and therefore occupies 2 issue slots). */
26945
26946 static bool
26947 is_cracked_insn (rtx_insn *insn)
26948 {
26949 if (!insn || !NONDEBUG_INSN_P (insn)
26950 || GET_CODE (PATTERN (insn)) == USE
26951 || GET_CODE (PATTERN (insn)) == CLOBBER)
26952 return false;
26953
26954 if (rs6000_sched_groups
26955 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
26956 {
26957 enum attr_type type = get_attr_type (insn);
26958 if ((type == TYPE_LOAD
26959 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26960 && get_attr_update (insn) == UPDATE_NO)
26961 || (type == TYPE_LOAD
26962 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
26963 && get_attr_update (insn) == UPDATE_YES
26964 && get_attr_indexed (insn) == INDEXED_NO)
26965 || (type == TYPE_STORE
26966 && get_attr_update (insn) == UPDATE_YES
26967 && get_attr_indexed (insn) == INDEXED_NO)
26968 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
26969 && get_attr_update (insn) == UPDATE_YES)
26970 || type == TYPE_DELAYED_CR
26971 || type == TYPE_COMPARE
26972 || (type == TYPE_EXTS
26973 && get_attr_dot (insn) == DOT_YES)
26974 || (type == TYPE_SHIFT
26975 && get_attr_dot (insn) == DOT_YES
26976 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
26977 || (type == TYPE_MUL
26978 && get_attr_dot (insn) == DOT_YES)
26979 || type == TYPE_DIV
26980 || (type == TYPE_INSERT
26981 && get_attr_size (insn) == SIZE_32))
26982 return true;
26983 }
26984
26985 return false;
26986 }
26987
26988 /* The function returns true if INSN can be issued only from
26989 the branch slot. */
26990
26991 static bool
26992 is_branch_slot_insn (rtx_insn *insn)
26993 {
26994 if (!insn || !NONDEBUG_INSN_P (insn)
26995 || GET_CODE (PATTERN (insn)) == USE
26996 || GET_CODE (PATTERN (insn)) == CLOBBER)
26997 return false;
26998
26999 if (rs6000_sched_groups)
27000 {
27001 enum attr_type type = get_attr_type (insn);
27002 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
27003 return true;
27004 return false;
27005 }
27006
27007 return false;
27008 }
27009
27010 /* Return true if OUT_INSN sets a value that is used in the
27011 address generation computation of IN_INSN. */
27012 static bool
27013 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
27014 {
27015 rtx out_set, in_set;
27016
27017 /* For performance reasons, only handle the simple case where
27018 both loads are a single_set. */
27019 out_set = single_set (out_insn);
27020 if (out_set)
27021 {
27022 in_set = single_set (in_insn);
27023 if (in_set)
27024 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
27025 }
27026
27027 return false;
27028 }
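
/* Illustrative RTL (hypothetical registers): given

	out_insn: (set (reg:DI 9) (plus:DI (reg:DI 3) (const_int 8)))
	in_insn:  (set (reg:DI 4) (mem:DI (reg:DI 9)))

   the destination of OUT_INSN (reg 9) is mentioned in the source of
   IN_INSN, so the function returns true.  The check covers the whole
   SET_SRC rather than just the address, which errs on the safe side
   for the load patterns this is used with.  */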
27029
27030 /* Try to determine base/offset/size parts of the given MEM.
27031 Return true if successful, false if any of the values couldn't
27032 be determined.
27033
27034 This function only looks for REG or REG+CONST address forms.
27035 REG+REG address form will return false. */
27036
27037 static bool
27038 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
27039 HOST_WIDE_INT *size)
27040 {
27041 rtx addr_rtx;
27042 if (MEM_SIZE_KNOWN_P (mem))
27043 *size = MEM_SIZE (mem);
27044 else
27045 return false;
27046
27047 if (GET_CODE (XEXP (mem, 0)) == PRE_MODIFY)
27048 addr_rtx = XEXP (XEXP (mem, 0), 1);
27049 else
27050 addr_rtx = XEXP (mem, 0);
27051
27052 if (GET_CODE (addr_rtx) == REG)
27053 {
27054 *base = addr_rtx;
27055 *offset = 0;
27056 }
27057 else if (GET_CODE (addr_rtx) == PLUS
27058 && CONST_INT_P (XEXP (addr_rtx, 1)))
27059 {
27060 *base = XEXP (addr_rtx, 0);
27061 *offset = INTVAL (XEXP (addr_rtx, 1));
27062 }
27063 else
27064 return false;
27065
27066 return true;
27067 }
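
/* Usage sketch (hypothetical MEM): for a 4-byte access
   (mem:SI (plus:DI (reg:DI 3) (const_int 16))) with a known size,
   the function stores BASE = (reg 3), OFFSET = 16, SIZE = 4 and
   returns true; a REG+REG address makes it return false.  */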
27068
27069 /* Return true if the target storage location of MEM1 is adjacent
27070 to the target storage location of MEM2. */
27072
27073 static bool
27074 adjacent_mem_locations (rtx mem1, rtx mem2)
27075 {
27076 rtx reg1, reg2;
27077 HOST_WIDE_INT off1, size1, off2, size2;
27078
27079 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27080 && get_memref_parts (mem2, &reg2, &off2, &size2))
27081 return ((REGNO (reg1) == REGNO (reg2))
27082 && ((off1 + size1 == off2)
27083 || (off2 + size2 == off1)));
27084
27085 return false;
27086 }
27087
27088 /* This function returns true if it can be determined that the two MEM
27089 locations overlap by at least 1 byte based on base reg/offset/size. */
27090
27091 static bool
27092 mem_locations_overlap (rtx mem1, rtx mem2)
27093 {
27094 rtx reg1, reg2;
27095 HOST_WIDE_INT off1, size1, off2, size2;
27096
27097 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27098 && get_memref_parts (mem2, &reg2, &off2, &size2))
27099 return ((REGNO (reg1) == REGNO (reg2))
27100 && (((off1 <= off2) && (off1 + size1 > off2))
27101 || ((off2 <= off1) && (off2 + size2 > off1))));
27102
27103 return false;
27104 }
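
/* Examples (hypothetical operands, both relative to the same base
   register): accesses covering bytes [0,4) and [4,8) are adjacent;
   accesses covering bytes [0,8) and [4,12) overlap.  Distinct base
   registers always yield false, even if the locations happen to
   alias at run time.  */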
27105
27106 /* Adjust the integer scheduling priority PRIORITY of INSN: a higher
27107 priority executes INSN earlier, a lower priority executes it later.
27108 Return the (possibly adjusted) priority. */
27111
27112 static int
27113 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
27114 {
27115 rtx load_mem, str_mem;
27116 /* On machines (like the 750) which have asymmetric integer units,
27117 where one integer unit can do multiply and divides and the other
27118 can't, reduce the priority of multiply/divide so it is scheduled
27119 before other integer operations. */
27120
27121 #if 0
27122 if (! INSN_P (insn))
27123 return priority;
27124
27125 if (GET_CODE (PATTERN (insn)) == USE)
27126 return priority;
27127
27128 switch (rs6000_cpu_attr) {
27129 case CPU_PPC750:
27130 switch (get_attr_type (insn))
27131 {
27132 default:
27133 break;
27134
27135 case TYPE_MUL:
27136 case TYPE_DIV:
27137 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
27138 priority, priority);
27139 if (priority >= 0 && priority < 0x01000000)
27140 priority >>= 3;
27141 break;
27142 }
27143 }
27144 #endif
27145
27146 if (insn_must_be_first_in_group (insn)
27147 && reload_completed
27148 && current_sched_info->sched_max_insns_priority
27149 && rs6000_sched_restricted_insns_priority)
27150 {
27151
27152 /* Prioritize insns that can be dispatched only in the first
27153 dispatch slot. */
27154 if (rs6000_sched_restricted_insns_priority == 1)
27155 /* Attach highest priority to insn. This means that in
27156 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
27157 precede 'priority' (critical path) considerations. */
27158 return current_sched_info->sched_max_insns_priority;
27159 else if (rs6000_sched_restricted_insns_priority == 2)
27160 /* Increase priority of insn by a minimal amount. This means that in
27161 haifa-sched.c:ready_sort(), only 'priority' (critical path)
27162 considerations precede dispatch-slot restriction considerations. */
27163 return (priority + 1);
27164 }
27165
27166 if (rs6000_cpu == PROCESSOR_POWER6
27167 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
27168 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
27169 /* Attach highest priority to insn if the scheduler has just issued two
27170 stores and this instruction is a load, or two loads and this instruction
27171 is a store. Power6 wants loads and stores scheduled alternately
27172 when possible */
27173 return current_sched_info->sched_max_insns_priority;
27174
27175 return priority;
27176 }
27177
27178 /* Return true if the instruction is nonpipelined on the Cell. */
27179 static bool
27180 is_nonpipeline_insn (rtx_insn *insn)
27181 {
27182 enum attr_type type;
27183 if (!insn || !NONDEBUG_INSN_P (insn)
27184 || GET_CODE (PATTERN (insn)) == USE
27185 || GET_CODE (PATTERN (insn)) == CLOBBER)
27186 return false;
27187
27188 type = get_attr_type (insn);
27189 if (type == TYPE_MUL
27190 || type == TYPE_DIV
27191 || type == TYPE_SDIV
27192 || type == TYPE_DDIV
27193 || type == TYPE_SSQRT
27194 || type == TYPE_DSQRT
27195 || type == TYPE_MFCR
27196 || type == TYPE_MFCRF
27197 || type == TYPE_MFJMPR)
27198 {
27199 return true;
27200 }
27201 return false;
27202 }
27203
27204
27205 /* Return how many instructions the machine can issue per cycle. */
27206
27207 static int
27208 rs6000_issue_rate (void)
27209 {
27210 /* Unless scheduling for register pressure, use issue rate of 1 for
27211 first scheduling pass to decrease degradation. */
27212 if (!reload_completed && !flag_sched_pressure)
27213 return 1;
27214
27215 switch (rs6000_cpu_attr) {
27216 case CPU_RS64A:
27217 case CPU_PPC601: /* ? */
27218 case CPU_PPC7450:
27219 return 3;
27220 case CPU_PPC440:
27221 case CPU_PPC603:
27222 case CPU_PPC750:
27223 case CPU_PPC7400:
27224 case CPU_PPC8540:
27225 case CPU_PPC8548:
27226 case CPU_CELL:
27227 case CPU_PPCE300C2:
27228 case CPU_PPCE300C3:
27229 case CPU_PPCE500MC:
27230 case CPU_PPCE500MC64:
27231 case CPU_PPCE5500:
27232 case CPU_PPCE6500:
27233 case CPU_TITAN:
27234 return 2;
27235 case CPU_PPC476:
27236 case CPU_PPC604:
27237 case CPU_PPC604E:
27238 case CPU_PPC620:
27239 case CPU_PPC630:
27240 return 4;
27241 case CPU_POWER4:
27242 case CPU_POWER5:
27243 case CPU_POWER6:
27244 case CPU_POWER7:
27245 return 5;
27246 case CPU_POWER8:
27247 return 7;
27248 default:
27249 return 1;
27250 }
27251 }
27252
27253 /* Return how many instructions to look ahead for better insn
27254 scheduling. */
27255
27256 static int
27257 rs6000_use_sched_lookahead (void)
27258 {
27259 switch (rs6000_cpu_attr)
27260 {
27261 case CPU_PPC8540:
27262 case CPU_PPC8548:
27263 return 4;
27264
27265 case CPU_CELL:
27266 return (reload_completed ? 8 : 0);
27267
27268 default:
27269 return 0;
27270 }
27271 }
27272
27273 /* We are choosing an insn from the ready queue. Return zero if INSN
27274 can be chosen, nonzero if it should be skipped. */
27275 static int
27276 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
27277 {
27278 if (ready_index == 0)
27279 return 0;
27280
27281 if (rs6000_cpu_attr != CPU_CELL)
27282 return 0;
27283
27284 gcc_assert (insn != NULL_RTX && INSN_P (insn));
27285
27286 if (!reload_completed
27287 || is_nonpipeline_insn (insn)
27288 || is_microcoded_insn (insn))
27289 return 1;
27290
27291 return 0;
27292 }
27293
27294 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
27295 and return true. */
27296
27297 static bool
27298 find_mem_ref (rtx pat, rtx *mem_ref)
27299 {
27300 const char * fmt;
27301 int i, j;
27302
27303 /* stack_tie does not produce any real memory traffic. */
27304 if (tie_operand (pat, VOIDmode))
27305 return false;
27306
27307 if (GET_CODE (pat) == MEM)
27308 {
27309 *mem_ref = pat;
27310 return true;
27311 }
27312
27313 /* Recursively process the pattern. */
27314 fmt = GET_RTX_FORMAT (GET_CODE (pat));
27315
27316 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
27317 {
27318 if (fmt[i] == 'e')
27319 {
27320 if (find_mem_ref (XEXP (pat, i), mem_ref))
27321 return true;
27322 }
27323 else if (fmt[i] == 'E')
27324 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
27325 {
27326 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
27327 return true;
27328 }
27329 }
27330
27331 return false;
27332 }
27333
27334 /* Determine if PAT is a PATTERN of a load insn. */
27335
27336 static bool
27337 is_load_insn1 (rtx pat, rtx *load_mem)
27338 {
27339 if (!pat)
27340 return false;
27341
27342 if (GET_CODE (pat) == SET)
27343 return find_mem_ref (SET_SRC (pat), load_mem);
27344
27345 if (GET_CODE (pat) == PARALLEL)
27346 {
27347 int i;
27348
27349 for (i = 0; i < XVECLEN (pat, 0); i++)
27350 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
27351 return true;
27352 }
27353
27354 return false;
27355 }
27356
27357 /* Determine if INSN loads from memory. */
27358
27359 static bool
27360 is_load_insn (rtx insn, rtx *load_mem)
27361 {
27362 if (!insn || !INSN_P (insn))
27363 return false;
27364
27365 if (CALL_P (insn))
27366 return false;
27367
27368 return is_load_insn1 (PATTERN (insn), load_mem);
27369 }
27370
27371 /* Determine if PAT is a PATTERN of a store insn. */
27372
27373 static bool
27374 is_store_insn1 (rtx pat, rtx *str_mem)
27375 {
27376 if (!pat)
27377 return false;
27378
27379 if (GET_CODE (pat) == SET)
27380 return find_mem_ref (SET_DEST (pat), str_mem);
27381
27382 if (GET_CODE (pat) == PARALLEL)
27383 {
27384 int i;
27385
27386 for (i = 0; i < XVECLEN (pat, 0); i++)
27387 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
27388 return true;
27389 }
27390
27391 return false;
27392 }
27393
27394 /* Determine if INSN stores to memory. */
27395
27396 static bool
27397 is_store_insn (rtx insn, rtx *str_mem)
27398 {
27399 if (!insn || !INSN_P (insn))
27400 return false;
27401
27402 return is_store_insn1 (PATTERN (insn), str_mem);
27403 }
27404
27405 /* Returns whether the dependence between INSN and NEXT is considered
27406 costly by the given target. */
27407
27408 static bool
27409 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
27410 {
27411 rtx insn;
27412 rtx next;
27413 rtx load_mem, str_mem;
27414
27415 /* If the flag is not enabled, no dependence is considered costly;
27416 allow all dependent insns in the same group.
27417 This is the most aggressive option. */
27418 if (rs6000_sched_costly_dep == no_dep_costly)
27419 return false;
27420
27421 /* If the flag is set to 1, a dependence is always considered costly;
27422 do not allow dependent instructions in the same group.
27423 This is the most conservative option. */
27424 if (rs6000_sched_costly_dep == all_deps_costly)
27425 return true;
27426
27427 insn = DEP_PRO (dep);
27428 next = DEP_CON (dep);
27429
27430 if (rs6000_sched_costly_dep == store_to_load_dep_costly
27431 && is_load_insn (next, &load_mem)
27432 && is_store_insn (insn, &str_mem))
27433 /* Prevent load after store in the same group. */
27434 return true;
27435
27436 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
27437 && is_load_insn (next, &load_mem)
27438 && is_store_insn (insn, &str_mem)
27439 && DEP_TYPE (dep) == REG_DEP_TRUE
27440 && mem_locations_overlap(str_mem, load_mem))
27441 /* Prevent load after store in the same group if it is a true
27442 dependence. */
27443 return true;
27444
27445 /* The flag is set to X; dependences with latency >= X are considered costly,
27446 and will not be scheduled in the same group. */
27447 if (rs6000_sched_costly_dep <= max_dep_latency
27448 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
27449 return true;
27450
27451 return false;
27452 }
27453
27454 /* Return the next insn after INSN that is found before TAIL is reached,
27455 skipping any "non-active" insns - insns that will not actually occupy
27456 an issue slot. Return NULL_RTX if such an insn is not found. */
27457
27458 static rtx_insn *
27459 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
27460 {
27461 if (insn == NULL_RTX || insn == tail)
27462 return NULL;
27463
27464 while (1)
27465 {
27466 insn = NEXT_INSN (insn);
27467 if (insn == NULL_RTX || insn == tail)
27468 return NULL;
27469
27470 if (CALL_P (insn)
27471 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
27472 || (NONJUMP_INSN_P (insn)
27473 && GET_CODE (PATTERN (insn)) != USE
27474 && GET_CODE (PATTERN (insn)) != CLOBBER
27475 && INSN_CODE (insn) != CODE_FOR_stack_tie))
27476 break;
27477 }
27478 return insn;
27479 }
27480
27481 /* We are about to begin issuing insns for this clock cycle. */
27482
27483 static int
27484 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
27485 rtx_insn **ready ATTRIBUTE_UNUSED,
27486 int *pn_ready ATTRIBUTE_UNUSED,
27487 int clock_var ATTRIBUTE_UNUSED)
27488 {
27489 int n_ready = *pn_ready;
27490
27491 if (sched_verbose)
27492 fprintf (dump, "// rs6000_sched_reorder :\n");
27493
27494 /* Reorder the ready list if the next insn to issue (the last ready
27495 entry) is a non-pipelined insn. */
27496 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
27497 {
27498 if (is_nonpipeline_insn (ready[n_ready - 1])
27499 && (recog_memoized (ready[n_ready - 2]) > 0))
27500 /* Simply swap first two insns. */
27501 {
27502 rtx_insn *tmp = ready[n_ready - 1];
27503 ready[n_ready - 1] = ready[n_ready - 2];
27504 ready[n_ready - 2] = tmp;
27505 }
27506 }
27507
27508 if (rs6000_cpu == PROCESSOR_POWER6)
27509 load_store_pendulum = 0;
27510
27511 return rs6000_issue_rate ();
27512 }
27513
27514 /* Like rs6000_sched_reorder, but called after issuing each insn. */
27515
27516 static int
27517 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
27518 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
27519 {
27520 if (sched_verbose)
27521 fprintf (dump, "// rs6000_sched_reorder2 :\n");
27522
27523 /* For Power6, we need to handle some special cases to try and keep the
27524 store queue from overflowing and triggering expensive flushes.
27525
27526 This code monitors how load and store instructions are being issued
27527 and skews the ready list one way or the other to increase the likelihood
27528 that a desired instruction is issued at the proper time.
27529
27530 A couple of things are done. First, we maintain a "load_store_pendulum"
27531 to track the current state of load/store issue.
27532
27533 - If the pendulum is at zero, then no loads or stores have been
27534 issued in the current cycle so we do nothing.
27535
27536 - If the pendulum is 1, then a single load has been issued in this
27537 cycle and we attempt to locate another load in the ready list to
27538 issue with it.
27539
27540 - If the pendulum is -2, then two stores have already been
27541 issued in this cycle, so we increase the priority of the first load
27542 in the ready list to increase its likelihood of being chosen first
27543 in the next cycle.
27544
27545 - If the pendulum is -1, then a single store has been issued in this
27546 cycle and we attempt to locate another store in the ready list to
27547 issue with it, preferring a store to an adjacent memory location to
27548 facilitate store pairing in the store queue.
27549
27550 - If the pendulum is 2, then two loads have already been
27551 issued in this cycle, so we increase the priority of the first store
27552 in the ready list to increase its likelihood of being chosen first
27553 in the next cycle.
27554
27555 - If the pendulum < -2 or > 2, then do nothing.
27556
27557 Note: This code covers the most common scenarios. There exist
27558 non-load/store instructions which make use of the LSU and which
27559 would need to be accounted for to model the machine's behavior
27560 strictly. Those instructions are currently unaccounted for to
27561 help minimize the compile-time overhead of this code.
27562 */
27563 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
27564 {
27565 int pos;
27566 int i;
27567 rtx_insn *tmp;
27568 rtx load_mem, str_mem;
27569
27570 if (is_store_insn (last_scheduled_insn, &str_mem))
27571 /* Issuing a store, swing the load_store_pendulum to the left */
27572 load_store_pendulum--;
27573 else if (is_load_insn (last_scheduled_insn, &load_mem))
27574 /* Issuing a load, swing the load_store_pendulum to the right */
27575 load_store_pendulum++;
27576 else
27577 return cached_can_issue_more;
27578
27579 /* If the pendulum is balanced, or there is only one instruction on
27580 the ready list, then all is well, so return. */
27581 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
27582 return cached_can_issue_more;
27583
27584 if (load_store_pendulum == 1)
27585 {
27586 /* A load has been issued in this cycle. Scan the ready list
27587 for another load to issue with it */
27588 pos = *pn_ready-1;
27589
27590 while (pos >= 0)
27591 {
27592 if (is_load_insn (ready[pos], &load_mem))
27593 {
27594 /* Found a load. Move it to the head of the ready list,
27595 and adjust its priority so that it is more likely to
27596 stay there. */
27597 tmp = ready[pos];
27598 for (i=pos; i<*pn_ready-1; i++)
27599 ready[i] = ready[i + 1];
27600 ready[*pn_ready-1] = tmp;
27601
27602 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27603 INSN_PRIORITY (tmp)++;
27604 break;
27605 }
27606 pos--;
27607 }
27608 }
27609 else if (load_store_pendulum == -2)
27610 {
27611 /* Two stores have been issued in this cycle. Increase the
27612 priority of the first load in the ready list to favor it for
27613 issuing in the next cycle. */
27614 pos = *pn_ready-1;
27615
27616 while (pos >= 0)
27617 {
27618 if (is_load_insn (ready[pos], &load_mem)
27619 && !sel_sched_p ()
27620 && INSN_PRIORITY_KNOWN (ready[pos]))
27621 {
27622 INSN_PRIORITY (ready[pos])++;
27623
27624 /* Adjust the pendulum to account for the fact that a load
27625 was found and increased in priority. This is to prevent
27626 increasing the priority of multiple loads */
27627 load_store_pendulum--;
27628
27629 break;
27630 }
27631 pos--;
27632 }
27633 }
27634 else if (load_store_pendulum == -1)
27635 {
27636 /* A store has been issued in this cycle. Scan the ready list for
27637 another store to issue with it, preferring a store to an adjacent
27638 memory location */
27639 int first_store_pos = -1;
27640
27641 pos = *pn_ready-1;
27642
27643 while (pos >= 0)
27644 {
27645 if (is_store_insn (ready[pos], &str_mem))
27646 {
27647 rtx str_mem2;
27648 /* Maintain the index of the first store found on the
27649 list */
27650 if (first_store_pos == -1)
27651 first_store_pos = pos;
27652
27653 if (is_store_insn (last_scheduled_insn, &str_mem2)
27654 && adjacent_mem_locations (str_mem, str_mem2))
27655 {
27656 /* Found an adjacent store. Move it to the head of the
27657 ready list, and adjust its priority so that it is
27658 more likely to stay there. */
27659 tmp = ready[pos];
27660 for (i=pos; i<*pn_ready-1; i++)
27661 ready[i] = ready[i + 1];
27662 ready[*pn_ready-1] = tmp;
27663
27664 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27665 INSN_PRIORITY (tmp)++;
27666
27667 first_store_pos = -1;
27668
27669 break;
27670 }
27671 }
27672 pos--;
27673 }
27674
27675 if (first_store_pos >= 0)
27676 {
27677 /* An adjacent store wasn't found, but a non-adjacent store was,
27678 so move the non-adjacent store to the front of the ready
27679 list, and adjust its priority so that it is more likely to
27680 stay there. */
27681 tmp = ready[first_store_pos];
27682 for (i=first_store_pos; i<*pn_ready-1; i++)
27683 ready[i] = ready[i + 1];
27684 ready[*pn_ready-1] = tmp;
27685 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27686 INSN_PRIORITY (tmp)++;
27687 }
27688 }
27689 else if (load_store_pendulum == 2)
27690 {
27691 /* Two loads have been issued in this cycle. Increase the priority
27692 of the first store in the ready list to favor it for issuing in
27693 the next cycle. */
27694 pos = *pn_ready-1;
27695
27696 while (pos >= 0)
27697 {
27698 if (is_store_insn (ready[pos], &str_mem)
27699 && !sel_sched_p ()
27700 && INSN_PRIORITY_KNOWN (ready[pos]))
27701 {
27702 INSN_PRIORITY (ready[pos])++;
27703
27704 /* Adjust the pendulum to account for the fact that a store
27705 was found and increased in priority. This is to prevent
27706 increasing the priority of multiple stores. */
27707 load_store_pendulum++;
27708
27709 break;
27710 }
27711 pos--;
27712 }
27713 }
27714 }
27715
27716 return cached_can_issue_more;
27717 }
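
/* A minimal standalone sketch (illustrative only; plain C, never called
   by GCC) of the pendulum bookkeeping above.  Negative values mean
   stores were just issued, positive values mean loads.  A value of
   +1/-1 asks the hook to pair the insn with another of the same kind;
   +2/-2 asks it to boost a ready insn of the opposite kind; values
   beyond +/-2 mean the boost already happened, so nothing more is done.  */

static int load_store_pendulum_sketch (int, int, int) ATTRIBUTE_UNUSED;
static int
load_store_pendulum_sketch (int pendulum, int is_load, int is_store)
{
  if (is_store)
    pendulum--;                     /* Swing left: a store was issued.  */
  else if (is_load)
    pendulum++;                     /* Swing right: a load was issued.  */

  /* 0: balanced, nothing to do.
     +1/-1: scan the ready list for a second load/store to pair with it.
     +2/-2: raise the priority of the first opposite-kind ready insn,
            then step the pendulum once more, away from zero, so the
            boost happens only once.  */
  return pendulum;
}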
27718
27719 /* Return whether the presence of INSN causes a dispatch group termination
27720 of group WHICH_GROUP.
27721
27722 If WHICH_GROUP == current_group, this function will return true if INSN
27723 causes the termination of the current group (i.e., the dispatch group to
27724 which INSN belongs). This means that INSN will be the last insn in the
27725 group it belongs to.
27726
27727 If WHICH_GROUP == previous_group, this function will return true if INSN
27728 causes the termination of the previous group (i.e., the dispatch group that
27729 precedes the group to which INSN belongs). This means that INSN will be
27730 the first insn in the group it belongs to. */
27731
27732 static bool
27733 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
27734 {
27735 bool first, last;
27736
27737 if (! insn)
27738 return false;
27739
27740 first = insn_must_be_first_in_group (insn);
27741 last = insn_must_be_last_in_group (insn);
27742
27743 if (first && last)
27744 return true;
27745
27746 if (which_group == current_group)
27747 return last;
27748 else if (which_group == previous_group)
27749 return first;
27750
27751 return false;
27752 }
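
/* Illustrative summary of the predicate above, derived directly from its
   checks ("first"/"last" are the insn_must_be_*_in_group results):

     first  last   current_group  previous_group
     yes    yes    true           true
     no     yes    true           false
     yes    no     false          true
     no     no     false          false  */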
27753
27754
27755 static bool
27756 insn_must_be_first_in_group (rtx_insn *insn)
27757 {
27758 enum attr_type type;
27759
27760 if (!insn
27761 || NOTE_P (insn)
27762 || DEBUG_INSN_P (insn)
27763 || GET_CODE (PATTERN (insn)) == USE
27764 || GET_CODE (PATTERN (insn)) == CLOBBER)
27765 return false;
27766
27767 switch (rs6000_cpu)
27768 {
27769 case PROCESSOR_POWER5:
27770 if (is_cracked_insn (insn))
27771 return true;
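/* FALLTHRU: POWER5 shares the remaining POWER4 checks.  */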
27772 case PROCESSOR_POWER4:
27773 if (is_microcoded_insn (insn))
27774 return true;
27775
27776 if (!rs6000_sched_groups)
27777 return false;
27778
27779 type = get_attr_type (insn);
27780
27781 switch (type)
27782 {
27783 case TYPE_MFCR:
27784 case TYPE_MFCRF:
27785 case TYPE_MTCR:
27786 case TYPE_DELAYED_CR:
27787 case TYPE_CR_LOGICAL:
27788 case TYPE_MTJMPR:
27789 case TYPE_MFJMPR:
27790 case TYPE_DIV:
27791 case TYPE_LOAD_L:
27792 case TYPE_STORE_C:
27793 case TYPE_ISYNC:
27794 case TYPE_SYNC:
27795 return true;
27796 default:
27797 break;
27798 }
27799 break;
27800 case PROCESSOR_POWER6:
27801 type = get_attr_type (insn);
27802
27803 switch (type)
27804 {
27805 case TYPE_EXTS:
27806 case TYPE_CNTLZ:
27807 case TYPE_TRAP:
27808 case TYPE_MUL:
27809 case TYPE_INSERT:
27810 case TYPE_FPCOMPARE:
27811 case TYPE_MFCR:
27812 case TYPE_MTCR:
27813 case TYPE_MFJMPR:
27814 case TYPE_MTJMPR:
27815 case TYPE_ISYNC:
27816 case TYPE_SYNC:
27817 case TYPE_LOAD_L:
27818 case TYPE_STORE_C:
27819 return true;
27820 case TYPE_SHIFT:
27821 if (get_attr_dot (insn) == DOT_NO
27822 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
27823 return true;
27824 else
27825 break;
27826 case TYPE_DIV:
27827 if (get_attr_size (insn) == SIZE_32)
27828 return true;
27829 else
27830 break;
27831 case TYPE_LOAD:
27832 case TYPE_STORE:
27833 case TYPE_FPLOAD:
27834 case TYPE_FPSTORE:
27835 if (get_attr_update (insn) == UPDATE_YES)
27836 return true;
27837 else
27838 break;
27839 default:
27840 break;
27841 }
27842 break;
27843 case PROCESSOR_POWER7:
27844 type = get_attr_type (insn);
27845
27846 switch (type)
27847 {
27848 case TYPE_CR_LOGICAL:
27849 case TYPE_MFCR:
27850 case TYPE_MFCRF:
27851 case TYPE_MTCR:
27852 case TYPE_DIV:
27853 case TYPE_COMPARE:
27854 case TYPE_ISYNC:
27855 case TYPE_LOAD_L:
27856 case TYPE_STORE_C:
27857 case TYPE_MFJMPR:
27858 case TYPE_MTJMPR:
27859 return true;
27860 case TYPE_MUL:
27861 case TYPE_SHIFT:
27862 case TYPE_EXTS:
27863 if (get_attr_dot (insn) == DOT_YES)
27864 return true;
27865 else
27866 break;
27867 case TYPE_LOAD:
27868 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27869 || get_attr_update (insn) == UPDATE_YES)
27870 return true;
27871 else
27872 break;
27873 case TYPE_STORE:
27874 case TYPE_FPLOAD:
27875 case TYPE_FPSTORE:
27876 if (get_attr_update (insn) == UPDATE_YES)
27877 return true;
27878 else
27879 break;
27880 default:
27881 break;
27882 }
27883 break;
27884 case PROCESSOR_POWER8:
27885 type = get_attr_type (insn);
27886
27887 switch (type)
27888 {
27889 case TYPE_CR_LOGICAL:
27890 case TYPE_DELAYED_CR:
27891 case TYPE_MFCR:
27892 case TYPE_MFCRF:
27893 case TYPE_MTCR:
27894 case TYPE_COMPARE:
27895 case TYPE_SYNC:
27896 case TYPE_ISYNC:
27897 case TYPE_LOAD_L:
27898 case TYPE_STORE_C:
27899 case TYPE_VECSTORE:
27900 case TYPE_MFJMPR:
27901 case TYPE_MTJMPR:
27902 return true;
27903 case TYPE_SHIFT:
27904 case TYPE_EXTS:
27905 case TYPE_MUL:
27906 if (get_attr_dot (insn) == DOT_YES)
27907 return true;
27908 else
27909 break;
27910 case TYPE_LOAD:
27911 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27912 || get_attr_update (insn) == UPDATE_YES)
27913 return true;
27914 else
27915 break;
27916 case TYPE_STORE:
27917 if (get_attr_update (insn) == UPDATE_YES
27918 && get_attr_indexed (insn) == INDEXED_YES)
27919 return true;
27920 else
27921 break;
27922 default:
27923 break;
27924 }
27925 break;
27926 default:
27927 break;
27928 }
27929
27930 return false;
27931 }
27932
27933 static bool
27934 insn_must_be_last_in_group (rtx_insn *insn)
27935 {
27936 enum attr_type type;
27937
27938 if (!insn
27939 || NOTE_P (insn)
27940 || DEBUG_INSN_P (insn)
27941 || GET_CODE (PATTERN (insn)) == USE
27942 || GET_CODE (PATTERN (insn)) == CLOBBER)
27943 return false;
27944
27945 switch (rs6000_cpu) {
27946 case PROCESSOR_POWER4:
27947 case PROCESSOR_POWER5:
27948 if (is_microcoded_insn (insn))
27949 return true;
27950
27951 if (is_branch_slot_insn (insn))
27952 return true;
27953
27954 break;
27955 case PROCESSOR_POWER6:
27956 type = get_attr_type (insn);
27957
27958 switch (type)
27959 {
27960 case TYPE_EXTS:
27961 case TYPE_CNTLZ:
27962 case TYPE_TRAP:
27963 case TYPE_MUL:
27964 case TYPE_FPCOMPARE:
27965 case TYPE_MFCR:
27966 case TYPE_MTCR:
27967 case TYPE_MFJMPR:
27968 case TYPE_MTJMPR:
27969 case TYPE_ISYNC:
27970 case TYPE_SYNC:
27971 case TYPE_LOAD_L:
27972 case TYPE_STORE_C:
27973 return true;
27974 case TYPE_SHIFT:
27975 if (get_attr_dot (insn) == DOT_NO
27976 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
27977 return true;
27978 else
27979 break;
27980 case TYPE_DIV:
27981 if (get_attr_size (insn) == SIZE_32)
27982 return true;
27983 else
27984 break;
27985 default:
27986 break;
27987 }
27988 break;
27989 case PROCESSOR_POWER7:
27990 type = get_attr_type (insn);
27991
27992 switch (type)
27993 {
27994 case TYPE_ISYNC:
27995 case TYPE_SYNC:
27996 case TYPE_LOAD_L:
27997 case TYPE_STORE_C:
27998 return true;
27999 case TYPE_LOAD:
28000 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28001 && get_attr_update (insn) == UPDATE_YES)
28002 return true;
28003 else
28004 break;
28005 case TYPE_STORE:
28006 if (get_attr_update (insn) == UPDATE_YES
28007 && get_attr_indexed (insn) == INDEXED_YES)
28008 return true;
28009 else
28010 break;
28011 default:
28012 break;
28013 }
28014 break;
28015 case PROCESSOR_POWER8:
28016 type = get_attr_type (insn);
28017
28018 switch (type)
28019 {
28020 case TYPE_MFCR:
28021 case TYPE_MTCR:
28022 case TYPE_ISYNC:
28023 case TYPE_SYNC:
28024 case TYPE_LOAD_L:
28025 case TYPE_STORE_C:
28026 return true;
28027 case TYPE_LOAD:
28028 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28029 && get_attr_update (insn) == UPDATE_YES)
28030 return true;
28031 else
28032 break;
28033 case TYPE_STORE:
28034 if (get_attr_update (insn) == UPDATE_YES
28035 && get_attr_indexed (insn) == INDEXED_YES)
28036 return true;
28037 else
28038 break;
28039 default:
28040 break;
28041 }
28042 break;
28043 default:
28044 break;
28045 }
28046
28047 return false;
28048 }
28049
28050 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
28051 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
28052
28053 static bool
28054 is_costly_group (rtx *group_insns, rtx next_insn)
28055 {
28056 int i;
28057 int issue_rate = rs6000_issue_rate ();
28058
28059 for (i = 0; i < issue_rate; i++)
28060 {
28061 sd_iterator_def sd_it;
28062 dep_t dep;
28063 rtx insn = group_insns[i];
28064
28065 if (!insn)
28066 continue;
28067
28068 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
28069 {
28070 rtx next = DEP_CON (dep);
28071
28072 if (next == next_insn
28073 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
28074 return true;
28075 }
28076 }
28077
28078 return false;
28079 }
28080
28081 /* Utility of the function redefine_groups.
28082 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
28083 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
28084 to keep it "far" (in a separate group) from GROUP_INSNS, following
28085 one of the following schemes, depending on the value of the flag
28086 -minsert-sched-nops=X:
28087 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
28088 in order to force NEXT_INSN into a separate group.
28089 (2) X < sched_finish_regroup_exact: insert exactly X nops.
28090 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
28091 insertion (has a group just ended, how many vacant issue slots remain in the
28092 last group, and how many dispatch groups were encountered so far). */
28093
28094 static int
28095 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
28096 rtx_insn *next_insn, bool *group_end, int can_issue_more,
28097 int *group_count)
28098 {
28099 rtx nop;
28100 bool force;
28101 int issue_rate = rs6000_issue_rate ();
28102 bool end = *group_end;
28103 int i;
28104
28105 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
28106 return can_issue_more;
28107
28108 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
28109 return can_issue_more;
28110
28111 force = is_costly_group (group_insns, next_insn);
28112 if (!force)
28113 return can_issue_more;
28114
28115 if (sched_verbose > 6)
28116 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
28117 *group_count, can_issue_more);
28118
28119 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
28120 {
28121 if (*group_end)
28122 can_issue_more = 0;
28123
28124 /* Since only a branch can be issued in the last issue_slot, it is
28125 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
28126 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
28127 in this case the last nop will start a new group and the branch
28128 will be forced to the new group. */
28129 if (can_issue_more && !is_branch_slot_insn (next_insn))
28130 can_issue_more--;
28131
28132 /* Do we have a special group ending nop? */
28133 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
28134 || rs6000_cpu_attr == CPU_POWER8)
28135 {
28136 nop = gen_group_ending_nop ();
28137 emit_insn_before (nop, next_insn);
28138 can_issue_more = 0;
28139 }
28140 else
28141 while (can_issue_more > 0)
28142 {
28143 nop = gen_nop ();
28144 emit_insn_before (nop, next_insn);
28145 can_issue_more--;
28146 }
28147
28148 *group_end = true;
28149 return 0;
28150 }
28151
28152 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
28153 {
28154 int n_nops = rs6000_sched_insert_nops;
28155
28156 /* Nops can't be issued from the branch slot, so the effective
28157 issue_rate for nops is 'issue_rate - 1'. */
28158 if (can_issue_more == 0)
28159 can_issue_more = issue_rate;
28160 can_issue_more--;
28161 if (can_issue_more == 0)
28162 {
28163 can_issue_more = issue_rate - 1;
28164 (*group_count)++;
28165 end = true;
28166 for (i = 0; i < issue_rate; i++)
28167 {
28168 group_insns[i] = 0;
28169 }
28170 }
28171
28172 while (n_nops > 0)
28173 {
28174 nop = gen_nop ();
28175 emit_insn_before (nop, next_insn);
28176 if (can_issue_more == issue_rate - 1) /* new group begins */
28177 end = false;
28178 can_issue_more--;
28179 if (can_issue_more == 0)
28180 {
28181 can_issue_more = issue_rate - 1;
28182 (*group_count)++;
28183 end = true;
28184 for (i = 0; i < issue_rate; i++)
28185 {
28186 group_insns[i] = 0;
28187 }
28188 }
28189 n_nops--;
28190 }
28191
28192 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
28193 can_issue_more++;
28194
28195 /* Is next_insn going to start a new group? */
28196 *group_end
28197 = (end
28198 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28199 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28200 || (can_issue_more < issue_rate &&
28201 insn_terminates_group_p (next_insn, previous_group)));
28202 if (*group_end && end)
28203 (*group_count)--;
28204
28205 if (sched_verbose > 6)
28206 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
28207 *group_count, can_issue_more);
28208 return can_issue_more;
28209 }
28210
28211 return can_issue_more;
28212 }
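
/* A standalone sketch (illustrative only, never called) of the nop count
   that scheme (1) above produces on the generic path: fill every vacant
   slot except the branch slot, unless the next insn is itself a branch,
   in which case the branch slot is filled too so the branch is pushed
   into a new group.  On POWER6/7/8 the emission loop is replaced by a
   single special group-ending nop regardless of this count.  */

static int regroup_exact_nop_count_sketch (int, int, int) ATTRIBUTE_UNUSED;
static int
regroup_exact_nop_count_sketch (int can_issue_more, int next_is_branch,
                                int group_already_ended)
{
  if (group_already_ended)
    can_issue_more = 0;
  if (can_issue_more && !next_is_branch)
    can_issue_more--;               /* Leave the branch slot unfilled.  */
  return can_issue_more;            /* Number of nops to emit.  */
}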
28213
28214 /* This function tries to synchronize the dispatch groups that the compiler
28215 "sees" with the dispatch groups that the processor dispatcher is expected
28216 to form in practice. It tries to achieve this synchronization by forcing
28217 the estimated processor grouping on the compiler (as opposed to the function
28218 'pad_groups' which tries to force the scheduler's grouping on the processor).
28219
28220 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
28221 examines the (estimated) dispatch groups that will be formed by the processor
28222 dispatcher. It marks these group boundaries to reflect the estimated
28223 processor grouping, overriding the grouping that the scheduler had marked.
28224 Depending on the value of the flag '-minsert-sched-nops' this function can
28225 force certain insns into separate groups or force a certain distance between
28226 them by inserting nops, for example, if there exists a "costly dependence"
28227 between the insns.
28228
28229 The function estimates the group boundaries that the processor will form as
28230 follows: It keeps track of how many vacant issue slots are available after
28231 each insn. A subsequent insn will start a new group if one of the following
28232 4 cases applies:
28233 - no more vacant issue slots remain in the current dispatch group.
28234 - only the last issue slot, which is the branch slot, is vacant, but the next
28235 insn is not a branch.
28236 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
28237 which means that a cracked insn (which occupies two issue slots) can't be
28238 issued in this group.
28239 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
28240 start a new group. */
28241
28242 static int
28243 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28244 rtx_insn *tail)
28245 {
28246 rtx_insn *insn, *next_insn;
28247 int issue_rate;
28248 int can_issue_more;
28249 int slot, i;
28250 bool group_end;
28251 int group_count = 0;
28252 rtx *group_insns;
28253
28254 /* Initialize. */
28255 issue_rate = rs6000_issue_rate ();
28256 group_insns = XALLOCAVEC (rtx, issue_rate);
28257 for (i = 0; i < issue_rate; i++)
28258 {
28259 group_insns[i] = 0;
28260 }
28261 can_issue_more = issue_rate;
28262 slot = 0;
28263 insn = get_next_active_insn (prev_head_insn, tail);
28264 group_end = false;
28265
28266 while (insn != NULL_RTX)
28267 {
28268 slot = (issue_rate - can_issue_more);
28269 group_insns[slot] = insn;
28270 can_issue_more =
28271 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28272 if (insn_terminates_group_p (insn, current_group))
28273 can_issue_more = 0;
28274
28275 next_insn = get_next_active_insn (insn, tail);
28276 if (next_insn == NULL_RTX)
28277 return group_count + 1;
28278
28279 /* Is next_insn going to start a new group? */
28280 group_end
28281 = (can_issue_more == 0
28282 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28283 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28284 || (can_issue_more < issue_rate &&
28285 insn_terminates_group_p (next_insn, previous_group)));
28286
28287 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
28288 next_insn, &group_end, can_issue_more,
28289 &group_count);
28290
28291 if (group_end)
28292 {
28293 group_count++;
28294 can_issue_more = 0;
28295 for (i = 0; i < issue_rate; i++)
28296 {
28297 group_insns[i] = 0;
28298 }
28299 }
28300
28301 if (GET_MODE (next_insn) == TImode && can_issue_more)
28302 PUT_MODE (next_insn, VOIDmode);
28303 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
28304 PUT_MODE (next_insn, TImode);
28305
28306 insn = next_insn;
28307 if (can_issue_more == 0)
28308 can_issue_more = issue_rate;
28309 } /* while */
28310
28311 return group_count;
28312 }
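
/* A standalone sketch (illustrative only, never called) of the four
   boundary cases enumerated in the comment before redefine_groups, as
   they are evaluated for the next insn:  */

static int group_end_sketch (int, int, int, int, int) ATTRIBUTE_UNUSED;
static int
group_end_sketch (int can_issue_more, int issue_rate, int next_is_branch,
                  int next_is_cracked, int next_must_be_first)
{
  return (can_issue_more == 0                             /* case 1 */
          || (can_issue_more == 1 && !next_is_branch)     /* case 2 */
          || (can_issue_more <= 2 && next_is_cracked)     /* case 3 */
          || (can_issue_more < issue_rate                 /* case 4 */
              && next_must_be_first));
}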
28313
28314 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
28315 dispatch group boundaries that the scheduler had marked. Pad with nops
28316 any dispatch groups which have vacant issue slots, in order to force the
28317 scheduler's grouping on the processor dispatcher. The function
28318 returns the number of dispatch groups found. */
28319
28320 static int
28321 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28322 rtx_insn *tail)
28323 {
28324 rtx_insn *insn, *next_insn;
28325 rtx nop;
28326 int issue_rate;
28327 int can_issue_more;
28328 int group_end;
28329 int group_count = 0;
28330
28331 /* Initialize issue_rate. */
28332 issue_rate = rs6000_issue_rate ();
28333 can_issue_more = issue_rate;
28334
28335 insn = get_next_active_insn (prev_head_insn, tail);
28336 next_insn = get_next_active_insn (insn, tail);
28337
28338 while (insn != NULL_RTX)
28339 {
28340 can_issue_more =
28341 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28342
28343 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
28344
28345 if (next_insn == NULL_RTX)
28346 break;
28347
28348 if (group_end)
28349 {
28350 /* If the scheduler had marked group termination at this location
28351 (between insn and next_insn), and neither insn nor next_insn will
28352 force group termination, pad the group with nops to force group
28353 termination. */
28354 if (can_issue_more
28355 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
28356 && !insn_terminates_group_p (insn, current_group)
28357 && !insn_terminates_group_p (next_insn, previous_group))
28358 {
28359 if (!is_branch_slot_insn (next_insn))
28360 can_issue_more--;
28361
28362 while (can_issue_more)
28363 {
28364 nop = gen_nop ();
28365 emit_insn_before (nop, next_insn);
28366 can_issue_more--;
28367 }
28368 }
28369
28370 can_issue_more = issue_rate;
28371 group_count++;
28372 }
28373
28374 insn = next_insn;
28375 next_insn = get_next_active_insn (insn, tail);
28376 }
28377
28378 return group_count;
28379 }
28380
28381 /* We're beginning a new block. Initialize data structures as necessary. */
28382
28383 static void
28384 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
28385 int sched_verbose ATTRIBUTE_UNUSED,
28386 int max_ready ATTRIBUTE_UNUSED)
28387 {
28388 last_scheduled_insn = NULL_RTX;
28389 load_store_pendulum = 0;
28390 }
28391
28392 /* The following function is called at the end of scheduling BB.
28393 After reload, it inserts nops to enforce insn group bundling. */
28394
28395 static void
28396 rs6000_sched_finish (FILE *dump, int sched_verbose)
28397 {
28398 int n_groups;
28399
28400 if (sched_verbose)
28401 fprintf (dump, "=== Finishing schedule.\n");
28402
28403 if (reload_completed && rs6000_sched_groups)
28404 {
28405 /* Do not run sched_finish hook when selective scheduling enabled. */
28406 if (sel_sched_p ())
28407 return;
28408
28409 if (rs6000_sched_insert_nops == sched_finish_none)
28410 return;
28411
28412 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
28413 n_groups = pad_groups (dump, sched_verbose,
28414 current_sched_info->prev_head,
28415 current_sched_info->next_tail);
28416 else
28417 n_groups = redefine_groups (dump, sched_verbose,
28418 current_sched_info->prev_head,
28419 current_sched_info->next_tail);
28420
28421 if (sched_verbose >= 6)
28422 {
28423 fprintf (dump, "ngroups = %d\n", n_groups);
28424 print_rtl (dump, current_sched_info->prev_head);
28425 fprintf (dump, "Done finish_sched\n");
28426 }
28427 }
28428 }
28429
28430 struct _rs6000_sched_context
28431 {
28432 short cached_can_issue_more;
28433 rtx last_scheduled_insn;
28434 int load_store_pendulum;
28435 };
28436
28437 typedef struct _rs6000_sched_context rs6000_sched_context_def;
28438 typedef rs6000_sched_context_def *rs6000_sched_context_t;
28439
28440 /* Allocate storage for a new scheduling context. */
28441 static void *
28442 rs6000_alloc_sched_context (void)
28443 {
28444 return xmalloc (sizeof (rs6000_sched_context_def));
28445 }
28446
28447 /* If CLEAN_P is true, initialize _SC with clean data;
28448 otherwise initialize it from the global context. */
28449 static void
28450 rs6000_init_sched_context (void *_sc, bool clean_p)
28451 {
28452 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28453
28454 if (clean_p)
28455 {
28456 sc->cached_can_issue_more = 0;
28457 sc->last_scheduled_insn = NULL_RTX;
28458 sc->load_store_pendulum = 0;
28459 }
28460 else
28461 {
28462 sc->cached_can_issue_more = cached_can_issue_more;
28463 sc->last_scheduled_insn = last_scheduled_insn;
28464 sc->load_store_pendulum = load_store_pendulum;
28465 }
28466 }
28467
28468 /* Sets the global scheduling context to the one pointed to by _SC. */
28469 static void
28470 rs6000_set_sched_context (void *_sc)
28471 {
28472 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28473
28474 gcc_assert (sc != NULL);
28475
28476 cached_can_issue_more = sc->cached_can_issue_more;
28477 last_scheduled_insn = sc->last_scheduled_insn;
28478 load_store_pendulum = sc->load_store_pendulum;
28479 }
28480
28481 /* Free _SC. */
28482 static void
28483 rs6000_free_sched_context (void *_sc)
28484 {
28485 gcc_assert (_sc != NULL);
28486
28487 free (_sc);
28488 }
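
/* Illustrative lifecycle of the four hooks above, as the scheduler
   midend is expected to drive them (a hedged sketch of the calling
   convention, not code that runs here): allocate raw storage, fill it
   in either clean or from the live globals, later copy it back into
   the globals, and finally release it:

     void *sc = rs6000_alloc_sched_context ();
     rs6000_init_sched_context (sc, true);
     ...
     rs6000_set_sched_context (sc);
     rs6000_free_sched_context (sc);  */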
28489
28490 \f
28491 /* Length in units of the trampoline for entering a nested function. */
28492
28493 int
28494 rs6000_trampoline_size (void)
28495 {
28496 int ret = 0;
28497
28498 switch (DEFAULT_ABI)
28499 {
28500 default:
28501 gcc_unreachable ();
28502
28503 case ABI_AIX:
28504 ret = (TARGET_32BIT) ? 12 : 24;
28505 break;
28506
28507 case ABI_ELFv2:
28508 gcc_assert (!TARGET_32BIT);
28509 ret = 32;
28510 break;
28511
28512 case ABI_DARWIN:
28513 case ABI_V4:
28514 ret = (TARGET_32BIT) ? 40 : 48;
28515 break;
28516 }
28517
28518 return ret;
28519 }
28520
28521 /* Emit RTL insns to initialize the variable parts of a trampoline.
28522 FNADDR is an RTX for the address of the function's pure code.
28523 CXT is an RTX for the static chain value for the function. */
28524
28525 static void
28526 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
28527 {
28528 int regsize = (TARGET_32BIT) ? 4 : 8;
28529 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
28530 rtx ctx_reg = force_reg (Pmode, cxt);
28531 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
28532
28533 switch (DEFAULT_ABI)
28534 {
28535 default:
28536 gcc_unreachable ();
28537
28538 /* Under AIX, just build the 3-word function descriptor. */
28539 case ABI_AIX:
28540 {
28541 rtx fnmem, fn_reg, toc_reg;
28542
28543 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
28544 error ("You cannot take the address of a nested function if you use "
28545 "the -mno-pointers-to-nested-functions option.");
28546
28547 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
28548 fn_reg = gen_reg_rtx (Pmode);
28549 toc_reg = gen_reg_rtx (Pmode);
28550
28551 /* Macro to shorten the code expansions below. */
28552 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
28553
28554 m_tramp = replace_equiv_address (m_tramp, addr);
28555
28556 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
28557 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
28558 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
28559 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
28560 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
28561
28562 # undef MEM_PLUS
28563 }
28564 break;
28565
28566 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
28567 case ABI_ELFv2:
28568 case ABI_DARWIN:
28569 case ABI_V4:
28570 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
28571 LCT_NORMAL, VOIDmode, 4,
28572 addr, Pmode,
28573 GEN_INT (rs6000_trampoline_size ()), SImode,
28574 fnaddr, Pmode,
28575 ctx_reg, Pmode);
28576 break;
28577 }
28578 }
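
/* For reference, a hedged sketch (illustrative only, never used) of the
   3-word AIX descriptor the ABI_AIX case above fills in; each field is
   regsize (4 or 8) bytes wide.  */

struct aix_trampoline_sketch
{
  void *fn_entry;       /* Offset 0:          the function's code address.  */
  void *toc;            /* Offset regsize:    the function's TOC pointer.  */
  void *static_chain;   /* Offset 2*regsize:  the static chain value.  */
};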
28579
28580 \f
28581 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
28582 identifier as an argument, so the front end shouldn't look it up. */
28583
28584 static bool
28585 rs6000_attribute_takes_identifier_p (const_tree attr_id)
28586 {
28587 return is_attribute_p ("altivec", attr_id);
28588 }
28589
28590 /* Handle the "altivec" attribute. The attribute may have
28591 arguments as follows:
28592
28593 __attribute__((altivec(vector__)))
28594 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
28595 __attribute__((altivec(bool__))) (always followed by 'unsigned')
28596
28597 and may appear more than once (e.g., 'vector bool char') in a
28598 given declaration. */
28599
28600 static tree
28601 rs6000_handle_altivec_attribute (tree *node,
28602 tree name ATTRIBUTE_UNUSED,
28603 tree args,
28604 int flags ATTRIBUTE_UNUSED,
28605 bool *no_add_attrs)
28606 {
28607 tree type = *node, result = NULL_TREE;
28608 machine_mode mode;
28609 int unsigned_p;
28610 char altivec_type
28611 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
28612 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
28613 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
28614 : '?');
28615
28616 while (POINTER_TYPE_P (type)
28617 || TREE_CODE (type) == FUNCTION_TYPE
28618 || TREE_CODE (type) == METHOD_TYPE
28619 || TREE_CODE (type) == ARRAY_TYPE)
28620 type = TREE_TYPE (type);
28621
28622 mode = TYPE_MODE (type);
28623
28624 /* Check for invalid AltiVec type qualifiers. */
28625 if (type == long_double_type_node)
28626 error ("use of %<long double%> in AltiVec types is invalid");
28627 else if (type == boolean_type_node)
28628 error ("use of boolean types in AltiVec types is invalid");
28629 else if (TREE_CODE (type) == COMPLEX_TYPE)
28630 error ("use of %<complex%> in AltiVec types is invalid");
28631 else if (DECIMAL_FLOAT_MODE_P (mode))
28632 error ("use of decimal floating point types in AltiVec types is invalid");
28633 else if (!TARGET_VSX)
28634 {
28635 if (type == long_unsigned_type_node || type == long_integer_type_node)
28636 {
28637 if (TARGET_64BIT)
28638 error ("use of %<long%> in AltiVec types is invalid for "
28639 "64-bit code without -mvsx");
28640 else if (rs6000_warn_altivec_long)
28641 warning (0, "use of %<long%> in AltiVec types is deprecated; "
28642 "use %<int%>");
28643 }
28644 else if (type == long_long_unsigned_type_node
28645 || type == long_long_integer_type_node)
28646 error ("use of %<long long%> in AltiVec types is invalid without "
28647 "-mvsx");
28648 else if (type == double_type_node)
28649 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
28650 }
28651
28652 switch (altivec_type)
28653 {
28654 case 'v':
28655 unsigned_p = TYPE_UNSIGNED (type);
28656 switch (mode)
28657 {
28658 case TImode:
28659 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
28660 break;
28661 case DImode:
28662 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
28663 break;
28664 case SImode:
28665 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
28666 break;
28667 case HImode:
28668 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
28669 break;
28670 case QImode:
28671 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
28672 break;
28673 case SFmode: result = V4SF_type_node; break;
28674 case DFmode: result = V2DF_type_node; break;
28675 /* If the user says 'vector int bool', we may be handed the 'bool'
28676 attribute _before_ the 'vector' attribute, and so select the
28677 proper type in the 'b' case below. */
28678 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
28679 case V2DImode: case V2DFmode:
28680 result = type;
28681 default: break;
28682 }
28683 break;
28684 case 'b':
28685 switch (mode)
28686 {
28687 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
28688 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
28689 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
28690 case QImode: case V16QImode: result = bool_V16QI_type_node;
28691 default: break;
28692 }
28693 break;
28694 case 'p':
28695 switch (mode)
28696 {
28697 case V8HImode: result = pixel_V8HI_type_node;
28698 default: break;
28699 }
28700 default: break;
28701 }
28702
28703 /* Propagate qualifiers attached to the element type
28704 onto the vector type. */
28705 if (result && result != type && TYPE_QUALS (type))
28706 result = build_qualified_type (result, TYPE_QUALS (type));
28707
28708 *no_add_attrs = true; /* No need to hang on to the attribute. */
28709
28710 if (result)
28711 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
28712
28713 return NULL_TREE;
28714 }
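
/* Hedged usage examples (illustrative only, target-side user code, not
   part of this file): the attribute forms described above, written the
   way the AltiVec keywords expand to them, yielding V4SI, bool V4SI and
   pixel V8HI types respectively:

     typedef __attribute__ ((altivec (vector__))) signed int t_v4si;
     typedef __attribute__ ((altivec (vector__)))
             __attribute__ ((altivec (bool__))) unsigned int t_bool_v4si;
     typedef __attribute__ ((altivec (vector__)))
             __attribute__ ((altivec (pixel__))) unsigned short t_pixel_v8hi;  */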
28715
28716 /* AltiVec defines four built-in scalar types that serve as vector
28717 elements; we must teach the compiler how to mangle them. */
28718
28719 static const char *
28720 rs6000_mangle_type (const_tree type)
28721 {
28722 type = TYPE_MAIN_VARIANT (type);
28723
28724 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28725 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28726 return NULL;
28727
28728 if (type == bool_char_type_node) return "U6__boolc";
28729 if (type == bool_short_type_node) return "U6__bools";
28730 if (type == pixel_type_node) return "u7__pixel";
28731 if (type == bool_int_type_node) return "U6__booli";
28732 if (type == bool_long_type_node) return "U6__booll";
28733
28734 /* Mangle IBM extended float long double as `g' (__float128) on
28735 powerpc*-linux where long-double-64 previously was the default. */
28736 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
28737 && TARGET_ELF
28738 && TARGET_LONG_DOUBLE_128
28739 && !TARGET_IEEEQUAD)
28740 return "g";
28741
28742 /* For all other types, use normal C++ mangling. */
28743 return NULL;
28744 }
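
/* A hedged example (assuming the standard Itanium C++ mangling around
   these fragments): a function taking the AltiVec __pixel scalar type,

     void f (__pixel);

   mangles as _Z1fu7__pixel using the "u7__pixel" piece above, while on
   ELF long-double-128 targets without IEEE quad a long double parameter
   mangles as 'g' instead of the usual 'e'.  */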
28745
28746 /* Handle a "longcall" or "shortcall" attribute; arguments as in
28747 struct attribute_spec.handler. */
28748
28749 static tree
28750 rs6000_handle_longcall_attribute (tree *node, tree name,
28751 tree args ATTRIBUTE_UNUSED,
28752 int flags ATTRIBUTE_UNUSED,
28753 bool *no_add_attrs)
28754 {
28755 if (TREE_CODE (*node) != FUNCTION_TYPE
28756 && TREE_CODE (*node) != FIELD_DECL
28757 && TREE_CODE (*node) != TYPE_DECL)
28758 {
28759 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28760 name);
28761 *no_add_attrs = true;
28762 }
28763
28764 return NULL_TREE;
28765 }
28766
28767 /* Set longcall attributes on all functions declared when
28768 rs6000_default_long_calls is true. */
28769 static void
28770 rs6000_set_default_type_attributes (tree type)
28771 {
28772 if (rs6000_default_long_calls
28773 && (TREE_CODE (type) == FUNCTION_TYPE
28774 || TREE_CODE (type) == METHOD_TYPE))
28775 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
28776 NULL_TREE,
28777 TYPE_ATTRIBUTES (type));
28778
28779 #if TARGET_MACHO
28780 darwin_set_default_type_attributes (type);
28781 #endif
28782 }
28783
28784 /* Return a reference suitable for calling a function with the
28785 longcall attribute. */
28786
28787 rtx
28788 rs6000_longcall_ref (rtx call_ref)
28789 {
28790 const char *call_name;
28791 tree node;
28792
28793 if (GET_CODE (call_ref) != SYMBOL_REF)
28794 return call_ref;
28795
28796 /* System V adds '.' to the internal name, so skip any leading dots. */
28797 call_name = XSTR (call_ref, 0);
28798 if (*call_name == '.')
28799 {
28800 while (*call_name == '.')
28801 call_name++;
28802
28803 node = get_identifier (call_name);
28804 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
28805 }
28806
28807 return force_reg (Pmode, call_ref);
28808 }
28809 \f
28810 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
28811 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
28812 #endif
28813
28814 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
28815 struct attribute_spec.handler. */
28816 static tree
28817 rs6000_handle_struct_attribute (tree *node, tree name,
28818 tree args ATTRIBUTE_UNUSED,
28819 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28820 {
28821 tree *type = NULL;
28822 if (DECL_P (*node))
28823 {
28824 if (TREE_CODE (*node) == TYPE_DECL)
28825 type = &TREE_TYPE (*node);
28826 }
28827 else
28828 type = node;
28829
28830 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
28831 || TREE_CODE (*type) == UNION_TYPE)))
28832 {
28833 warning (OPT_Wattributes, "%qE attribute ignored", name);
28834 *no_add_attrs = true;
28835 }
28836
28837 else if ((is_attribute_p ("ms_struct", name)
28838 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
28839 || ((is_attribute_p ("gcc_struct", name)
28840 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
28841 {
28842 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
28843 name);
28844 *no_add_attrs = true;
28845 }
28846
28847 return NULL_TREE;
28848 }
28849
28850 static bool
28851 rs6000_ms_bitfield_layout_p (const_tree record_type)
28852 {
28853 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
28854 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
28855 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
28856 }
28857 \f
28858 #ifdef USING_ELFOS_H
28859
28860 /* A get_unnamed_section callback, used for switching to toc_section. */
28861
28862 static void
28863 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
28864 {
28865 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28866 && TARGET_MINIMAL_TOC
28867 && !TARGET_RELOCATABLE)
28868 {
28869 if (!toc_initialized)
28870 {
28871 toc_initialized = 1;
28872 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28873 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
28874 fprintf (asm_out_file, "\t.tc ");
28875 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
28876 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28877 fprintf (asm_out_file, "\n");
28878
28879 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28880 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28881 fprintf (asm_out_file, " = .+32768\n");
28882 }
28883 else
28884 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28885 }
28886 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28887 && !TARGET_RELOCATABLE)
28888 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28889 else
28890 {
28891 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28892 if (!toc_initialized)
28893 {
28894 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28895 fprintf (asm_out_file, " = .+32768\n");
28896 toc_initialized = 1;
28897 }
28898 }
28899 }
28900
28901 /* Implement TARGET_ASM_INIT_SECTIONS. */
28902
28903 static void
28904 rs6000_elf_asm_init_sections (void)
28905 {
28906 toc_section
28907 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
28908
28909 sdata2_section
28910 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
28911 SDATA2_SECTION_ASM_OP);
28912 }
28913
28914 /* Implement TARGET_SELECT_RTX_SECTION. */
28915
28916 static section *
28917 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
28918 unsigned HOST_WIDE_INT align)
28919 {
28920 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
28921 return toc_section;
28922 else
28923 return default_elf_select_rtx_section (mode, x, align);
28924 }
28925 \f
28926 /* For a SYMBOL_REF, set generic flags and then perform some
28927 target-specific processing.
28928
28929 When the AIX ABI is requested on a non-AIX system, replace the
28930 function name with the real name (with a leading .) rather than the
28931 function descriptor name. This saves a lot of overriding code that
28932 would otherwise be needed to read the prefixes. */
28933
28934 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
28935 static void
28936 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
28937 {
28938 default_encode_section_info (decl, rtl, first);
28939
28940 if (first
28941 && TREE_CODE (decl) == FUNCTION_DECL
28942 && !TARGET_AIX
28943 && DEFAULT_ABI == ABI_AIX)
28944 {
28945 rtx sym_ref = XEXP (rtl, 0);
28946 size_t len = strlen (XSTR (sym_ref, 0));
28947 char *str = XALLOCAVEC (char, len + 2);
28948 str[0] = '.';
28949 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
28950 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
28951 }
28952 }
28953
28954 static inline bool
28955 compare_section_name (const char *section, const char *templ)
28956 {
28957 int len;
28958
28959 len = strlen (templ);
28960 return (strncmp (section, templ, len) == 0
28961 && (section[len] == 0 || section[len] == '.'));
28962 }
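
/* For example (illustrative): compare_section_name (".sdata.foo", ".sdata")
   and compare_section_name (".sdata", ".sdata") return true, while
   compare_section_name (".sdata2", ".sdata") returns false, which is why
   ".sdata2" gets its own check in rs6000_elf_in_small_data_p below.  */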
28963
28964 bool
28965 rs6000_elf_in_small_data_p (const_tree decl)
28966 {
28967 if (rs6000_sdata == SDATA_NONE)
28968 return false;
28969
28970 /* We want to merge strings, so we never consider them small data. */
28971 if (TREE_CODE (decl) == STRING_CST)
28972 return false;
28973
28974 /* Functions are never in the small data area. */
28975 if (TREE_CODE (decl) == FUNCTION_DECL)
28976 return false;
28977
28978 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
28979 {
28980 const char *section = DECL_SECTION_NAME (decl);
28981 if (compare_section_name (section, ".sdata")
28982 || compare_section_name (section, ".sdata2")
28983 || compare_section_name (section, ".gnu.linkonce.s")
28984 || compare_section_name (section, ".sbss")
28985 || compare_section_name (section, ".sbss2")
28986 || compare_section_name (section, ".gnu.linkonce.sb")
28987 || strcmp (section, ".PPC.EMB.sdata0") == 0
28988 || strcmp (section, ".PPC.EMB.sbss0") == 0)
28989 return true;
28990 }
28991 else
28992 {
28993 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
28994
28995 if (size > 0
28996 && size <= g_switch_value
28997 /* If it's not public, and we're not going to reference it there,
28998 there's no need to put it in the small data section. */
28999 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
29000 return true;
29001 }
29002
29003 return false;
29004 }
29005
29006 #endif /* USING_ELFOS_H */
29007 \f
29008 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
29009
29010 static bool
29011 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
29012 {
29013 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
29014 }
29015
29016 /* Do not place thread-local symbols refs in the object blocks. */
29017
29018 static bool
29019 rs6000_use_blocks_for_decl_p (const_tree decl)
29020 {
29021 return !DECL_THREAD_LOCAL_P (decl);
29022 }
29023 \f
29024 /* Return a REG that occurs in ADDR with coefficient 1.
29025 ADDR can be effectively incremented by incrementing REG.
29026
29027 r0 is special and we must not select it as an address
29028 register by this routine since our caller will try to
29029 increment the returned register via an "la" instruction. */
29030
29031 rtx
29032 find_addr_reg (rtx addr)
29033 {
29034 while (GET_CODE (addr) == PLUS)
29035 {
29036 if (GET_CODE (XEXP (addr, 0)) == REG
29037 && REGNO (XEXP (addr, 0)) != 0)
29038 addr = XEXP (addr, 0);
29039 else if (GET_CODE (XEXP (addr, 1)) == REG
29040 && REGNO (XEXP (addr, 1)) != 0)
29041 addr = XEXP (addr, 1);
29042 else if (CONSTANT_P (XEXP (addr, 0)))
29043 addr = XEXP (addr, 1);
29044 else if (CONSTANT_P (XEXP (addr, 1)))
29045 addr = XEXP (addr, 0);
29046 else
29047 gcc_unreachable ();
29048 }
29049 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
29050 return addr;
29051 }
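
/* For example (illustrative): given ADDR = (plus (reg 9) (const_int 8))
   this returns (reg 9); given (plus (reg 0) (reg 9)) it skips r0, which
   must not be used as the incremented base, and returns (reg 9).  */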
29052
29053 void
29054 rs6000_fatal_bad_address (rtx op)
29055 {
29056 fatal_insn ("bad address", op);
29057 }
29058
29059 #if TARGET_MACHO
29060
29061 typedef struct branch_island_d {
29062 tree function_name;
29063 tree label_name;
29064 int line_number;
29065 } branch_island;
29066
29067
29068 static vec<branch_island, va_gc> *branch_islands;
29069
29070 /* Remember to generate a branch island for far calls to the given
29071 function. */
29072
29073 static void
29074 add_compiler_branch_island (tree label_name, tree function_name,
29075 int line_number)
29076 {
29077 branch_island bi = {function_name, label_name, line_number};
29078 vec_safe_push (branch_islands, bi);
29079 }
29080
29081 /* Generate far-jump branch islands for everything recorded in
29082 branch_islands. Invoked immediately after the last instruction of
29083 the epilogue has been emitted; the branch islands must be appended
29084 to, and contiguous with, the function body. Mach-O stubs are
29085 generated in machopic_output_stub(). */
29086
29087 static void
29088 macho_branch_islands (void)
29089 {
29090 char tmp_buf[512];
29091
29092 while (!vec_safe_is_empty (branch_islands))
29093 {
29094 branch_island *bi = &branch_islands->last ();
29095 const char *label = IDENTIFIER_POINTER (bi->label_name);
29096 const char *name = IDENTIFIER_POINTER (bi->function_name);
29097 char name_buf[512];
29098 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
29099 if (name[0] == '*' || name[0] == '&')
29100 strcpy (name_buf, name+1);
29101 else
29102 {
29103 name_buf[0] = '_';
29104 strcpy (name_buf+1, name);
29105 }
29106 strcpy (tmp_buf, "\n");
29107 strcat (tmp_buf, label);
29108 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29109 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29110 dbxout_stabd (N_SLINE, bi->line_number);
29111 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29112 if (flag_pic)
29113 {
29114 if (TARGET_LINK_STACK)
29115 {
29116 char name[32];
29117 get_ppc476_thunk_name (name);
29118 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
29119 strcat (tmp_buf, name);
29120 strcat (tmp_buf, "\n");
29121 strcat (tmp_buf, label);
29122 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29123 }
29124 else
29125 {
29126 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
29127 strcat (tmp_buf, label);
29128 strcat (tmp_buf, "_pic\n");
29129 strcat (tmp_buf, label);
29130 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29131 }
29132
29133 strcat (tmp_buf, "\taddis r11,r11,ha16(");
29134 strcat (tmp_buf, name_buf);
29135 strcat (tmp_buf, " - ");
29136 strcat (tmp_buf, label);
29137 strcat (tmp_buf, "_pic)\n");
29138
29139 strcat (tmp_buf, "\tmtlr r0\n");
29140
29141 strcat (tmp_buf, "\taddi r12,r11,lo16(");
29142 strcat (tmp_buf, name_buf);
29143 strcat (tmp_buf, " - ");
29144 strcat (tmp_buf, label);
29145 strcat (tmp_buf, "_pic)\n");
29146
29147 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
29148 }
29149 else
29150 {
29151 strcat (tmp_buf, ":\nlis r12,hi16(");
29152 strcat (tmp_buf, name_buf);
29153 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
29154 strcat (tmp_buf, name_buf);
29155 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
29156 }
29157 output_asm_insn (tmp_buf, 0);
29158 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29159 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29160 dbxout_stabd (N_SLINE, bi->line_number);
29161 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29162 branch_islands->pop ();
29163 }
29164 }
29165
29166 /* NO_PREVIOUS_DEF checks the branch island list to see whether the
29167 function name is already present. */
29168
29169 static int
29170 no_previous_def (tree function_name)
29171 {
29172 branch_island *bi;
29173 unsigned ix;
29174
29175 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29176 if (function_name == bi->function_name)
29177 return 0;
29178 return 1;
29179 }
29180
29181 /* GET_PREV_LABEL gets the label name from the previous definition of
29182 the function. */
29183
29184 static tree
29185 get_prev_label (tree function_name)
29186 {
29187 branch_island *bi;
29188 unsigned ix;
29189
29190 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29191 if (function_name == bi->function_name)
29192 return bi->label_name;
29193 return NULL_TREE;
29194 }
29195
29196 /* INSN is either a function call or a millicode call. It may have an
29197 unconditional jump in its delay slot.
29198
29199 CALL_DEST is the routine we are calling. */
29200
29201 char *
29202 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
29203 int cookie_operand_number)
29204 {
29205 static char buf[256];
29206 if (darwin_emit_branch_islands
29207 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
29208 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
29209 {
29210 tree labelname;
29211 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
29212
29213 if (no_previous_def (funname))
29214 {
29215 rtx label_rtx = gen_label_rtx ();
29216 char *label_buf, temp_buf[256];
29217 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
29218 CODE_LABEL_NUMBER (label_rtx));
29219 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
29220 labelname = get_identifier (label_buf);
29221 add_compiler_branch_island (labelname, funname, insn_line (insn));
29222 }
29223 else
29224 labelname = get_prev_label (funname);
29225
29226 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
29227 instruction will reach 'foo', otherwise link as 'bl L42'".
29228 "L42" should be a 'branch island', that will do a far jump to
29229 'foo'. Branch islands are generated in
29230 macho_branch_islands(). */
29231 sprintf (buf, "jbsr %%z%d,%.246s",
29232 dest_operand_number, IDENTIFIER_POINTER (labelname));
29233 }
29234 else
29235 sprintf (buf, "bl %%z%d", dest_operand_number);
29236 return buf;
29237 }
29238
29239 /* Generate PIC and indirect symbol stubs. */
29240
29241 void
29242 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29243 {
29244 unsigned int length;
29245 char *symbol_name, *lazy_ptr_name;
29246 char *local_label_0;
29247 static int label = 0;
29248
29249 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29250 symb = (*targetm.strip_name_encoding) (symb);
29251
29252
29253 length = strlen (symb);
29254 symbol_name = XALLOCAVEC (char, length + 32);
29255 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29256
29257 lazy_ptr_name = XALLOCAVEC (char, length + 32);
29258 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
29259
29260 if (flag_pic == 2)
29261 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
29262 else
29263 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
29264
29265 if (flag_pic == 2)
29266 {
29267 fprintf (file, "\t.align 5\n");
29268
29269 fprintf (file, "%s:\n", stub);
29270 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29271
29272 label++;
29273 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
29274 sprintf (local_label_0, "\"L%011d$spb\"", label);
29275
29276 fprintf (file, "\tmflr r0\n");
29277 if (TARGET_LINK_STACK)
29278 {
29279 char name[32];
29280 get_ppc476_thunk_name (name);
29281 fprintf (file, "\tbl %s\n", name);
29282 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29283 }
29284 else
29285 {
29286 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
29287 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29288 }
29289 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
29290 lazy_ptr_name, local_label_0);
29291 fprintf (file, "\tmtlr r0\n");
29292 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
29293 (TARGET_64BIT ? "ldu" : "lwzu"),
29294 lazy_ptr_name, local_label_0);
29295 fprintf (file, "\tmtctr r12\n");
29296 fprintf (file, "\tbctr\n");
29297 }
29298 else
29299 {
29300 fprintf (file, "\t.align 4\n");
29301
29302 fprintf (file, "%s:\n", stub);
29303 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29304
29305 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
29306 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
29307 (TARGET_64BIT ? "ldu" : "lwzu"),
29308 lazy_ptr_name);
29309 fprintf (file, "\tmtctr r12\n");
29310 fprintf (file, "\tbctr\n");
29311 }
29312
29313 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29314 fprintf (file, "%s:\n", lazy_ptr_name);
29315 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29316 fprintf (file, "%sdyld_stub_binding_helper\n",
29317 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
29318 }
29319
29320 /* Legitimize PIC addresses. If the address is already
29321 position-independent, we return ORIG. Newly generated
29322 position-independent addresses go into a reg. This is REG if
29323 nonzero; otherwise we allocate register(s) as necessary. */
29324
29325 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
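/* I.e. (illustrative): SMALL_INT accepts exactly the signed 16-bit range;
   adding 0x8000 maps -0x8000..0x7fff onto 0..0xffff, so -32768 and 32767
   pass while 32768 and -32769 do not.  */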
29326
29327 rtx
29328 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
29329 rtx reg)
29330 {
29331 rtx base, offset;
29332
29333 if (reg == NULL && ! reload_in_progress && ! reload_completed)
29334 reg = gen_reg_rtx (Pmode);
29335
29336 if (GET_CODE (orig) == CONST)
29337 {
29338 rtx reg_temp;
29339
29340 if (GET_CODE (XEXP (orig, 0)) == PLUS
29341 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
29342 return orig;
29343
29344 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
29345
29346 /* Use a different reg for the intermediate value, as
29347 it will be marked UNCHANGING. */
29348 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
29349 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
29350 Pmode, reg_temp);
29351 offset =
29352 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
29353 Pmode, reg);
29354
29355 if (GET_CODE (offset) == CONST_INT)
29356 {
29357 if (SMALL_INT (offset))
29358 return plus_constant (Pmode, base, INTVAL (offset));
29359 else if (! reload_in_progress && ! reload_completed)
29360 offset = force_reg (Pmode, offset);
29361 else
29362 {
29363 rtx mem = force_const_mem (Pmode, orig);
29364 return machopic_legitimize_pic_address (mem, Pmode, reg);
29365 }
29366 }
29367 return gen_rtx_PLUS (Pmode, base, offset);
29368 }
29369
29370 /* Fall back on generic machopic code. */
29371 return machopic_legitimize_pic_address (orig, mode, reg);
29372 }
29373
29374 /* Output a .machine directive for the Darwin assembler, and call
29375 the generic start_file routine. */
29376
29377 static void
29378 rs6000_darwin_file_start (void)
29379 {
29380 static const struct
29381 {
29382 const char *arg;
29383 const char *name;
29384 HOST_WIDE_INT if_set;
29385 } mapping[] = {
29386 { "ppc64", "ppc64", MASK_64BIT },
29387 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
29388 { "power4", "ppc970", 0 },
29389 { "G5", "ppc970", 0 },
29390 { "7450", "ppc7450", 0 },
29391 { "7400", "ppc7400", MASK_ALTIVEC },
29392 { "G4", "ppc7400", 0 },
29393 { "750", "ppc750", 0 },
29394 { "740", "ppc750", 0 },
29395 { "G3", "ppc750", 0 },
29396 { "604e", "ppc604e", 0 },
29397 { "604", "ppc604", 0 },
29398 { "603e", "ppc603", 0 },
29399 { "603", "ppc603", 0 },
29400 { "601", "ppc601", 0 },
29401 { NULL, "ppc", 0 } };
29402 const char *cpu_id = "";
29403 size_t i;
29404
29405 rs6000_file_start ();
29406 darwin_file_start ();
29407
29408 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
29409
29410 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
29411 cpu_id = rs6000_default_cpu;
29412
29413 if (global_options_set.x_rs6000_cpu_index)
29414 cpu_id = processor_target_table[rs6000_cpu_index].name;
29415
29416 /* Look through the mapping array. Pick the first name that either
29417 matches the argument, has a bit set in IF_SET that is also set
29418 in the target flags, or has a NULL name. */
29419
29420 i = 0;
29421 while (mapping[i].arg != NULL
29422 && strcmp (mapping[i].arg, cpu_id) != 0
29423 && (mapping[i].if_set & rs6000_isa_flags) == 0)
29424 i++;
29425
29426 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
29427 }
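
/* For example (illustrative): -mcpu=G5 matches the "G5" row and emits
   ".machine ppc970"; a configuration with MASK_ALTIVEC set but no name
   match stops at the "7400" row and emits ".machine ppc7400"; otherwise
   the NULL-terminated last row yields ".machine ppc".  */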
29428
29429 #endif /* TARGET_MACHO */
29430
29431 #if TARGET_ELF
29432 static int
29433 rs6000_elf_reloc_rw_mask (void)
29434 {
29435 if (flag_pic)
29436 return 3;
29437 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29438 return 2;
29439 else
29440 return 0;
29441 }
29442
29443 /* Record an element in the table of global constructors. SYMBOL is
29444 a SYMBOL_REF of the function to be called; PRIORITY is a number
29445 between 0 and MAX_INIT_PRIORITY.
29446
29447 This differs from default_named_section_asm_out_constructor in
29448 that we have special handling for -mrelocatable. */
29449
29450 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
29451 static void
29452 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
29453 {
29454 const char *section = ".ctors";
29455 char buf[16];
29456
29457 if (priority != DEFAULT_INIT_PRIORITY)
29458 {
29459 sprintf (buf, ".ctors.%.5u",
29460 /* Invert the numbering so the linker puts us in the proper
29461 order; constructors are run from right to left, and the
29462 linker sorts in increasing order. */
29463 MAX_INIT_PRIORITY - priority);
29464 section = buf;
29465 }
29466
29467 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29468 assemble_align (POINTER_SIZE);
29469
29470 if (TARGET_RELOCATABLE)
29471 {
29472 fputs ("\t.long (", asm_out_file);
29473 output_addr_const (asm_out_file, symbol);
29474 fputs (")@fixup\n", asm_out_file);
29475 }
29476 else
29477 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29478 }
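
/* For example (illustrative, with the usual MAX_INIT_PRIORITY of 65535):
   a constructor with priority 101 lands in section ".ctors.65434", so the
   linker's increasing sort of section names yields the right run order.  */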
29479
29480 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
29481 static void
29482 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
29483 {
29484 const char *section = ".dtors";
29485 char buf[16];
29486
29487 if (priority != DEFAULT_INIT_PRIORITY)
29488 {
29489 sprintf (buf, ".dtors.%.5u",
29490 /* Invert the numbering so the linker puts us in the proper
29491 order; constructors are run from right to left, and the
29492 linker sorts in increasing order. */
29493 MAX_INIT_PRIORITY - priority);
29494 section = buf;
29495 }
29496
29497 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29498 assemble_align (POINTER_SIZE);
29499
29500 if (TARGET_RELOCATABLE)
29501 {
29502 fputs ("\t.long (", asm_out_file);
29503 output_addr_const (asm_out_file, symbol);
29504 fputs (")@fixup\n", asm_out_file);
29505 }
29506 else
29507 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29508 }
29509
29510 void
29511 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
29512 {
29513 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
29514 {
29515 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
29516 ASM_OUTPUT_LABEL (file, name);
29517 fputs (DOUBLE_INT_ASM_OP, file);
29518 rs6000_output_function_entry (file, name);
29519 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
29520 if (DOT_SYMBOLS)
29521 {
29522 fputs ("\t.size\t", file);
29523 assemble_name (file, name);
29524 fputs (",24\n\t.type\t.", file);
29525 assemble_name (file, name);
29526 fputs (",@function\n", file);
29527 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
29528 {
29529 fputs ("\t.globl\t.", file);
29530 assemble_name (file, name);
29531 putc ('\n', file);
29532 }
29533 }
29534 else
29535 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29536 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29537 rs6000_output_function_entry (file, name);
29538 fputs (":\n", file);
29539 return;
29540 }
29541
29542 if (TARGET_RELOCATABLE
29543 && !TARGET_SECURE_PLT
29544 && (get_pool_size () != 0 || crtl->profile)
29545 && uses_TOC ())
29546 {
29547 char buf[256];
29548
29549 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
29550
29551 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
29552 fprintf (file, "\t.long ");
29553 assemble_name (file, buf);
29554 putc ('-', file);
29555 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29556 assemble_name (file, buf);
29557 putc ('\n', file);
29558 }
29559
29560 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29561 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29562
29563 if (DEFAULT_ABI == ABI_AIX)
29564 {
29565 const char *desc_name, *orig_name;
29566
29567 orig_name = (*targetm.strip_name_encoding) (name);
29568 desc_name = orig_name;
29569 while (*desc_name == '.')
29570 desc_name++;
29571
29572 if (TREE_PUBLIC (decl))
29573 fprintf (file, "\t.globl %s\n", desc_name);
29574
29575 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29576 fprintf (file, "%s:\n", desc_name);
29577 fprintf (file, "\t.long %s\n", orig_name);
29578 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
29579 fputs ("\t.long 0\n", file);
29580 fprintf (file, "\t.previous\n");
29581 }
29582 ASM_OUTPUT_LABEL (file, name);
29583 }
29584
29585 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
29586 static void
29587 rs6000_elf_file_end (void)
29588 {
29589 #ifdef HAVE_AS_GNU_ATTRIBUTE
29590 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
29591 {
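/* Emitted values follow the GNU Power ABI attribute tags: tag 4
(Tag_GNU_Power_ABI_FP) is 1 for double-precision hard float, 3 for
single-precision hard float and 2 for soft float; tag 8
(Tag_GNU_Power_ABI_Vector) is 2 for the AltiVec ABI, 3 for the SPE
ABI and 1 for generic; tag 12 (Tag_GNU_Power_ABI_Struct_Return) is
2 for AIX-style memory return and 1 for SVR4-style register return. */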
29592 if (rs6000_passes_float)
29593 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
29594 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
29595 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
29596 : 2));
29597 if (rs6000_passes_vector)
29598 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
29599 (TARGET_ALTIVEC_ABI ? 2
29600 : TARGET_SPE_ABI ? 3
29601 : 1));
29602 if (rs6000_returns_struct)
29603 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
29604 aix_struct_return ? 2 : 1);
29605 }
29606 #endif
29607 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29608 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
29609 file_end_indicate_exec_stack ();
29610 #endif
29611 }
29612 #endif
29613
29614 #if TARGET_XCOFF
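/* Implement TARGET_ASM_OUTPUT_ANCHOR: define the section anchor
SYMBOL as the csect origin ($) plus its block offset. */
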
29615 static void
29616 rs6000_xcoff_asm_output_anchor (rtx symbol)
29617 {
29618 char buffer[100];
29619
29620 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
29621 SYMBOL_REF_BLOCK_OFFSET (symbol));
29622 fprintf (asm_out_file, "%s", SET_ASM_OP);
29623 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
29624 fprintf (asm_out_file, ",");
29625 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
29626 fprintf (asm_out_file, "\n");
29627 }
29628
29629 static void
29630 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
29631 {
29632 fputs (GLOBAL_ASM_OP, stream);
29633 RS6000_OUTPUT_BASENAME (stream, name);
29634 putc ('\n', stream);
29635 }
29636
29637 /* A get_unnamed_section callback, used for read-only sections.
29638 DIRECTIVE points to the section string variable. */
29639
29640 static void
29641 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
29642 {
29643 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
29644 *(const char *const *) directive,
29645 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29646 }
29647
29648 /* Likewise for read-write sections. */
29649
29650 static void
29651 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
29652 {
29653 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
29654 *(const char *const *) directive,
29655 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29656 }
29657
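/* Likewise for thread-local storage sections. */
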
29658 static void
29659 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
29660 {
29661 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
29662 *(const char *const *) directive,
29663 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29664 }
29665
29666 /* A get_unnamed_section callback, used for switching to toc_section. */
29667
29668 static void
29669 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
29670 {
29671 if (TARGET_MINIMAL_TOC)
29672 {
29673 /* toc_section is always selected at least once from
29674 rs6000_xcoff_file_start, so this is guaranteed to
29675 always be defined once and only once in each file. */
29676 if (!toc_initialized)
29677 {
29678 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
29679 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
29680 toc_initialized = 1;
29681 }
29682 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
29683 (TARGET_32BIT ? "" : ",3"));
29684 }
29685 else
29686 fputs ("\t.toc\n", asm_out_file);
29687 }
29688
29689 /* Implement TARGET_ASM_INIT_SECTIONS. */
29690
29691 static void
29692 rs6000_xcoff_asm_init_sections (void)
29693 {
29694 read_only_data_section
29695 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29696 &xcoff_read_only_section_name);
29697
29698 private_data_section
29699 = get_unnamed_section (SECTION_WRITE,
29700 rs6000_xcoff_output_readwrite_section_asm_op,
29701 &xcoff_private_data_section_name);
29702
29703 tls_data_section
29704 = get_unnamed_section (SECTION_TLS,
29705 rs6000_xcoff_output_tls_section_asm_op,
29706 &xcoff_tls_data_section_name);
29707
29708 tls_private_data_section
29709 = get_unnamed_section (SECTION_TLS,
29710 rs6000_xcoff_output_tls_section_asm_op,
29711 &xcoff_private_data_section_name);
29712
29713 read_only_private_data_section
29714 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29715 &xcoff_private_data_section_name);
29716
29717 toc_section
29718 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
29719
29720 readonly_data_section = read_only_data_section;
29721 exception_section = data_section;
29722 }
29723
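/* Implement TARGET_ASM_RELOC_RW_MASK. AIX is always PIC (see the
section-attributes comment below), so any section whose contents
need relocation is treated as read-write. */
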
29724 static int
29725 rs6000_xcoff_reloc_rw_mask (void)
29726 {
29727 return 3;
29728 }
29729
29730 static void
29731 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
29732 tree decl ATTRIBUTE_UNUSED)
29733 {
29734 int smclass;
29735 static const char * const suffix[4] = { "PR", "RO", "RW", "TL" };
29736
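/* Map the section flags to an XCOFF storage-mapping class: code in
[PR], thread-local data in [TL], other writable data in [RW], and
everything else in [RO]. */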
29737 if (flags & SECTION_CODE)
29738 smclass = 0;
29739 else if (flags & SECTION_TLS)
29740 smclass = 3;
29741 else if (flags & SECTION_WRITE)
29742 smclass = 2;
29743 else
29744 smclass = 1;
29745
29746 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
29747 (flags & SECTION_CODE) ? "." : "",
29748 name, suffix[smclass], flags & SECTION_ENTSIZE);
29749 }
29750
29751 #define IN_NAMED_SECTION(DECL) \
29752 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
29753 && DECL_SECTION_NAME (DECL) != NULL)
29754
29755 static section *
29756 rs6000_xcoff_select_section (tree decl, int reloc,
29757 unsigned HOST_WIDE_INT align)
29758 {
29759 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
29760 named section. */
29761 if (align > BIGGEST_ALIGNMENT)
29762 {
29763 resolve_unique_section (decl, reloc, true);
29764 if (IN_NAMED_SECTION (decl))
29765 return get_named_section (decl, NULL, reloc);
29766 }
29767
29768 if (decl_readonly_section (decl, reloc))
29769 {
29770 if (TREE_PUBLIC (decl))
29771 return read_only_data_section;
29772 else
29773 return read_only_private_data_section;
29774 }
29775 else
29776 {
29777 #if HAVE_AS_TLS
29778 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
29779 {
29780 if (TREE_PUBLIC (decl))
29781 return tls_data_section;
29782 else if (bss_initializer_p (decl))
29783 {
29784 /* Convert to COMMON to emit in BSS. */
29785 DECL_COMMON (decl) = 1;
29786 return tls_comm_section;
29787 }
29788 else
29789 return tls_private_data_section;
29790 }
29791 else
29792 #endif
29793 if (TREE_PUBLIC (decl))
29794 return data_section;
29795 else
29796 return private_data_section;
29797 }
29798 }
29799
29800 static void
29801 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
29802 {
29803 const char *name;
29804
29805 /* Use select_section for private data and uninitialized data with
29806 alignment <= BIGGEST_ALIGNMENT. */
29807 if (!TREE_PUBLIC (decl)
29808 || DECL_COMMON (decl)
29809 || (DECL_INITIAL (decl) == NULL_TREE
29810 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
29811 || DECL_INITIAL (decl) == error_mark_node
29812 || (flag_zero_initialized_in_bss
29813 && initializer_zerop (DECL_INITIAL (decl))))
29814 return;
29815
29816 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
29817 name = (*targetm.strip_name_encoding) (name);
29818 set_decl_section_name (decl, name);
29819 }
29820
29821 /* Select section for constant in constant pool.
29822
29823 On RS/6000, all constants are in the private read-only data area.
29824 However, if this is being placed in the TOC it must be output as a
29825 toc entry. */
29826
29827 static section *
29828 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
29829 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
29830 {
29831 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29832 return toc_section;
29833 else
29834 return read_only_private_data_section;
29835 }
29836
29837 /* Remove any trailing four-character suffix, such as [DS] or [RW], from the symbol name. */
29838
29839 static const char *
29840 rs6000_xcoff_strip_name_encoding (const char *name)
29841 {
29842 size_t len;
29843 if (*name == '*')
29844 name++;
29845 len = strlen (name);
29846 if (name[len - 1] == ']')
29847 return ggc_alloc_string (name, len - 4);
29848 else
29849 return name;
29850 }
29851
29852 /* Section attributes. AIX is always PIC. */
29853
29854 static unsigned int
29855 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
29856 {
29857 unsigned int align;
29858 unsigned int flags = default_section_type_flags (decl, name, reloc);
29859
29860 /* Align to at least the minimum unit size (MIN_UNITS_PER_WORD). */
29861 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
29862 align = MIN_UNITS_PER_WORD;
29863 else
29864 /* Increase alignment of large objects if not already stricter. */
29865 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
29866 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
29867 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
29868
29869 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
29870 }
29871
29872 /* Output at beginning of assembler file.
29873
29874 Initialize the section names for the RS/6000 at this point.
29875
29876 Specify filename, including full path, to assembler.
29877
29878 We want to go into the TOC section so at least one .toc will be emitted.
29879 Also, in order to output proper .bs/.es pairs, we need at least one static
29880 [RW] section emitted.
29881
29882 Finally, declare mcount when profiling to make the assembler happy. */
29883
29884 static void
29885 rs6000_xcoff_file_start (void)
29886 {
29887 rs6000_gen_section_name (&xcoff_bss_section_name,
29888 main_input_filename, ".bss_");
29889 rs6000_gen_section_name (&xcoff_private_data_section_name,
29890 main_input_filename, ".rw_");
29891 rs6000_gen_section_name (&xcoff_read_only_section_name,
29892 main_input_filename, ".ro_");
29893 rs6000_gen_section_name (&xcoff_tls_data_section_name,
29894 main_input_filename, ".tls_");
29895 rs6000_gen_section_name (&xcoff_tbss_section_name,
29896 main_input_filename, ".tbss_[UL]");
29897
29898 fputs ("\t.file\t", asm_out_file);
29899 output_quoted_string (asm_out_file, main_input_filename);
29900 fputc ('\n', asm_out_file);
29901 if (write_symbols != NO_DEBUG)
29902 switch_to_section (private_data_section);
29903 switch_to_section (text_section);
29904 if (profile_flag)
29905 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
29906 rs6000_file_start ();
29907 }
29908
29909 /* Output at end of assembler file.
29910 On the RS/6000, referencing data should automatically pull in text. */
29911
29912 static void
29913 rs6000_xcoff_file_end (void)
29914 {
29915 switch_to_section (text_section);
29916 fputs ("_section_.text:\n", asm_out_file);
29917 switch_to_section (data_section);
29918 fputs (TARGET_32BIT
29919 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
29920 asm_out_file);
29921 }
29922
29923 struct declare_alias_data
29924 {
29925 FILE *file;
29926 bool function_descriptor;
29927 };
29928
29929 /* Declare alias N. A callback for symtab_node::call_for_symbol_and_aliases. */
29930
29931 static bool
29932 rs6000_declare_alias (struct symtab_node *n, void *d)
29933 {
29934 struct declare_alias_data *data = (struct declare_alias_data *)d;
29935 /* Main symbol is output specially, because varasm machinery does part of
29936 the job for us - we do not need to declare .globl/lglobs and such. */
29937 if (!n->alias || n->weakref)
29938 return false;
29939
29940 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
29941 return false;
29942
29943 /* Prevent assemble_alias from trying to use .set pseudo operation
29944 that does not behave as expected by the middle-end. */
29945 TREE_ASM_WRITTEN (n->decl) = true;
29946
29947 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
29948 char *buffer = (char *) alloca (strlen (name) + 2);
29949 char *p;
29950 int dollar_inside = 0;
29951
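/* The AIX assembler does not accept '$' in symbol names, so rewrite
any dollar signs to underscores; when that happens, a .rename back to
the original name is emitted below. */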
29952 strcpy (buffer, name);
29953 p = strchr (buffer, '$');
29954 while (p) {
29955 *p = '_';
29956 dollar_inside++;
29957 p = strchr (p + 1, '$');
29958 }
29959 if (TREE_PUBLIC (n->decl))
29960 {
29961 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
29962 {
29963 if (dollar_inside) {
29964 if (data->function_descriptor)
29965 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
29966 else
29967 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
29968 }
29969 if (data->function_descriptor)
29970 fputs ("\t.globl .", data->file);
29971 else
29972 fputs ("\t.globl ", data->file);
29973 RS6000_OUTPUT_BASENAME (data->file, buffer);
29974 putc ('\n', data->file);
29975 }
29976 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
29977 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
29978 }
29979 else
29980 {
29981 if (dollar_inside)
29982 {
29983 if (data->function_descriptor)
29984 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
29985 else
29986 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
29987 }
29988 if (data->function_descriptor)
29989 fputs ("\t.lglobl .", data->file);
29990 else
29991 fputs ("\t.lglobl ", data->file);
29992 RS6000_OUTPUT_BASENAME (data->file, buffer);
29993 putc ('\n', data->file);
29994 }
29995 if (data->function_descriptor)
29996 fputs (".", data->file);
29997 RS6000_OUTPUT_BASENAME (data->file, buffer);
29998 fputs (":\n", data->file);
29999 return false;
30000 }
30001
30002 /* This macro produces the initial definition of a function name.
30003 On the RS/6000, we need to place an extra '.' in the function name and
30004 output the function descriptor.
30005 Dollar signs are converted to underscores.
30006
30007 The csect for the function will have already been created when
30008 text_section was selected. We do have to go back to that csect, however.
30009
30010 The third and fourth parameters to the .function pseudo-op (16 and 044)
30011 are placeholders which no longer have any use.
30012
30013 Because the AIX assembler's .set command has unexpected semantics, we output
30014 all aliases as alternative labels in front of the definition. */
30015
30016 void
30017 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
30018 {
30019 char *buffer = (char *) alloca (strlen (name) + 1);
30020 char *p;
30021 int dollar_inside = 0;
30022 struct declare_alias_data data = {file, false};
30023
30024 strcpy (buffer, name);
30025 p = strchr (buffer, '$');
30026 while (p) {
30027 *p = '_';
30028 dollar_inside++;
30029 p = strchr (p + 1, '$');
30030 }
30031 if (TREE_PUBLIC (decl))
30032 {
30033 if (!RS6000_WEAK || !DECL_WEAK (decl))
30034 {
30035 if (dollar_inside) {
30036 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30037 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30038 }
30039 fputs ("\t.globl .", file);
30040 RS6000_OUTPUT_BASENAME (file, buffer);
30041 putc ('\n', file);
30042 }
30043 }
30044 else
30045 {
30046 if (dollar_inside) {
30047 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30048 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30049 }
30050 fputs ("\t.lglobl .", file);
30051 RS6000_OUTPUT_BASENAME (file, buffer);
30052 putc ('\n', file);
30053 }
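/* Emit the function descriptor: a [DS] csect holding the entry-point
address (the dot-prefixed label emitted below) and the TOC base
(TOC[tc0]). */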
30054 fputs ("\t.csect ", file);
30055 RS6000_OUTPUT_BASENAME (file, buffer);
30056 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
30057 RS6000_OUTPUT_BASENAME (file, buffer);
30058 fputs (":\n", file);
30059 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30060 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
30061 RS6000_OUTPUT_BASENAME (file, buffer);
30062 fputs (", TOC[tc0], 0\n", file);
30063 in_section = NULL;
30064 switch_to_section (function_section (decl));
30065 putc ('.', file);
30066 RS6000_OUTPUT_BASENAME (file, buffer);
30067 fputs (":\n", file);
30068 data.function_descriptor = true;
30069 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30070 if (write_symbols != NO_DEBUG && !DECL_IGNORED_P (decl))
30071 xcoffout_declare_function (file, decl, buffer);
30072 return;
30073 }
30074
30075 /* This macro produces the initial definition of an object (variable) name.
30076 Because the AIX assembler's .set command has unexpected semantics, we output
30077 all aliases as alternative labels in front of the definition. */
30078
30079 void
30080 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
30081 {
30082 struct declare_alias_data data = {file, false};
30083 RS6000_OUTPUT_BASENAME (file, name);
30084 fputs (":\n", file);
30085 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30086 }
30087
30088 #ifdef HAVE_AS_TLS
30089 static void
30090 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
30091 {
30092 rtx symbol;
30093 int flags;
30094
30095 default_encode_section_info (decl, rtl, first);
30096
30097 /* Careful not to prod global register variables. */
30098 if (!MEM_P (rtl))
30099 return;
30100 symbol = XEXP (rtl, 0);
30101 if (GET_CODE (symbol) != SYMBOL_REF)
30102 return;
30103
30104 flags = SYMBOL_REF_FLAGS (symbol);
30105
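/* Thread-local symbols cannot be placed in object blocks, so strip
any section-anchor block information from them. */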
30106 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
30107 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
30108
30109 SYMBOL_REF_FLAGS (symbol) = flags;
30110 }
30111 #endif /* HAVE_AS_TLS */
30112 #endif /* TARGET_XCOFF */
30113
30114 /* Compute a (partial) cost for rtx X. Return true if the complete
30115 cost has been computed, and false if subexpressions should be
30116 scanned. In either case, *TOTAL contains the cost result. */
30117
30118 static bool
30119 rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
30120 int *total, bool speed)
30121 {
30122 machine_mode mode = GET_MODE (x);
30123
30124 switch (code)
30125 {
30126 /* On the RS/6000, if it is valid in the insn, it is free. */
30127 case CONST_INT:
30128 if (((outer_code == SET
30129 || outer_code == PLUS
30130 || outer_code == MINUS)
30131 && (satisfies_constraint_I (x)
30132 || satisfies_constraint_L (x)))
30133 || (outer_code == AND
30134 && (satisfies_constraint_K (x)
30135 || (mode == SImode
30136 ? satisfies_constraint_L (x)
30137 : satisfies_constraint_J (x))
30138 || mask_operand (x, mode)
30139 || (mode == DImode
30140 && mask64_operand (x, DImode))))
30141 || ((outer_code == IOR || outer_code == XOR)
30142 && (satisfies_constraint_K (x)
30143 || (mode == SImode
30144 ? satisfies_constraint_L (x)
30145 : satisfies_constraint_J (x))))
30146 || outer_code == ASHIFT
30147 || outer_code == ASHIFTRT
30148 || outer_code == LSHIFTRT
30149 || outer_code == ROTATE
30150 || outer_code == ROTATERT
30151 || outer_code == ZERO_EXTRACT
30152 || (outer_code == MULT
30153 && satisfies_constraint_I (x))
30154 || ((outer_code == DIV || outer_code == UDIV
30155 || outer_code == MOD || outer_code == UMOD)
30156 && exact_log2 (INTVAL (x)) >= 0)
30157 || (outer_code == COMPARE
30158 && (satisfies_constraint_I (x)
30159 || satisfies_constraint_K (x)))
30160 || ((outer_code == EQ || outer_code == NE)
30161 && (satisfies_constraint_I (x)
30162 || satisfies_constraint_K (x)
30163 || (mode == SImode
30164 ? satisfies_constraint_L (x)
30165 : satisfies_constraint_J (x))))
30166 || (outer_code == GTU
30167 && satisfies_constraint_I (x))
30168 || (outer_code == LTU
30169 && satisfies_constraint_P (x)))
30170 {
30171 *total = 0;
30172 return true;
30173 }
30174 else if ((outer_code == PLUS
30175 && reg_or_add_cint_operand (x, VOIDmode))
30176 || (outer_code == MINUS
30177 && reg_or_sub_cint_operand (x, VOIDmode))
30178 || ((outer_code == SET
30179 || outer_code == IOR
30180 || outer_code == XOR)
30181 && (INTVAL (x)
30182 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
30183 {
30184 *total = COSTS_N_INSNS (1);
30185 return true;
30186 }
30187 /* FALLTHRU */
30188
30189 case CONST_DOUBLE:
30190 case CONST_WIDE_INT:
30191 case CONST:
30192 case HIGH:
30193 case SYMBOL_REF:
30194 case MEM:
30195 /* When optimizing for size, MEM should be slightly more expensive
30196 than generating an address, e.g., (plus (reg) (const)).
30197 L1 cache latency is about two instructions. */
30198 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
30199 return true;
30200
30201 case LABEL_REF:
30202 *total = 0;
30203 return true;
30204
30205 case PLUS:
30206 case MINUS:
30207 if (FLOAT_MODE_P (mode))
30208 *total = rs6000_cost->fp;
30209 else
30210 *total = COSTS_N_INSNS (1);
30211 return false;
30212
30213 case MULT:
30214 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30215 && satisfies_constraint_I (XEXP (x, 1)))
30216 {
30217 if (INTVAL (XEXP (x, 1)) >= -256
30218 && INTVAL (XEXP (x, 1)) <= 255)
30219 *total = rs6000_cost->mulsi_const9;
30220 else
30221 *total = rs6000_cost->mulsi_const;
30222 }
30223 else if (mode == SFmode)
30224 *total = rs6000_cost->fp;
30225 else if (FLOAT_MODE_P (mode))
30226 *total = rs6000_cost->dmul;
30227 else if (mode == DImode)
30228 *total = rs6000_cost->muldi;
30229 else
30230 *total = rs6000_cost->mulsi;
30231 return false;
30232
30233 case FMA:
30234 if (mode == SFmode)
30235 *total = rs6000_cost->fp;
30236 else
30237 *total = rs6000_cost->dmul;
30238 break;
30239
30240 case DIV:
30241 case MOD:
30242 if (FLOAT_MODE_P (mode))
30243 {
30244 *total = mode == DFmode ? rs6000_cost->ddiv
30245 : rs6000_cost->sdiv;
30246 return false;
30247 }
30248 /* FALLTHRU */
30249
30250 case UDIV:
30251 case UMOD:
30252 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30253 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
30254 {
30255 if (code == DIV || code == MOD)
30256 /* Shift, addze */
30257 *total = COSTS_N_INSNS (2);
30258 else
30259 /* Shift */
30260 *total = COSTS_N_INSNS (1);
30261 }
30262 else
30263 {
30264 if (GET_MODE (XEXP (x, 1)) == DImode)
30265 *total = rs6000_cost->divdi;
30266 else
30267 *total = rs6000_cost->divsi;
30268 }
30269 /* Add in shift and subtract for MOD. */
30270 if (code == MOD || code == UMOD)
30271 *total += COSTS_N_INSNS (2);
30272 return false;
30273
30274 case CTZ:
30275 case FFS:
30276 *total = COSTS_N_INSNS (4);
30277 return false;
30278
30279 case POPCOUNT:
30280 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
30281 return false;
30282
30283 case PARITY:
30284 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
30285 return false;
30286
30287 case NOT:
30288 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
30289 {
30290 *total = 0;
30291 return false;
30292 }
30293 /* FALLTHRU */
30294
30295 case AND:
30296 case CLZ:
30297 case IOR:
30298 case XOR:
30299 case ZERO_EXTRACT:
30300 *total = COSTS_N_INSNS (1);
30301 return false;
30302
30303 case ASHIFT:
30304 case ASHIFTRT:
30305 case LSHIFTRT:
30306 case ROTATE:
30307 case ROTATERT:
30308 /* Handle mul_highpart. */
30309 if (outer_code == TRUNCATE
30310 && GET_CODE (XEXP (x, 0)) == MULT)
30311 {
30312 if (mode == DImode)
30313 *total = rs6000_cost->muldi;
30314 else
30315 *total = rs6000_cost->mulsi;
30316 return true;
30317 }
30318 else if (outer_code == AND)
30319 *total = 0;
30320 else
30321 *total = COSTS_N_INSNS (1);
30322 return false;
30323
30324 case SIGN_EXTEND:
30325 case ZERO_EXTEND:
30326 if (GET_CODE (XEXP (x, 0)) == MEM)
30327 *total = 0;
30328 else
30329 *total = COSTS_N_INSNS (1);
30330 return false;
30331
30332 case COMPARE:
30333 case NEG:
30334 case ABS:
30335 if (!FLOAT_MODE_P (mode))
30336 {
30337 *total = COSTS_N_INSNS (1);
30338 return false;
30339 }
30340 /* FALLTHRU */
30341
30342 case FLOAT:
30343 case UNSIGNED_FLOAT:
30344 case FIX:
30345 case UNSIGNED_FIX:
30346 case FLOAT_TRUNCATE:
30347 *total = rs6000_cost->fp;
30348 return false;
30349
30350 case FLOAT_EXTEND:
30351 if (mode == DFmode)
30352 *total = 0;
30353 else
30354 *total = rs6000_cost->fp;
30355 return false;
30356
30357 case UNSPEC:
30358 switch (XINT (x, 1))
30359 {
30360 case UNSPEC_FRSP:
30361 *total = rs6000_cost->fp;
30362 return true;
30363
30364 default:
30365 break;
30366 }
30367 break;
30368
30369 case CALL:
30370 case IF_THEN_ELSE:
30371 if (!speed)
30372 {
30373 *total = COSTS_N_INSNS (1);
30374 return true;
30375 }
30376 else if (FLOAT_MODE_P (mode)
30377 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
30378 {
30379 *total = rs6000_cost->fp;
30380 return false;
30381 }
30382 break;
30383
30384 case NE:
30385 case EQ:
30386 case GTU:
30387 case LTU:
30388 /* Carry bit requires mode == Pmode.
30389 NEG or PLUS already counted so only add one. */
30390 if (mode == Pmode
30391 && (outer_code == NEG || outer_code == PLUS))
30392 {
30393 *total = COSTS_N_INSNS (1);
30394 return true;
30395 }
30396 if (outer_code == SET)
30397 {
30398 if (XEXP (x, 1) == const0_rtx)
30399 {
30400 if (TARGET_ISEL && !TARGET_MFCRF)
30401 *total = COSTS_N_INSNS (8);
30402 else
30403 *total = COSTS_N_INSNS (2);
30404 return true;
30405 }
30406 else if (mode == Pmode)
30407 {
30408 *total = COSTS_N_INSNS (3);
30409 return false;
30410 }
30411 }
30412 /* FALLTHRU */
30413
30414 case GT:
30415 case LT:
30416 case UNORDERED:
30417 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
30418 {
30419 if (TARGET_ISEL && !TARGET_MFCRF)
30420 *total = COSTS_N_INSNS (8);
30421 else
30422 *total = COSTS_N_INSNS (2);
30423 return true;
30424 }
30425 /* CC COMPARE. */
30426 if (outer_code == COMPARE)
30427 {
30428 *total = 0;
30429 return true;
30430 }
30431 break;
30432
30433 default:
30434 break;
30435 }
30436
30437 return false;
30438 }
30439
30440 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
30441
30442 static bool
30443 rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
30444 bool speed)
30445 {
30446 bool ret = rs6000_rtx_costs (x, code, outer_code, opno, total, speed);
30447
30448 fprintf (stderr,
30449 "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
30450 "opno = %d, total = %d, speed = %s, x:\n",
30451 ret ? "complete" : "scan inner",
30452 GET_RTX_NAME (code),
30453 GET_RTX_NAME (outer_code),
30454 opno,
30455 *total,
30456 speed ? "true" : "false");
30457
30458 debug_rtx (x);
30459
30460 return ret;
30461 }
30462
30463 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
30464
30465 static int
30466 rs6000_debug_address_cost (rtx x, machine_mode mode,
30467 addr_space_t as, bool speed)
30468 {
30469 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
30470
30471 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
30472 ret, speed ? "true" : "false");
30473 debug_rtx (x);
30474
30475 return ret;
30476 }
30477
30478
30479 /* A C expression returning the cost of moving data of mode MODE from a
30480 register of class FROM to one of class TO. */
30481
30482 static int
30483 rs6000_register_move_cost (machine_mode mode,
30484 reg_class_t from, reg_class_t to)
30485 {
30486 int ret;
30487
30488 if (TARGET_DEBUG_COST)
30489 dbg_cost_ctrl++;
30490
30491 /* Moves from/to GENERAL_REGS. */
30492 if (reg_classes_intersect_p (to, GENERAL_REGS)
30493 || reg_classes_intersect_p (from, GENERAL_REGS))
30494 {
30495 reg_class_t rclass = from;
30496
30497 if (! reg_classes_intersect_p (to, GENERAL_REGS))
30498 rclass = to;
30499
30500 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
30501 ret = (rs6000_memory_move_cost (mode, rclass, false)
30502 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
30503
30504 /* It's more expensive to move CR_REGS than CR0_REGS because of the
30505 shift. */
30506 else if (rclass == CR_REGS)
30507 ret = 4;
30508
30509 /* For those processors that have slow LR/CTR moves, make them more
30510 expensive than memory in order to bias spills to memory. */
30511 else if ((rs6000_cpu == PROCESSOR_POWER6
30512 || rs6000_cpu == PROCESSOR_POWER7
30513 || rs6000_cpu == PROCESSOR_POWER8)
30514 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
30515 ret = 6 * hard_regno_nregs[0][mode];
30516
30517 else
30518 /* A move will cost one instruction per GPR moved. */
30519 ret = 2 * hard_regno_nregs[0][mode];
30520 }
30521
30522 /* If we have VSX, we can easily move between FPR or Altivec registers. */
30523 else if (VECTOR_MEM_VSX_P (mode)
30524 && reg_classes_intersect_p (to, VSX_REGS)
30525 && reg_classes_intersect_p (from, VSX_REGS))
30526 ret = 2 * hard_regno_nregs[32][mode];
30527
30528 /* Moving between two similar registers is just one instruction. */
30529 else if (reg_classes_intersect_p (to, from))
30530 ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
30531
30532 /* Everything else has to go through GENERAL_REGS. */
30533 else
30534 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
30535 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
30536
30537 if (TARGET_DEBUG_COST)
30538 {
30539 if (dbg_cost_ctrl == 1)
30540 fprintf (stderr,
30541 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
30542 ret, GET_MODE_NAME (mode), reg_class_names[from],
30543 reg_class_names[to]);
30544 dbg_cost_ctrl--;
30545 }
30546
30547 return ret;
30548 }
30549
30550 /* A C expression returning the cost of moving data of mode MODE between a
30551 register of class RCLASS and memory. */
30552
30553 static int
30554 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
30555 bool in ATTRIBUTE_UNUSED)
30556 {
30557 int ret;
30558
30559 if (TARGET_DEBUG_COST)
30560 dbg_cost_ctrl++;
30561
30562 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
30563 ret = 4 * hard_regno_nregs[0][mode];
30564 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
30565 || reg_classes_intersect_p (rclass, VSX_REGS)))
30566 ret = 4 * hard_regno_nregs[32][mode];
30567 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
30568 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
30569 else
30570 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
30571
30572 if (TARGET_DEBUG_COST)
30573 {
30574 if (dbg_cost_ctrl == 1)
30575 fprintf (stderr,
30576 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
30577 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
30578 dbg_cost_ctrl--;
30579 }
30580
30581 return ret;
30582 }
30583
30584 /* Return the decl of a target-specific builtin that implements the
30585 reciprocal of the given function, or NULL_TREE if not available. */
30586
30587 static tree
30588 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
30589 bool sqrt ATTRIBUTE_UNUSED)
30590 {
30591 if (optimize_insn_for_size_p ())
30592 return NULL_TREE;
30593
30594 if (md_fn)
30595 switch (fn)
30596 {
30597 case VSX_BUILTIN_XVSQRTDP:
30598 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
30599 return NULL_TREE;
30600
30601 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
30602
30603 case VSX_BUILTIN_XVSQRTSP:
30604 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
30605 return NULL_TREE;
30606
30607 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
30608
30609 default:
30610 return NULL_TREE;
30611 }
30612
30613 else
30614 switch (fn)
30615 {
30616 case BUILT_IN_SQRT:
30617 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
30618 return NULL_TREE;
30619
30620 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
30621
30622 case BUILT_IN_SQRTF:
30623 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
30624 return NULL_TREE;
30625
30626 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
30627
30628 default:
30629 return NULL_TREE;
30630 }
30631 }
30632
30633 /* Load up a constant. If the mode is a vector mode, splat the value across
30634 all of the vector elements. */
30635
30636 static rtx
30637 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
30638 {
30639 rtx reg;
30640
30641 if (mode == SFmode || mode == DFmode)
30642 {
30643 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
30644 reg = force_reg (mode, d);
30645 }
30646 else if (mode == V4SFmode)
30647 {
30648 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
30649 rtvec v = gen_rtvec (4, d, d, d, d);
30650 reg = gen_reg_rtx (mode);
30651 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30652 }
30653 else if (mode == V2DFmode)
30654 {
30655 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
30656 rtvec v = gen_rtvec (2, d, d);
30657 reg = gen_reg_rtx (mode);
30658 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30659 }
30660 else
30661 gcc_unreachable ();
30662
30663 return reg;
30664 }
30665
30666 /* Generate an FMA instruction: dst = fma(m1, m2, a). */
30667
30668 static void
30669 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
30670 {
30671 machine_mode mode = GET_MODE (target);
30672 rtx dst;
30673
30674 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30675 gcc_assert (dst != NULL);
30676
30677 if (dst != target)
30678 emit_move_insn (target, dst);
30679 }
30680
30681 /* Generate an FMSUB instruction: dst = fma(m1, m2, -a). */
30682
30683 static void
30684 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
30685 {
30686 machine_mode mode = GET_MODE (target);
30687 rtx dst;
30688
30689 /* Altivec does not support fms directly;
30690 generate in terms of fma in that case. */
30691 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
30692 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
30693 else
30694 {
30695 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
30696 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30697 }
30698 gcc_assert (dst != NULL);
30699
30700 if (dst != target)
30701 emit_move_insn (target, dst);
30702 }
30703
30704 /* Generate an FNMSUB instruction: dst = -fma(m1, m2, -a). */
30705
30706 static void
30707 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
30708 {
30709 machine_mode mode = GET_MODE (dst);
30710 rtx r;
30711
30712 /* This is a tad more complicated, since the fnma_optab is for
30713 a different expression: fma(-m1, m2, a), which is the same
30714 thing except in the case of signed zeros.
30715
30716 Fortunately we know that if FMA is supported that FNMSUB is
30717 also supported in the ISA. Just expand it directly. */
30718
30719 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
30720
30721 r = gen_rtx_NEG (mode, a);
30722 r = gen_rtx_FMA (mode, m1, m2, r);
30723 r = gen_rtx_NEG (mode, r);
30724 emit_insn (gen_rtx_SET (VOIDmode, dst, r));
30725 }
30726
30727 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
30728 add a reg_note saying that this was a division. Support both scalar and
30729 vector divide. Assumes no trapping math and finite arguments. */
30730
30731 void
30732 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
30733 {
30734 machine_mode mode = GET_MODE (dst);
30735 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
30736 int i;
30737
30738 /* Low precision estimates guarantee 5 bits of accuracy. High
30739 precision estimates guarantee 14 bits of accuracy. SFmode
30740 requires 23 bits of accuracy. DFmode requires 52 bits of
30741 accuracy. Each pass at least doubles the accuracy, leading
30742 to the following. */
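/* Writing e_i = 1 - d * x_i, each intermediate pass below computes
x_(i+1) = x_i + e_i * x_i and e_(i+1) = e_i * e_i, so the error term
is squared on every pass: 5 -> 10 -> 20 -> 40 bits, or
14 -> 28 -> 56 bits with the high precision estimate. */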
30743 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30744 if (mode == DFmode || mode == V2DFmode)
30745 passes++;
30746
30747 enum insn_code code = optab_handler (smul_optab, mode);
30748 insn_gen_fn gen_mul = GEN_FCN (code);
30749
30750 gcc_assert (code != CODE_FOR_nothing);
30751
30752 one = rs6000_load_constant_and_splat (mode, dconst1);
30753
30754 /* x0 = 1./d estimate */
30755 x0 = gen_reg_rtx (mode);
30756 emit_insn (gen_rtx_SET (VOIDmode, x0,
30757 gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
30758 UNSPEC_FRES)));
30759
30760 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
30761 if (passes > 1) {
30762
30763 /* e0 = 1. - d * x0 */
30764 e0 = gen_reg_rtx (mode);
30765 rs6000_emit_nmsub (e0, d, x0, one);
30766
30767 /* x1 = x0 + e0 * x0 */
30768 x1 = gen_reg_rtx (mode);
30769 rs6000_emit_madd (x1, e0, x0, x0);
30770
30771 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
30772 ++i, xprev = xnext, eprev = enext) {
30773
30774 /* enext = eprev * eprev */
30775 enext = gen_reg_rtx (mode);
30776 emit_insn (gen_mul (enext, eprev, eprev));
30777
30778 /* xnext = xprev + enext * xprev */
30779 xnext = gen_reg_rtx (mode);
30780 rs6000_emit_madd (xnext, enext, xprev, xprev);
30781 }
30782
30783 } else
30784 xprev = x0;
30785
30786 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
30787
30788 /* u = n * xprev */
30789 u = gen_reg_rtx (mode);
30790 emit_insn (gen_mul (u, n, xprev));
30791
30792 /* v = n - (d * u) */
30793 v = gen_reg_rtx (mode);
30794 rs6000_emit_nmsub (v, d, u, n);
30795
30796 /* dst = (v * xprev) + u */
30797 rs6000_emit_madd (dst, v, xprev, u);
30798
30799 if (note_p)
30800 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
30801 }
30802
30803 /* Newton-Raphson approximation of single/double-precision floating point
30804 rsqrt. Assumes no trapping math and finite arguments. */
30805
30806 void
30807 rs6000_emit_swrsqrt (rtx dst, rtx src)
30808 {
30809 machine_mode mode = GET_MODE (src);
30810 rtx x0 = gen_reg_rtx (mode);
30811 rtx y = gen_reg_rtx (mode);
30812
30813 /* Low precision estimates guarantee 5 bits of accuracy. High
30814 precision estimates guarantee 14 bits of accuracy. SFmode
30815 requires 23 bits of accuracy. DFmode requires 52 bits of
30816 accuracy. Each pass at least doubles the accuracy, leading
30817 to the following. */
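/* Each pass applies the Newton-Raphson step for 1/sqrt(src),
x_(i+1) = x_i * (1.5 - (src/2) * x_i * x_i), with src/2 computed once
up front as Y. */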
30818 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30819 if (mode == DFmode || mode == V2DFmode)
30820 passes++;
30821
30822 REAL_VALUE_TYPE dconst3_2;
30823 int i;
30824 rtx halfthree;
30825 enum insn_code code = optab_handler (smul_optab, mode);
30826 insn_gen_fn gen_mul = GEN_FCN (code);
30827
30828 gcc_assert (code != CODE_FOR_nothing);
30829
30830 /* Load up the constant 1.5 either as a scalar, or as a vector. */
30831 real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
30832 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
30833
30834 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
30835
30836 /* x0 = rsqrt estimate */
30837 emit_insn (gen_rtx_SET (VOIDmode, x0,
30838 gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
30839 UNSPEC_RSQRT)));
30840
30841 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
30842 rs6000_emit_msub (y, src, halfthree, src);
30843
30844 for (i = 0; i < passes; i++)
30845 {
30846 rtx x1 = gen_reg_rtx (mode);
30847 rtx u = gen_reg_rtx (mode);
30848 rtx v = gen_reg_rtx (mode);
30849
30850 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
30851 emit_insn (gen_mul (u, x0, x0));
30852 rs6000_emit_nmsub (v, y, u, halfthree);
30853 emit_insn (gen_mul (x1, x0, v));
30854 x0 = x1;
30855 }
30856
30857 emit_move_insn (dst, x0);
30858 return;
30859 }
30860
30861 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
30862 (Power7) targets. DST is the target, and SRC is the argument operand. */
30863
30864 void
30865 rs6000_emit_popcount (rtx dst, rtx src)
30866 {
30867 machine_mode mode = GET_MODE (dst);
30868 rtx tmp1, tmp2;
30869
30870 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
30871 if (TARGET_POPCNTD)
30872 {
30873 if (mode == SImode)
30874 emit_insn (gen_popcntdsi2 (dst, src));
30875 else
30876 emit_insn (gen_popcntddi2 (dst, src));
30877 return;
30878 }
30879
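/* Otherwise use popcntb, which counts the bits within each byte: the
multiply by the replicated 0x01010101 pattern sums the byte counts
into the most significant byte, and the final right shift extracts
it. */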
30880 tmp1 = gen_reg_rtx (mode);
30881
30882 if (mode == SImode)
30883 {
30884 emit_insn (gen_popcntbsi2 (tmp1, src));
30885 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
30886 NULL_RTX, 0);
30887 tmp2 = force_reg (SImode, tmp2);
30888 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
30889 }
30890 else
30891 {
30892 emit_insn (gen_popcntbdi2 (tmp1, src));
30893 tmp2 = expand_mult (DImode, tmp1,
30894 GEN_INT ((HOST_WIDE_INT)
30895 0x01010101 << 32 | 0x01010101),
30896 NULL_RTX, 0);
30897 tmp2 = force_reg (DImode, tmp2);
30898 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
30899 }
30900 }
30901
30902
30903 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
30904 target, and SRC is the argument operand. */
30905
30906 void
30907 rs6000_emit_parity (rtx dst, rtx src)
30908 {
30909 machine_mode mode = GET_MODE (dst);
30910 rtx tmp;
30911
30912 tmp = gen_reg_rtx (mode);
30913
30914 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
30915 if (TARGET_CMPB)
30916 {
30917 if (mode == SImode)
30918 {
30919 emit_insn (gen_popcntbsi2 (tmp, src));
30920 emit_insn (gen_paritysi2_cmpb (dst, tmp));
30921 }
30922 else
30923 {
30924 emit_insn (gen_popcntbdi2 (tmp, src));
30925 emit_insn (gen_paritydi2_cmpb (dst, tmp));
30926 }
30927 return;
30928 }
30929
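/* Without prtyw/prtyd, compute per-byte counts with popcntb, fold
them into a single byte with shifts and xors (or reuse the
multiply-based popcount when the multiply is cheap), and take the
low bit. */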
30930 if (mode == SImode)
30931 {
30932 /* Is mult+shift >= shift+xor+shift+xor? */
30933 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
30934 {
30935 rtx tmp1, tmp2, tmp3, tmp4;
30936
30937 tmp1 = gen_reg_rtx (SImode);
30938 emit_insn (gen_popcntbsi2 (tmp1, src));
30939
30940 tmp2 = gen_reg_rtx (SImode);
30941 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
30942 tmp3 = gen_reg_rtx (SImode);
30943 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
30944
30945 tmp4 = gen_reg_rtx (SImode);
30946 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
30947 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
30948 }
30949 else
30950 rs6000_emit_popcount (tmp, src);
30951 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
30952 }
30953 else
30954 {
30955 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
30956 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
30957 {
30958 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
30959
30960 tmp1 = gen_reg_rtx (DImode);
30961 emit_insn (gen_popcntbdi2 (tmp1, src));
30962
30963 tmp2 = gen_reg_rtx (DImode);
30964 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
30965 tmp3 = gen_reg_rtx (DImode);
30966 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
30967
30968 tmp4 = gen_reg_rtx (DImode);
30969 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
30970 tmp5 = gen_reg_rtx (DImode);
30971 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
30972
30973 tmp6 = gen_reg_rtx (DImode);
30974 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
30975 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
30976 }
30977 else
30978 rs6000_emit_popcount (tmp, src);
30979 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
30980 }
30981 }
30982
30983 /* Expand an Altivec constant permutation for little endian mode.
30984 There are two issues: First, the two input operands must be
30985 swapped so that together they form a double-wide array in LE
30986 order. Second, the vperm instruction has surprising behavior
30987 in LE mode: it interprets the elements of the source vectors
30988 in BE mode ("left to right") and interprets the elements of
30989 the destination vector in LE mode ("right to left"). To
30990 correct for this, we must subtract each element of the permute
30991 control vector from 31.
30992
30993 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
30994 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
30995 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
30996 serve as the permute control vector. Then, in BE mode,
30997
30998 vperm 9,10,11,12
30999
31000 places the desired result in vr9. However, in LE mode the
31001 vector contents will be
31002
31003 vr10 = 00000003 00000002 00000001 00000000
31004 vr11 = 00000007 00000006 00000005 00000004
31005
31006 The result of the vperm using the same permute control vector is
31007
31008 vr9 = 05000000 07000000 01000000 03000000
31009
31010 That is, the leftmost 4 bytes of vr10 are interpreted as the
31011 source for the rightmost 4 bytes of vr9, and so on.
31012
31013 If we change the permute control vector to
31014
31015 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
31016
31017 and issue
31018
31019 vperm 9,11,10,12
31020
31021 we get the desired
31022
31023 vr9 = 00000006 00000004 00000002 00000000. */
31024
31025 void
31026 altivec_expand_vec_perm_const_le (rtx operands[4])
31027 {
31028 unsigned int i;
31029 rtx perm[16];
31030 rtx constv, unspec;
31031 rtx target = operands[0];
31032 rtx op0 = operands[1];
31033 rtx op1 = operands[2];
31034 rtx sel = operands[3];
31035
31036 /* Unpack and adjust the constant selector. */
31037 for (i = 0; i < 16; ++i)
31038 {
31039 rtx e = XVECEXP (sel, 0, i);
31040 unsigned int elt = 31 - (INTVAL (e) & 31);
31041 perm[i] = GEN_INT (elt);
31042 }
31043
31044 /* Expand to a permute, swapping the inputs and using the
31045 adjusted selector. */
31046 if (!REG_P (op0))
31047 op0 = force_reg (V16QImode, op0);
31048 if (!REG_P (op1))
31049 op1 = force_reg (V16QImode, op1);
31050
31051 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
31052 constv = force_reg (V16QImode, constv);
31053 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
31054 UNSPEC_VPERM);
31055 if (!REG_P (target))
31056 {
31057 rtx tmp = gen_reg_rtx (V16QImode);
31058 emit_move_insn (tmp, unspec);
31059 unspec = tmp;
31060 }
31061
31062 emit_move_insn (target, unspec);
31063 }
31064
31065 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
31066 permute control vector. But here it's not a constant, so we must
31067 generate a vector NAND or NOR to do the adjustment. */
31068
31069 void
31070 altivec_expand_vec_perm_le (rtx operands[4])
31071 {
31072 rtx notx, iorx, unspec;
31073 rtx target = operands[0];
31074 rtx op0 = operands[1];
31075 rtx op1 = operands[2];
31076 rtx sel = operands[3];
31077 rtx tmp = target;
31078 rtx norreg = gen_reg_rtx (V16QImode);
31079 machine_mode mode = GET_MODE (target);
31080
31081 /* Get everything in regs so the pattern matches. */
31082 if (!REG_P (op0))
31083 op0 = force_reg (mode, op0);
31084 if (!REG_P (op1))
31085 op1 = force_reg (mode, op1);
31086 if (!REG_P (sel))
31087 sel = force_reg (V16QImode, sel);
31088 if (!REG_P (target))
31089 tmp = gen_reg_rtx (mode);
31090
31091 /* Invert the selector with a VNAND if available, else a VNOR.
31092 The VNAND is preferred for future fusion opportunities. */
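/* vperm only examines the low five bits of each selector byte, and
~b is congruent to 31 - b modulo 32, so a bitwise NOT performs the
same 31 - index adjustment applied to constant selectors. */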
31093 notx = gen_rtx_NOT (V16QImode, sel);
31094 iorx = (TARGET_P8_VECTOR
31095 ? gen_rtx_IOR (V16QImode, notx, notx)
31096 : gen_rtx_AND (V16QImode, notx, notx));
31097 emit_insn (gen_rtx_SET (VOIDmode, norreg, iorx));
31098
31099 /* Permute with operands reversed and adjusted selector. */
31100 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
31101 UNSPEC_VPERM);
31102
31103 /* Copy into target, possibly by way of a register. */
31104 if (!REG_P (target))
31105 {
31106 emit_move_insn (tmp, unspec);
31107 unspec = tmp;
31108 }
31109
31110 emit_move_insn (target, unspec);
31111 }
31112
31113 /* Expand an Altivec constant permutation. Return true if we match
31114 an efficient implementation; false to fall back to VPERM. */
31115
31116 bool
31117 altivec_expand_vec_perm_const (rtx operands[4])
31118 {
31119 struct altivec_perm_insn {
31120 HOST_WIDE_INT mask;
31121 enum insn_code impl;
31122 unsigned char perm[16];
31123 };
31124 static const struct altivec_perm_insn patterns[] = {
31125 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
31126 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
31127 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
31128 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
31129 { OPTION_MASK_ALTIVEC,
31130 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
31131 : CODE_FOR_altivec_vmrglb_direct),
31132 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
31133 { OPTION_MASK_ALTIVEC,
31134 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
31135 : CODE_FOR_altivec_vmrglh_direct),
31136 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
31137 { OPTION_MASK_ALTIVEC,
31138 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
31139 : CODE_FOR_altivec_vmrglw_direct),
31140 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
31141 { OPTION_MASK_ALTIVEC,
31142 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
31143 : CODE_FOR_altivec_vmrghb_direct),
31144 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
31145 { OPTION_MASK_ALTIVEC,
31146 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
31147 : CODE_FOR_altivec_vmrghh_direct),
31148 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
31149 { OPTION_MASK_ALTIVEC,
31150 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
31151 : CODE_FOR_altivec_vmrghw_direct),
31152 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
31153 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
31154 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
31155 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
31156 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
31157 };
31158
31159 unsigned int i, j, elt, which;
31160 unsigned char perm[16];
31161 rtx target, op0, op1, sel, x;
31162 bool one_vec;
31163
31164 target = operands[0];
31165 op0 = operands[1];
31166 op1 = operands[2];
31167 sel = operands[3];
31168
31169 /* Unpack the constant selector; WHICH tracks whether elements come from op0 (bit 0), op1 (bit 1), or both. */
31170 for (i = which = 0; i < 16; ++i)
31171 {
31172 rtx e = XVECEXP (sel, 0, i);
31173 elt = INTVAL (e) & 31;
31174 which |= (elt < 16 ? 1 : 2);
31175 perm[i] = elt;
31176 }
31177
31178 /* Simplify the constant selector based on operands. */
31179 switch (which)
31180 {
31181 default:
31182 gcc_unreachable ();
31183
31184 case 3:
31185 one_vec = false;
31186 if (!rtx_equal_p (op0, op1))
31187 break;
31188 /* FALLTHRU */
31189
31190 case 2:
31191 for (i = 0; i < 16; ++i)
31192 perm[i] &= 15;
31193 op0 = op1;
31194 one_vec = true;
31195 break;
31196
31197 case 1:
31198 op1 = op0;
31199 one_vec = true;
31200 break;
31201 }
31202
31203 /* Look for splat patterns. */
31204 if (one_vec)
31205 {
31206 elt = perm[0];
31207
31208 for (i = 0; i < 16; ++i)
31209 if (perm[i] != elt)
31210 break;
31211 if (i == 16)
31212 {
31213 if (!BYTES_BIG_ENDIAN)
31214 elt = 15 - elt;
31215 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
31216 return true;
31217 }
31218
31219 if (elt % 2 == 0)
31220 {
31221 for (i = 0; i < 16; i += 2)
31222 if (perm[i] != elt || perm[i + 1] != elt + 1)
31223 break;
31224 if (i == 16)
31225 {
31226 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
31227 x = gen_reg_rtx (V8HImode);
31228 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
31229 GEN_INT (field)));
31230 emit_move_insn (target, gen_lowpart (V16QImode, x));
31231 return true;
31232 }
31233 }
31234
31235 if (elt % 4 == 0)
31236 {
31237 for (i = 0; i < 16; i += 4)
31238 if (perm[i] != elt
31239 || perm[i + 1] != elt + 1
31240 || perm[i + 2] != elt + 2
31241 || perm[i + 3] != elt + 3)
31242 break;
31243 if (i == 16)
31244 {
31245 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
31246 x = gen_reg_rtx (V4SImode);
31247 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
31248 GEN_INT (field)));
31249 emit_move_insn (target, gen_lowpart (V16QImode, x));
31250 return true;
31251 }
31252 }
31253 }
31254
31255 /* Look for merge and pack patterns. */
31256 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
31257 {
31258 bool swapped;
31259
31260 if ((patterns[j].mask & rs6000_isa_flags) == 0)
31261 continue;
31262
31263 elt = patterns[j].perm[0];
31264 if (perm[0] == elt)
31265 swapped = false;
31266 else if (perm[0] == elt + 16)
31267 swapped = true;
31268 else
31269 continue;
31270 for (i = 1; i < 16; ++i)
31271 {
31272 elt = patterns[j].perm[i];
31273 if (swapped)
31274 elt = (elt >= 16 ? elt - 16 : elt + 16);
31275 else if (one_vec && elt >= 16)
31276 elt -= 16;
31277 if (perm[i] != elt)
31278 break;
31279 }
31280 if (i == 16)
31281 {
31282 enum insn_code icode = patterns[j].impl;
31283 machine_mode omode = insn_data[icode].operand[0].mode;
31284 machine_mode imode = insn_data[icode].operand[1].mode;
31285
31286 /* For little-endian, don't use vpkuwum and vpkuhum if the
31287 underlying vector type is not V4SI and V8HI, respectively.
31288 For example, using vpkuwum with a V8HI picks up the even
31289 halfwords (BE numbering) when the even halfwords (LE
31290 numbering) are what we need. */
31291 if (!BYTES_BIG_ENDIAN
31292 && icode == CODE_FOR_altivec_vpkuwum_direct
31293 && ((GET_CODE (op0) == REG
31294 && GET_MODE (op0) != V4SImode)
31295 || (GET_CODE (op0) == SUBREG
31296 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
31297 continue;
31298 if (!BYTES_BIG_ENDIAN
31299 && icode == CODE_FOR_altivec_vpkuhum_direct
31300 && ((GET_CODE (op0) == REG
31301 && GET_MODE (op0) != V8HImode)
31302 || (GET_CODE (op0) == SUBREG
31303 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
31304 continue;
31305
31306 /* For little-endian, the two input operands must be swapped
31307 (or swapped back) to ensure proper right-to-left numbering
31308 from 0 to 2N-1. */
31309 if (swapped ^ !BYTES_BIG_ENDIAN)
31310 x = op0, op0 = op1, op1 = x;
31311 if (imode != V16QImode)
31312 {
31313 op0 = gen_lowpart (imode, op0);
31314 op1 = gen_lowpart (imode, op1);
31315 }
31316 if (omode == V16QImode)
31317 x = target;
31318 else
31319 x = gen_reg_rtx (omode);
31320 emit_insn (GEN_FCN (icode) (x, op0, op1));
31321 if (omode != V16QImode)
31322 emit_move_insn (target, gen_lowpart (V16QImode, x));
31323 return true;
31324 }
31325 }
31326
31327 if (!BYTES_BIG_ENDIAN)
31328 {
31329 altivec_expand_vec_perm_const_le (operands);
31330 return true;
31331 }
31332
31333 return false;
31334 }
31335
31336 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
31337 Return true if we match an efficient implementation. */
31338
31339 static bool
31340 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
31341 unsigned char perm0, unsigned char perm1)
31342 {
31343 rtx x;
31344
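/* PERM0 and PERM1 each select one of the four elements of the
concatenation { op0[0], op0[1], op1[0], op1[1] }: bit 1 picks the
operand and bit 0 picks the element within it. */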
31345 /* If both selectors come from the same operand, fold to single op. */
31346 if ((perm0 & 2) == (perm1 & 2))
31347 {
31348 if (perm0 & 2)
31349 op0 = op1;
31350 else
31351 op1 = op0;
31352 }
31353 /* If both operands are equal, fold to simpler permutation. */
31354 if (rtx_equal_p (op0, op1))
31355 {
31356 perm0 = perm0 & 1;
31357 perm1 = (perm1 & 1) + 2;
31358 }
31359 /* If the first selector comes from the second operand, swap. */
31360 else if (perm0 & 2)
31361 {
31362 if (perm1 & 2)
31363 return false;
31364 perm0 -= 2;
31365 perm1 += 2;
31366 x = op0, op0 = op1, op1 = x;
31367 }
31368 /* If the second selector does not come from the second operand, fail. */
31369 else if ((perm1 & 2) == 0)
31370 return false;
31371
31372 /* Success! */
31373 if (target != NULL)
31374 {
31375 machine_mode vmode, dmode;
31376 rtvec v;
31377
31378 vmode = GET_MODE (target);
31379 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
31380 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
31381 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
31382 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
31383 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
31384 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31385 }
31386 return true;
31387 }
31388
31389 bool
31390 rs6000_expand_vec_perm_const (rtx operands[4])
31391 {
31392 rtx target, op0, op1, sel;
31393 unsigned char perm0, perm1;
31394
31395 target = operands[0];
31396 op0 = operands[1];
31397 op1 = operands[2];
31398 sel = operands[3];
31399
31400 /* Unpack the constant selector. */
31401 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
31402 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
31403
31404 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
31405 }
31406
31407 /* Test whether a constant permutation is supported. */
31408
31409 static bool
31410 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
31411 const unsigned char *sel)
31412 {
31413 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
31414 if (TARGET_ALTIVEC)
31415 return true;
31416
31417 /* Check for ps_merge* or evmerge* insns. */
31418 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
31419 || (TARGET_SPE && vmode == V2SImode))
31420 {
31421 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
31422 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
31423 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
31424 }
31425
31426 return false;
31427 }
31428
31429 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
31430
31431 static void
31432 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
31433 machine_mode vmode, unsigned nelt, rtx perm[])
31434 {
31435 machine_mode imode;
31436 rtx x;
31437
31438 imode = vmode;
31439 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
31440 {
31441 imode = GET_MODE_INNER (vmode);
31442 imode = mode_for_size (GET_MODE_BITSIZE (imode), MODE_INT, 0);
31443 imode = mode_for_vector (imode, nelt);
31444 }
31445
31446 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
31447 x = expand_vec_perm (vmode, op0, op1, x, target);
31448 if (x != target)
31449 emit_move_insn (target, x);
31450 }
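
/* For example, a V4SF permutation is carried out with an integer
   selector: mode_for_size (32, MODE_INT, 0) gives SImode and
   mode_for_vector (SImode, 4) gives V4SImode, so the CONST_VECTOR of
   indices handed to expand_vec_perm is always an integer vector even
   when the data being permuted is floating point.  */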
31451
31452 /* Expand an extract even operation. */
31453
31454 void
31455 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
31456 {
31457 machine_mode vmode = GET_MODE (target);
31458 unsigned i, nelt = GET_MODE_NUNITS (vmode);
31459 rtx perm[16];
31460
31461 for (i = 0; i < nelt; i++)
31462 perm[i] = GEN_INT (i * 2);
31463
31464 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
31465 }
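
/* For a V4SI target this builds the selector {0, 2, 4, 6}, i.e. the
   even elements of the eight-element concatenation op0:op1.  As a
   usage sketch (vec_a and vec_b are hypothetical V4SImode registers):

     rtx dst = gen_reg_rtx (V4SImode);
     rs6000_expand_extract_even (dst, vec_a, vec_b);  */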
31466
31467 /* Expand a vector interleave operation. */
31468
31469 void
31470 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
31471 {
31472 machine_mode vmode = GET_MODE (target);
31473 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
31474 rtx perm[16];
31475
31476 high = (highp ? 0 : nelt / 2);
31477 for (i = 0; i < nelt / 2; i++)
31478 {
31479 perm[i * 2] = GEN_INT (i + high);
31480 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
31481 }
31482
31483 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
31484 }
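
/* For a V4SI target with HIGHP true, HIGH is 0 and the selector built
   is {0, 4, 1, 5}: elements 0 and 1 of op0 interleaved with elements
   0 and 1 of op1.  With HIGHP false, HIGH is 2 and the selector is
   {2, 6, 3, 7}, interleaving the two low halves instead.  */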
31485
31486 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
31487 void
31488 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
31489 {
31490 HOST_WIDE_INT hwi_scale (scale);
31491 REAL_VALUE_TYPE r_pow;
31492 rtvec v = rtvec_alloc (2);
31493 rtx elt;
31494 rtx scale_vec = gen_reg_rtx (V2DFmode);
31495 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
31496 elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
31497 RTVEC_ELT (v, 0) = elt;
31498 RTVEC_ELT (v, 1) = elt;
31499 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
31500 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
31501 }
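
/* The sequence above computes, for each lane i,

     tgt[i] = src[i] * 2**scale

   by splatting the constant 2**scale (obtained from real_powi) into a
   V2DF register and issuing a single vector multiply, avoiding any
   per-lane scalar code.  */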
31502
31503 /* Return an RTX representing where to find the function value of a
31504 function returning MODE. */
31505 static rtx
31506 rs6000_complex_function_value (machine_mode mode)
31507 {
31508 unsigned int regno;
31509 rtx r1, r2;
31510 machine_mode inner = GET_MODE_INNER (mode);
31511 unsigned int inner_bytes = GET_MODE_SIZE (inner);
31512
31513 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31514 regno = FP_ARG_RETURN;
31515 else
31516 {
31517 regno = GP_ARG_RETURN;
31518
31519 /* 32-bit is OK since it'll go in r3/r4. */
31520 if (TARGET_32BIT && inner_bytes >= 4)
31521 return gen_rtx_REG (mode, regno);
31522 }
31523
31524 if (inner_bytes >= 8)
31525 return gen_rtx_REG (mode, regno);
31526
31527 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
31528 const0_rtx);
31529 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
31530 GEN_INT (inner_bytes));
31531 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
31532 }
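
/* As an illustration, an SCmode (complex float) value returned in
   floating-point registers has inner_bytes == 4 and reaches the
   PARALLEL case: the real part is placed in FP_ARG_RETURN at offset 0
   and the imaginary part in the following register at offset 4.  A
   DCmode value has inner_bytes == 8 and is returned as one REG
   spanning consecutive registers starting at FP_ARG_RETURN.  */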
31533
31534 /* Target hook for TARGET_FUNCTION_VALUE.
31535
31536 On the SPE, both FPs and vectors are returned in r3.
31537
31538 On RS/6000 an integer value is in r3 and a floating-point value is in
31539 fp1, unless -msoft-float. */
31540
31541 static rtx
31542 rs6000_function_value (const_tree valtype,
31543 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
31544 bool outgoing ATTRIBUTE_UNUSED)
31545 {
31546 machine_mode mode;
31547 unsigned int regno;
31548 machine_mode elt_mode;
31549 int n_elts;
31550
31551 /* Special handling for structs in darwin64. */
31552 if (TARGET_MACHO
31553 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
31554 {
31555 CUMULATIVE_ARGS valcum;
31556 rtx valret;
31557
31558 valcum.words = 0;
31559 valcum.fregno = FP_ARG_MIN_REG;
31560 valcum.vregno = ALTIVEC_ARG_MIN_REG;
31561 /* Do a trial code generation as if this were going to be passed as
31562 an argument; if any part goes in memory, we return NULL. */
31563 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
31564 if (valret)
31565 return valret;
31566 /* Otherwise fall through to standard ABI rules. */
31567 }
31568
31569 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
31570 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (valtype), valtype,
31571 &elt_mode, &n_elts))
31572 {
31573 int first_reg, n_regs, i;
31574 rtx par;
31575
31576 if (SCALAR_FLOAT_MODE_P (elt_mode))
31577 {
31578 /* _Decimal128 must use even/odd register pairs. */
31579 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31580 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
31581 }
31582 else
31583 {
31584 first_reg = ALTIVEC_ARG_RETURN;
31585 n_regs = 1;
31586 }
31587
31588 par = gen_rtx_PARALLEL (TYPE_MODE (valtype), rtvec_alloc (n_elts));
31589 for (i = 0; i < n_elts; i++)
31590 {
31591 rtx r = gen_rtx_REG (elt_mode, first_reg + i * n_regs);
31592 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
31593 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
31594 }
31595
31596 return par;
31597 }
31598
31599 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode)
31600 {
31601 /* A long long return value needs to be split under the 32-bit ABI with -mpowerpc64. */
31602 return gen_rtx_PARALLEL (DImode,
31603 gen_rtvec (2,
31604 gen_rtx_EXPR_LIST (VOIDmode,
31605 gen_rtx_REG (SImode, GP_ARG_RETURN),
31606 const0_rtx),
31607 gen_rtx_EXPR_LIST (VOIDmode,
31608 gen_rtx_REG (SImode,
31609 GP_ARG_RETURN + 1),
31610 GEN_INT (4))));
31611 }
31612 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DCmode)
31613 {
31614 return gen_rtx_PARALLEL (DCmode,
31615 gen_rtvec (4,
31616 gen_rtx_EXPR_LIST (VOIDmode,
31617 gen_rtx_REG (SImode, GP_ARG_RETURN),
31618 const0_rtx),
31619 gen_rtx_EXPR_LIST (VOIDmode,
31620 gen_rtx_REG (SImode,
31621 GP_ARG_RETURN + 1),
31622 GEN_INT (4)),
31623 gen_rtx_EXPR_LIST (VOIDmode,
31624 gen_rtx_REG (SImode,
31625 GP_ARG_RETURN + 2),
31626 GEN_INT (8)),
31627 gen_rtx_EXPR_LIST (VOIDmode,
31628 gen_rtx_REG (SImode,
31629 GP_ARG_RETURN + 3),
31630 GEN_INT (12))));
31631 }
31632
31633 mode = TYPE_MODE (valtype);
31634 if ((INTEGRAL_TYPE_P (valtype) && GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
31635 || POINTER_TYPE_P (valtype))
31636 mode = TARGET_32BIT ? SImode : DImode;
31637
31638 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31639 /* _Decimal128 must use an even/odd register pair. */
31640 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31641 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
31642 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
31643 regno = FP_ARG_RETURN;
31644 else if (TREE_CODE (valtype) == COMPLEX_TYPE
31645 && targetm.calls.split_complex_arg)
31646 return rs6000_complex_function_value (mode);
31647 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31648 return register is used in both cases, and we won't see V2DImode/V2DFmode
31649 for pure altivec, combine the two cases. */
31650 else if (TREE_CODE (valtype) == VECTOR_TYPE
31651 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
31652 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
31653 regno = ALTIVEC_ARG_RETURN;
31654 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31655 && (mode == DFmode || mode == DCmode
31656 || mode == TFmode || mode == TCmode))
31657 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31658 else
31659 regno = GP_ARG_RETURN;
31660
31661 return gen_rtx_REG (mode, regno);
31662 }
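
/* For instance, under ELFv2 a structure of two doubles is discovered
   as a homogeneous aggregate with elt_mode == DFmode and n_elts == 2,
   so the loop above describes it as a PARALLEL of FP_ARG_RETURN at
   offset 0 and FP_ARG_RETURN + 1 at offset 8; n_regs is 1 because a
   DFmode element fits in a single floating-point register.  */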
31663
31664 /* Define how to find the value returned by a library function
31665 assuming the value has mode MODE. */
31666 rtx
31667 rs6000_libcall_value (machine_mode mode)
31668 {
31669 unsigned int regno;
31670
31671 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
31672 {
31673 /* A long long return value needs to be split under the 32-bit ABI with -mpowerpc64. */
31674 return gen_rtx_PARALLEL (DImode,
31675 gen_rtvec (2,
31676 gen_rtx_EXPR_LIST (VOIDmode,
31677 gen_rtx_REG (SImode, GP_ARG_RETURN),
31678 const0_rtx),
31679 gen_rtx_EXPR_LIST (VOIDmode,
31680 gen_rtx_REG (SImode,
31681 GP_ARG_RETURN + 1),
31682 GEN_INT (4))));
31683 }
31684
31685 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31686 /* _Decimal128 must use an even/odd register pair. */
31687 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31688 else if (SCALAR_FLOAT_MODE_P (mode)
31689 && TARGET_HARD_FLOAT && TARGET_FPRS
31690 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
31691 regno = FP_ARG_RETURN;
31692 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31693 return register is used in both cases, and we won't see V2DImode/V2DFmode
31694 for pure altivec, combine the two cases. */
31695 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
31696 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
31697 regno = ALTIVEC_ARG_RETURN;
31698 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
31699 return rs6000_complex_function_value (mode);
31700 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31701 && (mode == DFmode || mode == DCmode
31702 || mode == TFmode || mode == TCmode))
31703 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31704 else
31705 regno = GP_ARG_RETURN;
31706
31707 return gen_rtx_REG (mode, regno);
31708 }
31709
31710
31711 /* Return true if we use LRA instead of the reload pass. */
31712 static bool
31713 rs6000_lra_p (void)
31714 {
31715 return rs6000_lra_flag;
31716 }
31717
31718 /* Given FROM and TO register numbers, say whether this elimination is allowed.
31719 Frame pointer elimination is automatically handled.
31720
31721 For the RS/6000, if frame pointer elimination is being done, we would like
31722 to convert ap into fp, not sp.
31723
31724 We need r30 if -mminimal-toc was specified and there are constant pool
31725 references. */
31726
31727 static bool
31728 rs6000_can_eliminate (const int from, const int to)
31729 {
31730 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
31731 ? ! frame_pointer_needed
31732 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
31733 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
31734 : true);
31735 }
31736
31737 /* Define the offset between two registers, FROM to be eliminated and its
31738 replacement TO, at the start of a routine. */
31739 HOST_WIDE_INT
31740 rs6000_initial_elimination_offset (int from, int to)
31741 {
31742 rs6000_stack_t *info = rs6000_stack_info ();
31743 HOST_WIDE_INT offset;
31744
31745 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31746 offset = info->push_p ? 0 : -info->total_size;
31747 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31748 {
31749 offset = info->push_p ? 0 : -info->total_size;
31750 if (FRAME_GROWS_DOWNWARD)
31751 offset += info->fixed_size + info->vars_size + info->parm_size;
31752 }
31753 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31754 offset = FRAME_GROWS_DOWNWARD
31755 ? info->fixed_size + info->vars_size + info->parm_size
31756 : 0;
31757 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31758 offset = info->total_size;
31759 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31760 offset = info->push_p ? info->total_size : 0;
31761 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
31762 offset = 0;
31763 else
31764 gcc_unreachable ();
31765
31766 return offset;
31767 }
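
/* For example, eliminating the argument pointer to the stack pointer
   in a function that pushes a frame yields info->total_size, since
   the incoming arguments sit just above the newly allocated frame;
   when no frame is pushed the two pointers coincide and the offset
   is 0.  */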
31768
31769 static rtx
31770 rs6000_dwarf_register_span (rtx reg)
31771 {
31772 rtx parts[8];
31773 int i, words;
31774 unsigned regno = REGNO (reg);
31775 machine_mode mode = GET_MODE (reg);
31776
31777 if (TARGET_SPE
31778 && regno < 32
31779 && (SPE_VECTOR_MODE (GET_MODE (reg))
31780 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
31781 && mode != SFmode && mode != SDmode && mode != SCmode)))
31782 ;
31783 else
31784 return NULL_RTX;
31785
31786 regno = REGNO (reg);
31787
31788 /* The duality of the SPE register size wreaks all kinds of havoc.
31789 This is a way of distinguishing r0 in 32-bit mode from r0 in
31790 64-bit mode. */
31791 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
31792 gcc_assert (words <= 4);
31793 for (i = 0; i < words; i++, regno++)
31794 {
31795 if (BYTES_BIG_ENDIAN)
31796 {
31797 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31798 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
31799 }
31800 else
31801 {
31802 parts[2 * i] = gen_rtx_REG (SImode, regno);
31803 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31804 }
31805 }
31806
31807 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
31808 }
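
/* For example, a DFmode value in r5 under TARGET_E500_DOUBLE occupies
   a single 64-bit SPE register, so WORDS is 1 and on a big-endian
   target the span is (parallel [(reg:SI <high part of r5>)
   (reg:SI r5)]), telling the unwinder that the upper 32 bits live in
   the SPE high-part register and the lower 32 bits in the ordinary
   GPR.  */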
31809
31810 /* Fill in sizes for SPE register high parts in table used by unwinder. */
31811
31812 static void
31813 rs6000_init_dwarf_reg_sizes_extra (tree address)
31814 {
31815 if (TARGET_SPE)
31816 {
31817 int i;
31818 machine_mode mode = TYPE_MODE (char_type_node);
31819 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31820 rtx mem = gen_rtx_MEM (BLKmode, addr);
31821 rtx value = gen_int_mode (4, mode);
31822
31823 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
31824 {
31825 int column = DWARF_REG_TO_UNWIND_COLUMN
31826 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31827 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31828
31829 emit_move_insn (adjust_address (mem, mode, offset), value);
31830 }
31831 }
31832
31833 if (TARGET_MACHO && ! TARGET_ALTIVEC)
31834 {
31835 int i;
31836 machine_mode mode = TYPE_MODE (char_type_node);
31837 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31838 rtx mem = gen_rtx_MEM (BLKmode, addr);
31839 rtx value = gen_int_mode (16, mode);
31840
31841 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
31842 The unwinder still needs to know the size of Altivec registers. */
31843
31844 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
31845 {
31846 int column = DWARF_REG_TO_UNWIND_COLUMN
31847 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31848 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31849
31850 emit_move_insn (adjust_address (mem, mode, offset), value);
31851 }
31852 }
31853 }
31854
31855 /* Map internal gcc register numbers to debug format register numbers.
31856 FORMAT specifies the type of debug register number to use:
31857 0 -- debug information, except for frame-related sections
31858 1 -- DWARF .debug_frame section
31859 2 -- DWARF .eh_frame section */
31860
31861 unsigned int
31862 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
31863 {
31864 /* We never use the GCC internal number for SPE high registers.
31865 Those are mapped to the 1200..1231 range for all debug formats. */
31866 if (SPE_HIGH_REGNO_P (regno))
31867 return regno - FIRST_SPE_HIGH_REGNO + 1200;
31868
31869 /* Except for the above, we use the internal number for non-DWARF
31870 debug information, and also for .eh_frame. */
31871 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
31872 return regno;
31873
31874 /* On some platforms, we use the standard DWARF register
31875 numbering for .debug_info and .debug_frame. */
31876 #ifdef RS6000_USE_DWARF_NUMBERING
31877 if (regno <= 63)
31878 return regno;
31879 if (regno == LR_REGNO)
31880 return 108;
31881 if (regno == CTR_REGNO)
31882 return 109;
31883 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
31884 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
31885 The actual code emitted saves the whole of CR, so we map CR2_REGNO
31886 to the DWARF reg for CR. */
31887 if (format == 1 && regno == CR2_REGNO)
31888 return 64;
31889 if (CR_REGNO_P (regno))
31890 return regno - CR0_REGNO + 86;
31891 if (regno == CA_REGNO)
31892 return 101; /* XER */
31893 if (ALTIVEC_REGNO_P (regno))
31894 return regno - FIRST_ALTIVEC_REGNO + 1124;
31895 if (regno == VRSAVE_REGNO)
31896 return 356;
31897 if (regno == VSCR_REGNO)
31898 return 67;
31899 if (regno == SPE_ACC_REGNO)
31900 return 99;
31901 if (regno == SPEFSCR_REGNO)
31902 return 612;
31903 #endif
31904 return regno;
31905 }
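
/* Under RS6000_USE_DWARF_NUMBERING, for example, a .debug_frame
   request (FORMAT == 1) for CR3 maps to 89 (86 plus its offset from
   CR0), whereas the same register for .eh_frame (FORMAT == 2) keeps
   the internal GCC number, because the early return above is taken
   before the DWARF mapping is consulted.  */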
31906
31907 /* target hook eh_return_filter_mode */
31908 static machine_mode
31909 rs6000_eh_return_filter_mode (void)
31910 {
31911 return TARGET_32BIT ? SImode : word_mode;
31912 }
31913
31914 /* Target hook for scalar_mode_supported_p. */
31915 static bool
31916 rs6000_scalar_mode_supported_p (machine_mode mode)
31917 {
31918 if (DECIMAL_FLOAT_MODE_P (mode))
31919 return default_decimal_float_supported_p ();
31920 else
31921 return default_scalar_mode_supported_p (mode);
31922 }
31923
31924 /* Target hook for vector_mode_supported_p. */
31925 static bool
31926 rs6000_vector_mode_supported_p (machine_mode mode)
31927 {
31928
31929 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
31930 return true;
31931
31932 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
31933 return true;
31934
31935 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
31936 return true;
31937
31938 else
31939 return false;
31940 }
31941
31942 /* Target hook for invalid_arg_for_unprototyped_fn. */
31943 static const char *
31944 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
31945 {
31946 return (!rs6000_darwin64_abi
31947 && typelist == 0
31948 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
31949 && (funcdecl == NULL_TREE
31950 || (TREE_CODE (funcdecl) == FUNCTION_DECL
31951 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
31952 ? N_("AltiVec argument passed to unprototyped function")
31953 : NULL;
31954 }
31955
31956 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
31957 setup by using the hidden function __stack_chk_fail_local instead of
31958 calling __stack_chk_fail directly. Otherwise it is better to call
31959 __stack_chk_fail directly. */
31960
31961 static tree ATTRIBUTE_UNUSED
31962 rs6000_stack_protect_fail (void)
31963 {
31964 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
31965 ? default_hidden_stack_protect_fail ()
31966 : default_external_stack_protect_fail ();
31967 }
31968
31969 void
31970 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
31971 int num_operands ATTRIBUTE_UNUSED)
31972 {
31973 if (rs6000_warn_cell_microcode)
31974 {
31975 const char *temp;
31976 int insn_code_number = recog_memoized (insn);
31977 location_t location = INSN_LOCATION (insn);
31978
31979 /* Punt on insns we cannot recognize. */
31980 if (insn_code_number < 0)
31981 return;
31982
31983 temp = get_insn_template (insn_code_number, insn);
31984
31985 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
31986 warning_at (location, OPT_mwarn_cell_microcode,
31987 "emitting microcode insn %s\t[%s] #%d",
31988 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
31989 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
31990 warning_at (location, OPT_mwarn_cell_microcode,
31991 "emitting conditional microcode insn %s\t[%s] #%d",
31992 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
31993 }
31994 }
31995
31996 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31997
31998 #if TARGET_ELF
31999 static unsigned HOST_WIDE_INT
32000 rs6000_asan_shadow_offset (void)
32001 {
32002 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
32003 }
32004 #endif
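
/* The returned offset feeds the usual AddressSanitizer shadow mapping,

     shadow_address = (address >> 3) + rs6000_asan_shadow_offset ()

   so 64-bit code places its shadow at 1 << 41 and 32-bit code at
   1 << 29, values chosen to keep the shadow region away from typical
   program mappings.  */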
32005 \f
32006 /* Mask options that we want to support inside of attribute((target)) and
32007 #pragma GCC target operations. Note that we do not include things like
32008 64/32-bit, endianness, hard/soft floating point, etc. that would have
32009 different calling sequences. */
32010
32011 struct rs6000_opt_mask {
32012 const char *name; /* option name */
32013 HOST_WIDE_INT mask; /* mask to set */
32014 bool invert; /* invert sense of mask */
32015 bool valid_target; /* option is a target option */
32016 };
32017
32018 static struct rs6000_opt_mask const rs6000_opt_masks[] =
32019 {
32020 { "altivec", OPTION_MASK_ALTIVEC, false, true },
32021 { "cmpb", OPTION_MASK_CMPB, false, true },
32022 { "crypto", OPTION_MASK_CRYPTO, false, true },
32023 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
32024 { "dlmzb", OPTION_MASK_DLMZB, false, true },
32025 { "fprnd", OPTION_MASK_FPRND, false, true },
32026 { "hard-dfp", OPTION_MASK_DFP, false, true },
32027 { "htm", OPTION_MASK_HTM, false, true },
32028 { "isel", OPTION_MASK_ISEL, false, true },
32029 { "mfcrf", OPTION_MASK_MFCRF, false, true },
32030 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
32031 { "mulhw", OPTION_MASK_MULHW, false, true },
32032 { "multiple", OPTION_MASK_MULTIPLE, false, true },
32033 { "popcntb", OPTION_MASK_POPCNTB, false, true },
32034 { "popcntd", OPTION_MASK_POPCNTD, false, true },
32035 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
32036 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
32037 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
32038 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
32039 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
32040 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
32041 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
32042 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
32043 { "string", OPTION_MASK_STRING, false, true },
32044 { "update", OPTION_MASK_NO_UPDATE, true , true },
32045 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
32046 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
32047 { "vsx", OPTION_MASK_VSX, false, true },
32048 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
32049 #ifdef OPTION_MASK_64BIT
32050 #if TARGET_AIX_OS
32051 { "aix64", OPTION_MASK_64BIT, false, false },
32052 { "aix32", OPTION_MASK_64BIT, true, false },
32053 #else
32054 { "64", OPTION_MASK_64BIT, false, false },
32055 { "32", OPTION_MASK_64BIT, true, false },
32056 #endif
32057 #endif
32058 #ifdef OPTION_MASK_EABI
32059 { "eabi", OPTION_MASK_EABI, false, false },
32060 #endif
32061 #ifdef OPTION_MASK_LITTLE_ENDIAN
32062 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
32063 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
32064 #endif
32065 #ifdef OPTION_MASK_RELOCATABLE
32066 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
32067 #endif
32068 #ifdef OPTION_MASK_STRICT_ALIGN
32069 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
32070 #endif
32071 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
32072 { "string", OPTION_MASK_STRING, false, false },
32073 };
32074
32075 /* Builtin mask mapping for printing the flags. */
32076 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
32077 {
32078 { "altivec", RS6000_BTM_ALTIVEC, false, false },
32079 { "vsx", RS6000_BTM_VSX, false, false },
32080 { "spe", RS6000_BTM_SPE, false, false },
32081 { "paired", RS6000_BTM_PAIRED, false, false },
32082 { "fre", RS6000_BTM_FRE, false, false },
32083 { "fres", RS6000_BTM_FRES, false, false },
32084 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
32085 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
32086 { "popcntd", RS6000_BTM_POPCNTD, false, false },
32087 { "cell", RS6000_BTM_CELL, false, false },
32088 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
32089 { "crypto", RS6000_BTM_CRYPTO, false, false },
32090 { "htm", RS6000_BTM_HTM, false, false },
32091 { "hard-dfp", RS6000_BTM_DFP, false, false },
32092 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
32093 { "long-double-128", RS6000_BTM_LDBL128, false, false },
32094 };
32095
32096 /* Option variables that we want to support inside attribute((target)) and
32097 #pragma GCC target operations. */
32098
32099 struct rs6000_opt_var {
32100 const char *name; /* option name */
32101 size_t global_offset; /* offset of the option in global_options. */
32102 size_t target_offset; /* offset of the option in target options. */
32103 };
32104
32105 static struct rs6000_opt_var const rs6000_opt_vars[] =
32106 {
32107 { "friz",
32108 offsetof (struct gcc_options, x_TARGET_FRIZ),
32109 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
32110 { "avoid-indexed-addresses",
32111 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
32112 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
32113 { "paired",
32114 offsetof (struct gcc_options, x_rs6000_paired_float),
32115 offsetof (struct cl_target_option, x_rs6000_paired_float), },
32116 { "longcall",
32117 offsetof (struct gcc_options, x_rs6000_default_long_calls),
32118 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
32119 };
32120
32121 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
32122 parsing. Return true if there were no errors. */
32123
32124 static bool
32125 rs6000_inner_target_options (tree args, bool attr_p)
32126 {
32127 bool ret = true;
32128
32129 if (args == NULL_TREE)
32130 ;
32131
32132 else if (TREE_CODE (args) == STRING_CST)
32133 {
32134 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32135 char *q;
32136
32137 while ((q = strtok (p, ",")) != NULL)
32138 {
32139 bool error_p = false;
32140 bool not_valid_p = false;
32141 const char *cpu_opt = NULL;
32142
32143 p = NULL;
32144 if (strncmp (q, "cpu=", 4) == 0)
32145 {
32146 int cpu_index = rs6000_cpu_name_lookup (q+4);
32147 if (cpu_index >= 0)
32148 rs6000_cpu_index = cpu_index;
32149 else
32150 {
32151 error_p = true;
32152 cpu_opt = q+4;
32153 }
32154 }
32155 else if (strncmp (q, "tune=", 5) == 0)
32156 {
32157 int tune_index = rs6000_cpu_name_lookup (q+5);
32158 if (tune_index >= 0)
32159 rs6000_tune_index = tune_index;
32160 else
32161 {
32162 error_p = true;
32163 cpu_opt = q+5;
32164 }
32165 }
32166 else
32167 {
32168 size_t i;
32169 bool invert = false;
32170 char *r = q;
32171
32172 error_p = true;
32173 if (strncmp (r, "no-", 3) == 0)
32174 {
32175 invert = true;
32176 r += 3;
32177 }
32178
32179 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
32180 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
32181 {
32182 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
32183
32184 if (!rs6000_opt_masks[i].valid_target)
32185 not_valid_p = true;
32186 else
32187 {
32188 error_p = false;
32189 rs6000_isa_flags_explicit |= mask;
32190
32191 /* VSX needs altivec, so -mvsx automagically sets
32192 altivec. */
32193 if (mask == OPTION_MASK_VSX && !invert)
32194 mask |= OPTION_MASK_ALTIVEC;
32195
32196 if (rs6000_opt_masks[i].invert)
32197 invert = !invert;
32198
32199 if (invert)
32200 rs6000_isa_flags &= ~mask;
32201 else
32202 rs6000_isa_flags |= mask;
32203 }
32204 break;
32205 }
32206
32207 if (error_p && !not_valid_p)
32208 {
32209 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
32210 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
32211 {
32212 size_t j = rs6000_opt_vars[i].global_offset;
32213 *((int *) ((char *)&global_options + j)) = !invert;
32214 error_p = false;
32215 break;
32216 }
32217 }
32218 }
32219
32220 if (error_p)
32221 {
32222 const char *eprefix, *esuffix;
32223
32224 ret = false;
32225 if (attr_p)
32226 {
32227 eprefix = "__attribute__((__target__(";
32228 esuffix = ")))";
32229 }
32230 else
32231 {
32232 eprefix = "#pragma GCC target ";
32233 esuffix = "";
32234 }
32235
32236 if (cpu_opt)
32237 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
32238 q, esuffix);
32239 else if (not_valid_p)
32240 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
32241 else
32242 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
32243 }
32244 }
32245 }
32246
32247 else if (TREE_CODE (args) == TREE_LIST)
32248 {
32249 do
32250 {
32251 tree value = TREE_VALUE (args);
32252 if (value)
32253 {
32254 bool ret2 = rs6000_inner_target_options (value, attr_p);
32255 if (!ret2)
32256 ret = false;
32257 }
32258 args = TREE_CHAIN (args);
32259 }
32260 while (args != NULL_TREE);
32261 }
32262
32263 else
32264 gcc_unreachable ();
32265
32266 return ret;
32267 }
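
/* As a usage sketch (the option names come from the tables above),
   the parser accepts strings such as

     #pragma GCC target ("cpu=power8,htm,no-string")
     __attribute__((__target__("vsx,friz")))

   where "htm", "string" and "vsx" are looked up in rs6000_opt_masks
   ("vsx" implicitly enabling "altivec"), a "no-" prefix inverts a
   mask, and "friz" is found in rs6000_opt_vars.  */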
32268
32269 /* Print out the target options as a list for -mdebug=target. */
32270
32271 static void
32272 rs6000_debug_target_options (tree args, const char *prefix)
32273 {
32274 if (args == NULL_TREE)
32275 fprintf (stderr, "%s<NULL>", prefix);
32276
32277 else if (TREE_CODE (args) == STRING_CST)
32278 {
32279 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32280 char *q;
32281
32282 while ((q = strtok (p, ",")) != NULL)
32283 {
32284 p = NULL;
32285 fprintf (stderr, "%s\"%s\"", prefix, q);
32286 prefix = ", ";
32287 }
32288 }
32289
32290 else if (TREE_CODE (args) == TREE_LIST)
32291 {
32292 do
32293 {
32294 tree value = TREE_VALUE (args);
32295 if (value)
32296 {
32297 rs6000_debug_target_options (value, prefix);
32298 prefix = ", ";
32299 }
32300 args = TREE_CHAIN (args);
32301 }
32302 while (args != NULL_TREE);
32303 }
32304
32305 else
32306 gcc_unreachable ();
32307
32308 return;
32309 }
32310
32311 \f
32312 /* Hook to validate attribute((target("..."))). */
32313
32314 static bool
32315 rs6000_valid_attribute_p (tree fndecl,
32316 tree ARG_UNUSED (name),
32317 tree args,
32318 int flags)
32319 {
32320 struct cl_target_option cur_target;
32321 bool ret;
32322 tree old_optimize = build_optimization_node (&global_options);
32323 tree new_target, new_optimize;
32324 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32325
32326 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32327
32328 if (TARGET_DEBUG_TARGET)
32329 {
32330 tree tname = DECL_NAME (fndecl);
32331 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
32332 if (tname)
32333 fprintf (stderr, "function: %.*s\n",
32334 (int) IDENTIFIER_LENGTH (tname),
32335 IDENTIFIER_POINTER (tname));
32336 else
32337 fprintf (stderr, "function: unknown\n");
32338
32339 fprintf (stderr, "args:");
32340 rs6000_debug_target_options (args, " ");
32341 fprintf (stderr, "\n");
32342
32343 if (flags)
32344 fprintf (stderr, "flags: 0x%x\n", flags);
32345
32346 fprintf (stderr, "--------------------\n");
32347 }
32348
32349 old_optimize = build_optimization_node (&global_options);
32350 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32351
32352 /* If the function changed the optimization levels as well as setting target
32353 options, start with the optimizations specified. */
32354 if (func_optimize && func_optimize != old_optimize)
32355 cl_optimization_restore (&global_options,
32356 TREE_OPTIMIZATION (func_optimize));
32357
32358 /* The target attributes may also change some optimization flags, so update
32359 the optimization options if necessary. */
32360 cl_target_option_save (&cur_target, &global_options);
32361 rs6000_cpu_index = rs6000_tune_index = -1;
32362 ret = rs6000_inner_target_options (args, true);
32363
32364 /* Set up any additional state. */
32365 if (ret)
32366 {
32367 ret = rs6000_option_override_internal (false);
32368 new_target = build_target_option_node (&global_options);
32369 }
32370 else
32371 new_target = NULL;
32372
32373 new_optimize = build_optimization_node (&global_options);
32374
32375 if (!new_target)
32376 ret = false;
32377
32378 else if (fndecl)
32379 {
32380 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
32381
32382 if (old_optimize != new_optimize)
32383 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32384 }
32385
32386 cl_target_option_restore (&global_options, &cur_target);
32387
32388 if (old_optimize != new_optimize)
32389 cl_optimization_restore (&global_options,
32390 TREE_OPTIMIZATION (old_optimize));
32391
32392 return ret;
32393 }
32394
32395 \f
32396 /* Hook to validate the current #pragma GCC target and set the state, and
32397 update the macros based on what was changed. If ARGS is NULL, then
32398 POP_TARGET is used to reset the options. */
32399
32400 bool
32401 rs6000_pragma_target_parse (tree args, tree pop_target)
32402 {
32403 tree prev_tree = build_target_option_node (&global_options);
32404 tree cur_tree;
32405 struct cl_target_option *prev_opt, *cur_opt;
32406 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
32407 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
32408
32409 if (TARGET_DEBUG_TARGET)
32410 {
32411 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
32412 fprintf (stderr, "args:");
32413 rs6000_debug_target_options (args, " ");
32414 fprintf (stderr, "\n");
32415
32416 if (pop_target)
32417 {
32418 fprintf (stderr, "pop_target:\n");
32419 debug_tree (pop_target);
32420 }
32421 else
32422 fprintf (stderr, "pop_target: <NULL>\n");
32423
32424 fprintf (stderr, "--------------------\n");
32425 }
32426
32427 if (! args)
32428 {
32429 cur_tree = ((pop_target)
32430 ? pop_target
32431 : target_option_default_node);
32432 cl_target_option_restore (&global_options,
32433 TREE_TARGET_OPTION (cur_tree));
32434 }
32435 else
32436 {
32437 rs6000_cpu_index = rs6000_tune_index = -1;
32438 if (!rs6000_inner_target_options (args, false)
32439 || !rs6000_option_override_internal (false)
32440 || (cur_tree = build_target_option_node (&global_options))
32441 == NULL_TREE)
32442 {
32443 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
32444 fprintf (stderr, "invalid pragma\n");
32445
32446 return false;
32447 }
32448 }
32449
32450 target_option_current_node = cur_tree;
32451
32452 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
32453 change the macros that are defined. */
32454 if (rs6000_target_modify_macros_ptr)
32455 {
32456 prev_opt = TREE_TARGET_OPTION (prev_tree);
32457 prev_bumask = prev_opt->x_rs6000_builtin_mask;
32458 prev_flags = prev_opt->x_rs6000_isa_flags;
32459
32460 cur_opt = TREE_TARGET_OPTION (cur_tree);
32461 cur_flags = cur_opt->x_rs6000_isa_flags;
32462 cur_bumask = cur_opt->x_rs6000_builtin_mask;
32463
32464 diff_bumask = (prev_bumask ^ cur_bumask);
32465 diff_flags = (prev_flags ^ cur_flags);
32466
32467 if ((diff_flags != 0) || (diff_bumask != 0))
32468 {
32469 /* Delete old macros. */
32470 rs6000_target_modify_macros_ptr (false,
32471 prev_flags & diff_flags,
32472 prev_bumask & diff_bumask);
32473
32474 /* Define new macros. */
32475 rs6000_target_modify_macros_ptr (true,
32476 cur_flags & diff_flags,
32477 cur_bumask & diff_bumask);
32478 }
32479 }
32480
32481 return true;
32482 }
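
/* For example, if a pragma enables VSX where it was previously off,
   diff_flags contains OPTION_MASK_VSX and the callback runs twice:
   once to delete the stale predefined macros and once to define the
   new ones, keeping predefines such as __VSX__ in sync with the
   pragma (the actual macro handling lives in the C-family front-end
   callback that rs6000_target_modify_macros_ptr points to).  */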
32483
32484 \f
32485 /* Remember the last target of rs6000_set_current_function. */
32486 static GTY(()) tree rs6000_previous_fndecl;
32487
32488 /* Establish appropriate back-end context for processing the function
32489 FNDECL. The argument might be NULL to indicate processing at top
32490 level, outside of any function scope. */
32491 static void
32492 rs6000_set_current_function (tree fndecl)
32493 {
32494 tree old_tree = (rs6000_previous_fndecl
32495 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
32496 : NULL_TREE);
32497
32498 tree new_tree = (fndecl
32499 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
32500 : NULL_TREE);
32501
32502 if (TARGET_DEBUG_TARGET)
32503 {
32504 bool print_final = false;
32505 fprintf (stderr, "\n==================== rs6000_set_current_function");
32506
32507 if (fndecl)
32508 fprintf (stderr, ", fndecl %s (%p)",
32509 (DECL_NAME (fndecl)
32510 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
32511 : "<unknown>"), (void *)fndecl);
32512
32513 if (rs6000_previous_fndecl)
32514 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
32515
32516 fprintf (stderr, "\n");
32517 if (new_tree)
32518 {
32519 fprintf (stderr, "\nnew fndecl target specific options:\n");
32520 debug_tree (new_tree);
32521 print_final = true;
32522 }
32523
32524 if (old_tree)
32525 {
32526 fprintf (stderr, "\nold fndecl target specific options:\n");
32527 debug_tree (old_tree);
32528 print_final = true;
32529 }
32530
32531 if (print_final)
32532 fprintf (stderr, "--------------------\n");
32533 }
32534
32535 /* Only change the context if the function changes. This hook is called
32536 several times in the course of compiling a function, and we don't want to
32537 slow things down too much or call target_reinit when it isn't safe. */
32538 if (fndecl && fndecl != rs6000_previous_fndecl)
32539 {
32540 rs6000_previous_fndecl = fndecl;
32541 if (old_tree == new_tree)
32542 ;
32543
32544 else if (new_tree)
32545 {
32546 cl_target_option_restore (&global_options,
32547 TREE_TARGET_OPTION (new_tree));
32548 if (TREE_TARGET_GLOBALS (new_tree))
32549 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32550 else
32551 TREE_TARGET_GLOBALS (new_tree)
32552 = save_target_globals_default_opts ();
32553 }
32554
32555 else if (old_tree)
32556 {
32557 new_tree = target_option_current_node;
32558 cl_target_option_restore (&global_options,
32559 TREE_TARGET_OPTION (new_tree));
32560 if (TREE_TARGET_GLOBALS (new_tree))
32561 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32562 else if (new_tree == target_option_default_node)
32563 restore_target_globals (&default_target_globals);
32564 else
32565 TREE_TARGET_GLOBALS (new_tree)
32566 = save_target_globals_default_opts ();
32567 }
32568 }
32569 }
32570
32571 \f
32572 /* Save the current options */
32573
32574 static void
32575 rs6000_function_specific_save (struct cl_target_option *ptr,
32576 struct gcc_options *opts)
32577 {
32578 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
32579 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
32580 }
32581
32582 /* Restore the current options */
32583
32584 static void
32585 rs6000_function_specific_restore (struct gcc_options *opts,
32586 struct cl_target_option *ptr)
32587
32588 {
32589 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
32590 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
32591 (void) rs6000_option_override_internal (false);
32592 }
32593
32594 /* Print the current options */
32595
32596 static void
32597 rs6000_function_specific_print (FILE *file, int indent,
32598 struct cl_target_option *ptr)
32599 {
32600 rs6000_print_isa_options (file, indent, "Isa options set",
32601 ptr->x_rs6000_isa_flags);
32602
32603 rs6000_print_isa_options (file, indent, "Isa options explicit",
32604 ptr->x_rs6000_isa_flags_explicit);
32605 }
32606
32607 /* Helper function to print the current isa or misc options on a line. */
32608
32609 static void
32610 rs6000_print_options_internal (FILE *file,
32611 int indent,
32612 const char *string,
32613 HOST_WIDE_INT flags,
32614 const char *prefix,
32615 const struct rs6000_opt_mask *opts,
32616 size_t num_elements)
32617 {
32618 size_t i;
32619 size_t start_column = 0;
32620 size_t cur_column;
32621 size_t max_column = 76;
32622 const char *comma = "";
32623
32624 if (indent)
32625 start_column += fprintf (file, "%*s", indent, "");
32626
32627 if (!flags)
32628 {
32629 fprintf (file, DEBUG_FMT_S, string, "<none>");
32630 return;
32631 }
32632
32633 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
32634
32635 /* Print the various mask options. */
32636 cur_column = start_column;
32637 for (i = 0; i < num_elements; i++)
32638 {
32639 if ((flags & opts[i].mask) != 0)
32640 {
32641 const char *no_str = opts[i].invert ? "no-" : "";
32642 size_t len = (strlen (comma)
32643 + strlen (prefix)
32644 + strlen (no_str)
32645 + strlen (opts[i].name));
32646
32647 cur_column += len;
32648 if (cur_column > max_column)
32649 {
32650 fprintf (file, ", \\\n%*s", (int)start_column, "");
32651 cur_column = start_column + len;
32652 comma = "";
32653 }
32654
32655 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
32656 opts[i].name);
32657 flags &= ~ opts[i].mask;
32658 comma = ", ";
32659 }
32660 }
32661
32662 fputs ("\n", file);
32663 }
32664
32665 /* Helper function to print the current isa options on a line. */
32666
32667 static void
32668 rs6000_print_isa_options (FILE *file, int indent, const char *string,
32669 HOST_WIDE_INT flags)
32670 {
32671 rs6000_print_options_internal (file, indent, string, flags, "-m",
32672 &rs6000_opt_masks[0],
32673 ARRAY_SIZE (rs6000_opt_masks));
32674 }
32675
32676 static void
32677 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
32678 HOST_WIDE_INT flags)
32679 {
32680 rs6000_print_options_internal (file, indent, string, flags, "",
32681 &rs6000_builtin_mask_names[0],
32682 ARRAY_SIZE (rs6000_builtin_mask_names));
32683 }
32684
32685 \f
32686 /* Hook to determine if one function can safely inline another. */
32687
32688 static bool
32689 rs6000_can_inline_p (tree caller, tree callee)
32690 {
32691 bool ret = false;
32692 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32693 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32694
32695 /* If callee has no option attributes, then it is ok to inline. */
32696 if (!callee_tree)
32697 ret = true;
32698
32699 /* If caller has no option attributes, but callee does then it is not ok to
32700 inline. */
32701 else if (!caller_tree)
32702 ret = false;
32703
32704 else
32705 {
32706 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32707 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32708
32709 /* Callee's options should be a subset of the caller's, i.e. a vsx function
32710 can inline an altivec function but a non-vsx function can't inline a
32711 vsx function. */
32712 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32713 == callee_opts->x_rs6000_isa_flags)
32714 ret = true;
32715 }
32716
32717 if (TARGET_DEBUG_TARGET)
32718 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
32719 (DECL_NAME (caller)
32720 ? IDENTIFIER_POINTER (DECL_NAME (caller))
32721 : "<unknown>"),
32722 (DECL_NAME (callee)
32723 ? IDENTIFIER_POINTER (DECL_NAME (callee))
32724 : "<unknown>"),
32725 (ret ? "can" : "cannot"));
32726
32727 return ret;
32728 }
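
/* Concretely, the subset test above lets a caller compiled with -mvsx
   (whose ISA flags include OPTION_MASK_VSX and, implicitly,
   OPTION_MASK_ALTIVEC) inline a callee marked
   __attribute__((target("altivec"))), while a caller without AltiVec
   cannot inline that callee, since the callee's AltiVec flag is not
   contained in the caller's flags.  */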
32729 \f
32730 /* Allocate a stack temp and fix up the address so that it meets the
32731 particular memory requirements (either offsettable or REG+REG addressing). */
32732
32733 rtx
32734 rs6000_allocate_stack_temp (machine_mode mode,
32735 bool offsettable_p,
32736 bool reg_reg_p)
32737 {
32738 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32739 rtx addr = XEXP (stack, 0);
32740 int strict_p = (reload_in_progress || reload_completed);
32741
32742 if (!legitimate_indirect_address_p (addr, strict_p))
32743 {
32744 if (offsettable_p
32745 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32746 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32747
32748 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32749 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32750 }
32751
32752 return stack;
32753 }
32754
32755 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
32756 to such a form to deal with memory reference instructions like STFIWX that
32757 only take reg+reg addressing. */
32758
32759 rtx
32760 rs6000_address_for_fpconvert (rtx x)
32761 {
32762 int strict_p = (reload_in_progress || reload_completed);
32763 rtx addr;
32764
32765 gcc_assert (MEM_P (x));
32766 addr = XEXP (x, 0);
32767 if (! legitimate_indirect_address_p (addr, strict_p)
32768 && ! legitimate_indexed_address_p (addr, strict_p))
32769 {
32770 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
32771 {
32772 rtx reg = XEXP (addr, 0);
32773 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
32774 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
32775 gcc_assert (REG_P (reg));
32776 emit_insn (gen_add3_insn (reg, reg, size_rtx));
32777 addr = reg;
32778 }
32779 else if (GET_CODE (addr) == PRE_MODIFY)
32780 {
32781 rtx reg = XEXP (addr, 0);
32782 rtx expr = XEXP (addr, 1);
32783 gcc_assert (REG_P (reg));
32784 gcc_assert (GET_CODE (expr) == PLUS);
32785 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
32786 addr = reg;
32787 }
32788
32789 x = replace_equiv_address (x, copy_addr_to_reg (addr));
32790 }
32791
32792 return x;
32793 }
32794
32795 /* Given a memory reference, if it is not in the form for altivec memory
32796 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
32797 convert to the altivec format. */
32798
32799 rtx
32800 rs6000_address_for_altivec (rtx x)
32801 {
32802 gcc_assert (MEM_P (x));
32803 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
32804 {
32805 rtx addr = XEXP (x, 0);
32806 int strict_p = (reload_in_progress || reload_completed);
32807
32808 if (!legitimate_indexed_address_p (addr, strict_p)
32809 && !legitimate_indirect_address_p (addr, strict_p))
32810 addr = copy_to_mode_reg (Pmode, addr);
32811
32812 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
32813 x = change_address (x, GET_MODE (x), addr);
32814 }
32815
32816 return x;
32817 }
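
/* The AND with -16 above mirrors the masking the AltiVec load/store
   instructions perform in hardware:

     effective_address = addr & ~15

   i.e. the low four bits are ignored, so exposing the mask in the RTL
   keeps the compiler's view of the access consistent with what
   lvx/stvx actually reference.  */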
32818
32819 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
32820
32821 On the RS/6000, all integer constants are acceptable, most won't be valid
32822 for particular insns, though. Only easy FP constants are acceptable. */
32823
32824 static bool
32825 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
32826 {
32827 if (TARGET_ELF && tls_referenced_p (x))
32828 return false;
32829
32830 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
32831 || GET_MODE (x) == VOIDmode
32832 || (TARGET_POWERPC64 && mode == DImode)
32833 || easy_fp_constant (x, mode)
32834 || easy_vector_constant (x, mode));
32835 }
32836
32837 \f
32838
32839 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
32840
32841 void
32842 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32843 {
32844 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
32845 rtx toc_load = NULL_RTX;
32846 rtx toc_restore = NULL_RTX;
32847 rtx func_addr;
32848 rtx abi_reg = NULL_RTX;
32849 rtx call[4];
32850 int n_call;
32851 rtx insn;
32852
32853 /* Handle longcall attributes. */
32854 if (INTVAL (cookie) & CALL_LONG)
32855 func_desc = rs6000_longcall_ref (func_desc);
32856
32857 /* Handle indirect calls. */
32858 if (GET_CODE (func_desc) != SYMBOL_REF
32859 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
32860 {
32861 /* Save the TOC into its reserved slot before the call,
32862 and prepare to restore it after the call. */
32863 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32864 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
32865 rtx stack_toc_mem = gen_frame_mem (Pmode,
32866 gen_rtx_PLUS (Pmode, stack_ptr,
32867 stack_toc_offset));
32868 toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_mem);
32869
32870 /* Can we optimize saving the TOC in the prologue or
32871 do we need to do it at every call? */
32872 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
32873 cfun->machine->save_toc_in_prologue = true;
32874 else
32875 {
32876 MEM_VOLATILE_P (stack_toc_mem) = 1;
32877 emit_move_insn (stack_toc_mem, toc_reg);
32878 }
32879
32880 if (DEFAULT_ABI == ABI_ELFv2)
32881 {
32882 /* A function pointer in the ELFv2 ABI is just a plain address, but
32883 the ABI requires it to be loaded into r12 before the call. */
32884 func_addr = gen_rtx_REG (Pmode, 12);
32885 emit_move_insn (func_addr, func_desc);
32886 abi_reg = func_addr;
32887 }
32888 else
32889 {
32890 /* A function pointer under AIX is a pointer to a data area whose
32891 first word contains the actual address of the function, whose
32892 second word contains a pointer to its TOC, and whose third word
32893 contains a value to place in the static chain register (r11).
32894 Note that if we load the static chain, our "trampoline" need
32895 not have any executable code. */
32896
32897 /* Load up address of the actual function. */
32898 func_desc = force_reg (Pmode, func_desc);
32899 func_addr = gen_reg_rtx (Pmode);
32900 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
32901
32902 /* Prepare to load the TOC of the called function. Note that the
32903 TOC load must happen immediately before the actual call so
32904 that unwinding the TOC registers works correctly. See the
32905 comment in frob_update_context. */
32906 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
32907 rtx func_toc_mem = gen_rtx_MEM (Pmode,
32908 gen_rtx_PLUS (Pmode, func_desc,
32909 func_toc_offset));
32910 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
32911
32912 /* If we have a static chain, load it up. */
32913 if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32914 {
32915 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
32916 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
32917 rtx func_sc_mem = gen_rtx_MEM (Pmode,
32918 gen_rtx_PLUS (Pmode, func_desc,
32919 func_sc_offset));
32920 emit_move_insn (sc_reg, func_sc_mem);
32921 abi_reg = sc_reg;
32922 }
32923 }
32924 }
32925 else
32926 {
32927 /* Direct calls use the TOC: for local calls, the callee will
32928 assume the TOC register is set; for non-local calls, the
32929 PLT stub needs the TOC register. */
32930 abi_reg = toc_reg;
32931 func_addr = func_desc;
32932 }
32933
32934 /* Create the call. */
32935 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
32936 if (value != NULL_RTX)
32937 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
32938 n_call = 1;
32939
32940 if (toc_load)
32941 call[n_call++] = toc_load;
32942 if (toc_restore)
32943 call[n_call++] = toc_restore;
32944
32945 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
32946
32947 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
32948 insn = emit_call_insn (insn);
32949
32950 /* Mention all registers defined by the ABI to hold information
32951 as uses in CALL_INSN_FUNCTION_USAGE. */
32952 if (abi_reg)
32953 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32954 }
32955
32956 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
32957
32958 void
32959 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32960 {
32961 rtx call[2];
32962 rtx insn;
32963
32964 gcc_assert (INTVAL (cookie) == 0);
32965
32966 /* Create the call. */
32967 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
32968 if (value != NULL_RTX)
32969 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
32970
32971 call[1] = simple_return_rtx;
32972
32973 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
32974 insn = emit_call_insn (insn);
32975
32976 /* Note use of the TOC register. */
32977 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
32978 /* We need to also mark a use of the link register since the function we
32979 sibling-call to will use it to return to our caller. */
32980 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
32981 }
32982
32983 /* Return whether we need to always update the saved TOC pointer when we update
32984 the stack pointer. */
32985
32986 static bool
32987 rs6000_save_toc_in_prologue_p (void)
32988 {
32989 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
32990 }
32991
32992 #ifdef HAVE_GAS_HIDDEN
32993 # define USE_HIDDEN_LINKONCE 1
32994 #else
32995 # define USE_HIDDEN_LINKONCE 0
32996 #endif
32997
32998 /* Fills in the label name that should be used for a 476 link stack thunk. */
32999
33000 void
33001 get_ppc476_thunk_name (char name[32])
33002 {
33003 gcc_assert (TARGET_LINK_STACK);
33004
33005 if (USE_HIDDEN_LINKONCE)
33006 sprintf (name, "__ppc476.get_thunk");
33007 else
33008 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
33009 }
33010
33011 /* This function emits the simple thunk routine that is used to preserve
33012 the link stack on the 476 cpu. */
33013
33014 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
33015 static void
33016 rs6000_code_end (void)
33017 {
33018 char name[32];
33019 tree decl;
33020
33021 if (!TARGET_LINK_STACK)
33022 return;
33023
33024 get_ppc476_thunk_name (name);
33025
33026 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
33027 build_function_type_list (void_type_node, NULL_TREE));
33028 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
33029 NULL_TREE, void_type_node);
33030 TREE_PUBLIC (decl) = 1;
33031 TREE_STATIC (decl) = 1;
33032
33033 #if RS6000_WEAK
33034 if (USE_HIDDEN_LINKONCE)
33035 {
33036 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
33037 targetm.asm_out.unique_section (decl, 0);
33038 switch_to_section (get_named_section (decl, NULL, 0));
33039 DECL_WEAK (decl) = 1;
33040 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
33041 targetm.asm_out.globalize_label (asm_out_file, name);
33042 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
33043 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
33044 }
33045 else
33046 #endif
33047 {
33048 switch_to_section (text_section);
33049 ASM_OUTPUT_LABEL (asm_out_file, name);
33050 }
33051
33052 DECL_INITIAL (decl) = make_node (BLOCK);
33053 current_function_decl = decl;
33054 init_function_start (decl);
33055 first_function_block_is_cold = false;
33056 /* Make sure unwind info is emitted for the thunk if needed. */
33057 final_start_function (emit_barrier (), asm_out_file, 1);
33058
33059 fputs ("\tblr\n", asm_out_file);
33060
33061 final_end_function ();
33062 init_insn_lengths ();
33063 free_after_compilation (cfun);
33064 set_cfun (NULL);
33065 current_function_decl = NULL;
33066 }
33067
33068 /* Add r30 to hard reg set if the prologue sets it up and it is not
33069 pic_offset_table_rtx. */
33070
33071 static void
33072 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
33073 {
33074 if (!TARGET_SINGLE_PIC_BASE
33075 && TARGET_TOC
33076 && TARGET_MINIMAL_TOC
33077 && get_pool_size () != 0)
33078 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
33079 }
33080
33081 \f
33082 /* Helper function for rs6000_split_logical to emit a logical instruction after
33083 splitting the operation into individual GPR registers.
33084
33085 DEST is the destination register.
33086 OP1 and OP2 are the input source registers.
33087 CODE is the base operation (AND, IOR, XOR, NOT).
33088 MODE is the machine mode.
33089 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33090 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33091 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33092
33093 static void
33094 rs6000_split_logical_inner (rtx dest,
33095 rtx op1,
33096 rtx op2,
33097 enum rtx_code code,
33098 machine_mode mode,
33099 bool complement_final_p,
33100 bool complement_op1_p,
33101 bool complement_op2_p)
33102 {
33103 rtx bool_rtx;
33104
33105 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
33106 if (op2 && GET_CODE (op2) == CONST_INT
33107 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
33108 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33109 {
33110 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
33111 HOST_WIDE_INT value = INTVAL (op2) & mask;
33112
33113 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
33114 if (code == AND)
33115 {
33116 if (value == 0)
33117 {
33118 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
33119 return;
33120 }
33121
33122 else if (value == mask)
33123 {
33124 if (!rtx_equal_p (dest, op1))
33125 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33126 return;
33127 }
33128 }
33129
33130 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
33131 into separate ORI/ORIS or XORI/XORIS instructions. */
33132 else if (code == IOR || code == XOR)
33133 {
33134 if (value == 0)
33135 {
33136 if (!rtx_equal_p (dest, op1))
33137 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33138 return;
33139 }
33140 }
33141 }
33142
33143 if (code == AND && mode == SImode
33144 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33145 {
33146 emit_insn (gen_andsi3 (dest, op1, op2));
33147 return;
33148 }
33149
33150 if (complement_op1_p)
33151 op1 = gen_rtx_NOT (mode, op1);
33152
33153 if (complement_op2_p)
33154 op2 = gen_rtx_NOT (mode, op2);
33155
33156 bool_rtx = ((code == NOT)
33157 ? gen_rtx_NOT (mode, op1)
33158 : gen_rtx_fmt_ee (code, mode, op1, op2));
33159
33160 if (complement_final_p)
33161 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
33162
33163 emit_insn (gen_rtx_SET (VOIDmode, dest, bool_rtx));
33164 }
33165
33166 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
33167 operations are split immediately during RTL generation to allow for more
33168 optimizations of the AND/IOR/XOR.
33169
33170 OPERANDS is an array containing the destination and two input operands.
33171 CODE is the base operation (AND, IOR, XOR, NOT).
33172 MODE is the machine mode.
33173 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33174 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33175 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33178
33179 static void
33180 rs6000_split_logical_di (rtx operands[3],
33181 enum rtx_code code,
33182 bool complement_final_p,
33183 bool complement_op1_p,
33184 bool complement_op2_p)
33185 {
33186 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C (0xffffffff);
33187 const HOST_WIDE_INT upper_32bits = ~lower_32bits;
33188 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C (0x80000000);
33189 enum hi_lo { hi = 0, lo = 1 };
33190 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
33191 size_t i;
33192
33193 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
33194 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
33195 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
33196 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
33197
33198 if (code == NOT)
33199 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
33200 else
33201 {
33202 if (GET_CODE (operands[2]) != CONST_INT)
33203 {
33204 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
33205 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
33206 }
33207 else
33208 {
33209 HOST_WIDE_INT value = INTVAL (operands[2]);
33210 HOST_WIDE_INT value_hi_lo[2];
33211
33212 gcc_assert (!complement_final_p);
33213 gcc_assert (!complement_op1_p);
33214 gcc_assert (!complement_op2_p);
33215
33216 value_hi_lo[hi] = value >> 32;
33217 value_hi_lo[lo] = value & lower_32bits;
33218
33219 for (i = 0; i < 2; i++)
33220 {
33221 HOST_WIDE_INT sub_value = value_hi_lo[i];
33222
33223 if (sub_value & sign_bit)
33224 sub_value |= upper_32bits;
33225
33226 op2_hi_lo[i] = GEN_INT (sub_value);
33227
33228 /* If this is an AND instruction, check to see if we need to load
33229 the value in a register. */
33230 if (code == AND && sub_value != -1 && sub_value != 0
33231 && !and_operand (op2_hi_lo[i], SImode))
33232 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
33233 }
33234 }
33235 }
33236
33237 for (i = 0; i < 2; i++)
33238 {
33239 /* Split large IOR/XOR operations. */
33240 if ((code == IOR || code == XOR)
33241 && GET_CODE (op2_hi_lo[i]) == CONST_INT
33242 && !complement_final_p
33243 && !complement_op1_p
33244 && !complement_op2_p
33245 && !logical_const_operand (op2_hi_lo[i], SImode))
33246 {
33247 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
33248 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C (0xffff0000);
33249 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C (0x0000ffff);
33250 rtx tmp = gen_reg_rtx (SImode);
33251
33252 /* Make sure the constant is sign extended. */
33253 if ((hi_16bits & sign_bit) != 0)
33254 hi_16bits |= upper_32bits;
33255
33256 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
33257 code, SImode, false, false, false);
33258
33259 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
33260 code, SImode, false, false, false);
33261 }
33262 else
33263 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
33264 code, SImode, complement_final_p,
33265 complement_op1_p, complement_op2_p);
33266 }
33267
33268 return;
33269 }
33270
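/* As a worked sketch with a hypothetical constant: on a 32-bit target,
   (ior:DI r (const_int 0x123456789)) is split into an SImode IOR of 1
   for the high word, while the low-word constant 0x23456789 fails
   logical_const_operand and so takes the two-step path above:

     (set tmp (ior:SI r_lo (const_int 0x23450000)))    ; oris
     (set d_lo (ior:SI tmp (const_int 0x6789)))        ; ori

   using the hi/lo 16-bit halves computed from the constant.  */
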
33271 /* Split the insns that make up boolean operations operating on multiple GPR
33272 registers. The boolean MD patterns ensure that the inputs either are
33273 exactly the same as the output registers, or there is no overlap.
33274
33275 OPERANDS is an array containing the destination and two input operands.
33276 CODE is the base operation (AND, IOR, XOR, NOT).
33277 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33278 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33279 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33280
33281 void
33282 rs6000_split_logical (rtx operands[3],
33283 enum rtx_code code,
33284 bool complement_final_p,
33285 bool complement_op1_p,
33286 bool complement_op2_p)
33287 {
33288 machine_mode mode = GET_MODE (operands[0]);
33289 machine_mode sub_mode;
33290 rtx op0, op1, op2;
33291 int sub_size, regno0, regno1, nregs, i;
33292
33293 /* If this is DImode, use the specialized version that can run before
33294 register allocation. */
33295 if (mode == DImode && !TARGET_POWERPC64)
33296 {
33297 rs6000_split_logical_di (operands, code, complement_final_p,
33298 complement_op1_p, complement_op2_p);
33299 return;
33300 }
33301
33302 op0 = operands[0];
33303 op1 = operands[1];
33304 op2 = (code == NOT) ? NULL_RTX : operands[2];
33305 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
33306 sub_size = GET_MODE_SIZE (sub_mode);
33307 regno0 = REGNO (op0);
33308 regno1 = REGNO (op1);
33309
33310 gcc_assert (reload_completed);
33311 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33312 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33313
33314 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
33315 gcc_assert (nregs > 1);
33316
33317 if (op2 && REG_P (op2))
33318 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
33319
33320 for (i = 0; i < nregs; i++)
33321 {
33322 int offset = i * sub_size;
33323 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
33324 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
33325 rtx sub_op2 = ((code == NOT)
33326 ? NULL_RTX
33327 : simplify_subreg (sub_mode, op2, mode, offset));
33328
33329 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
33330 complement_final_p, complement_op1_p,
33331 complement_op2_p);
33332 }
33333
33334 return;
33335 }
33336
33337 \f
33338 /* Return true if the peephole2 can combine a load involving a combination of
33339 an addis instruction and a load with an offset that can be fused together on
33340 a power8. */
33341
33342 bool
33343 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
33344 rtx addis_value, /* addis value. */
33345 rtx target, /* target register that is loaded. */
33346 rtx mem) /* bottom part of the memory addr. */
33347 {
33348 rtx addr;
33349 rtx base_reg;
33350
33351 /* Validate arguments. */
33352 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
33353 return false;
33354
33355 if (!base_reg_operand (target, GET_MODE (target)))
33356 return false;
33357
33358 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33359 return false;
33360
33361 /* Allow sign/zero extension. */
33362 if (GET_CODE (mem) == ZERO_EXTEND
33363 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33364 mem = XEXP (mem, 0);
33365
33366 if (!MEM_P (mem))
33367 return false;
33368
33369 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33370 return false;
33371
33372 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33373 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33374 return false;
33375
33376 /* Validate that the register used to load the high value is either the
33377 register being loaded, or we can safely replace its use.
33378
33379 This function is only called from the peephole2 pass and we assume that
33380 there are 2 instructions in the peephole (addis and load), so we want to
33381 check if the target register was not used in the memory address and the
33382 register to hold the addis result is dead after the peephole. */
33383 if (REGNO (addis_reg) != REGNO (target))
33384 {
33385 if (reg_mentioned_p (target, mem))
33386 return false;
33387
33388 if (!peep2_reg_dead_p (2, addis_reg))
33389 return false;
33390
33391 /* If the target register being loaded is the stack pointer, we must
33392 avoid loading any other value into it, even temporarily. */
33393 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33394 return false;
33395 }
33396
33397 base_reg = XEXP (addr, 0);
33398 return REGNO (addis_reg) == REGNO (base_reg);
33399 }
33400
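/* A sketch of the two-insn window this peephole validates, using the
   TOC-relative templates emitted by emit_fusion_gpr_load below
   (register numbers are illustrative):

     addis r9,r2,sym@toc@ha     ; addis_reg/addis_value
     lwz   r9,sym@toc@l(r9)     ; target loaded via the same register

   Because the addis destination and the loaded register match, power8
   can fuse the pair.  */
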
33401 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33402 sequence. We adjust the addis register to use the target register. If the
33403 load sign extends, we adjust the code to do the zero extending load, and an
33404 explicit sign extension later since the fusion only covers zero extending
33405 loads.
33406
33407 The operands are:
33408 operands[0] register set with addis (to be replaced with target)
33409 operands[1] value set via addis
33410 operands[2] target register being loaded
33411 operands[3] D-form memory reference using operands[0]. */
33412
33413 void
33414 expand_fusion_gpr_load (rtx *operands)
33415 {
33416 rtx addis_value = operands[1];
33417 rtx target = operands[2];
33418 rtx orig_mem = operands[3];
33419 rtx new_addr, new_mem, orig_addr, offset;
33420 enum rtx_code plus_or_lo_sum;
33421 machine_mode target_mode = GET_MODE (target);
33422 machine_mode extend_mode = target_mode;
33423 machine_mode ptr_mode = Pmode;
33424 enum rtx_code extend = UNKNOWN;
33425
33426 if (GET_CODE (orig_mem) == ZERO_EXTEND
33427 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
33428 {
33429 extend = GET_CODE (orig_mem);
33430 orig_mem = XEXP (orig_mem, 0);
33431 target_mode = GET_MODE (orig_mem);
33432 }
33433
33434 gcc_assert (MEM_P (orig_mem));
33435
33436 orig_addr = XEXP (orig_mem, 0);
33437 plus_or_lo_sum = GET_CODE (orig_addr);
33438 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
33439
33440 offset = XEXP (orig_addr, 1);
33441 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
33442 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
33443
33444 if (extend != UNKNOWN)
33445 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
33446
33447 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
33448 UNSPEC_FUSION_GPR);
33449 emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
33450
33451 if (extend == SIGN_EXTEND)
33452 {
33453 int sub_off = ((BYTES_BIG_ENDIAN)
33454 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
33455 : 0);
33456 rtx sign_reg
33457 = simplify_subreg (target_mode, target, extend_mode, sub_off);
33458
33459 emit_insn (gen_rtx_SET (VOIDmode, target,
33460 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
33461 }
33462
33463 return;
33464 }
33465
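/* For the sign-extending case the expansion is, in sketch form (modes
   shown for a halfword load into a 64-bit register):

     (set target (unspec:DI [(zero_extend:DI (mem:HI new_addr))]
                            UNSPEC_FUSION_GPR))
     (set target (sign_extend:DI (subreg:HI target ...)))

   i.e. the fused load itself always zero-extends, and the separate
   sign_extend insn recovers the signed value.  */
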
33466 /* Return a string to fuse an addis instruction with a gpr load into the same
33467 register that the addis instruction set. The address that is used
33468 is the logical address that was formed during peephole2:
33469 (lo_sum (high) (low-part))
33470
33471 The code is complicated, so we call output_asm_insn directly, and just
33472 return "". */
33473
33474 const char *
33475 emit_fusion_gpr_load (rtx target, rtx mem)
33476 {
33477 rtx addis_value;
33478 rtx fuse_ops[10];
33479 rtx addr;
33480 rtx load_offset;
33481 const char *addis_str = NULL;
33482 const char *load_str = NULL;
33483 const char *mode_name = NULL;
33484 char insn_template[80];
33485 machine_mode mode;
33486 const char *comment_str = ASM_COMMENT_START;
33487
33488 if (GET_CODE (mem) == ZERO_EXTEND)
33489 mem = XEXP (mem, 0);
33490
33491 gcc_assert (REG_P (target) && MEM_P (mem));
33492
33493 if (*comment_str == ' ')
33494 comment_str++;
33495
33496 addr = XEXP (mem, 0);
33497 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33498 gcc_unreachable ();
33499
33500 addis_value = XEXP (addr, 0);
33501 load_offset = XEXP (addr, 1);
33502
33503 /* Now emit the load instruction to the same register. */
33504 mode = GET_MODE (mem);
33505 switch (mode)
33506 {
33507 case QImode:
33508 mode_name = "char";
33509 load_str = "lbz";
33510 break;
33511
33512 case HImode:
33513 mode_name = "short";
33514 load_str = "lhz";
33515 break;
33516
33517 case SImode:
33518 mode_name = "int";
33519 load_str = "lwz";
33520 break;
33521
33522 case DImode:
33523 gcc_assert (TARGET_POWERPC64);
33524 mode_name = "long";
33525 load_str = "ld";
33526 break;
33527
33528 default:
33529 gcc_unreachable ();
33530 }
33531
33532 /* Emit the addis instruction. */
33533 fuse_ops[0] = target;
33534 if (satisfies_constraint_L (addis_value))
33535 {
33536 fuse_ops[1] = addis_value;
33537 addis_str = "lis %0,%v1";
33538 }
33539
33540 else if (GET_CODE (addis_value) == PLUS)
33541 {
33542 rtx op0 = XEXP (addis_value, 0);
33543 rtx op1 = XEXP (addis_value, 1);
33544
33545 if (REG_P (op0) && CONST_INT_P (op1)
33546 && satisfies_constraint_L (op1))
33547 {
33548 fuse_ops[1] = op0;
33549 fuse_ops[2] = op1;
33550 addis_str = "addis %0,%1,%v2";
33551 }
33552 }
33553
33554 else if (GET_CODE (addis_value) == HIGH)
33555 {
33556 rtx value = XEXP (addis_value, 0);
33557 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
33558 {
33559 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
33560 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
33561 if (TARGET_ELF)
33562 addis_str = "addis %0,%2,%1@toc@ha";
33563
33564 else if (TARGET_XCOFF)
33565 addis_str = "addis %0,%1@u(%2)";
33566
33567 else
33568 gcc_unreachable ();
33569 }
33570
33571 else if (GET_CODE (value) == PLUS)
33572 {
33573 rtx op0 = XEXP (value, 0);
33574 rtx op1 = XEXP (value, 1);
33575
33576 if (GET_CODE (op0) == UNSPEC
33577 && XINT (op0, 1) == UNSPEC_TOCREL
33578 && CONST_INT_P (op1))
33579 {
33580 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
33581 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
33582 fuse_ops[3] = op1;
33583 if (TARGET_ELF)
33584 addis_str = "addis %0,%2,%1+%3@toc@ha";
33585
33586 else if (TARGET_XCOFF)
33587 addis_str = "addis %0,%1+%3@u(%2)";
33588
33589 else
33590 gcc_unreachable ();
33591 }
33592 }
33593
33594 else if (satisfies_constraint_L (value))
33595 {
33596 fuse_ops[1] = value;
33597 addis_str = "lis %0,%v1";
33598 }
33599
33600 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
33601 {
33602 fuse_ops[1] = value;
33603 addis_str = "lis %0,%1@ha";
33604 }
33605 }
33606
33607 if (!addis_str)
33608 fatal_insn ("Could not generate addis value for fusion", addis_value);
33609
33610 sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
33611 comment_str, mode_name);
33612 output_asm_insn (insn_template, fuse_ops);
33613
33614 /* Emit the D-form load instruction. */
33615 if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
33616 {
33617 sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
33618 fuse_ops[1] = load_offset;
33619 output_asm_insn (insn_template, fuse_ops);
33620 }
33621
33622 else if (GET_CODE (load_offset) == UNSPEC
33623 && XINT (load_offset, 1) == UNSPEC_TOCREL)
33624 {
33625 if (TARGET_ELF)
33626 sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
33627
33628 else if (TARGET_XCOFF)
33629 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33630
33631 else
33632 gcc_unreachable ();
33633
33634 fuse_ops[1] = XVECEXP (load_offset, 0, 0);
33635 output_asm_insn (insn_template, fuse_ops);
33636 }
33637
33638 else if (GET_CODE (load_offset) == PLUS
33639 && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
33640 && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
33641 && CONST_INT_P (XEXP (load_offset, 1)))
33642 {
33643 rtx tocrel_unspec = XEXP (load_offset, 0);
33644 if (TARGET_ELF)
33645 sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
33646
33647 else if (TARGET_XCOFF)
33648 sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
33649
33650 else
33651 gcc_unreachable ();
33652
33653 fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
33654 fuse_ops[2] = XEXP (load_offset, 1);
33655 output_asm_insn (insn_template, fuse_ops);
33656 }
33657
33658 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
33659 {
33660 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33661
33662 fuse_ops[1] = load_offset;
33663 output_asm_insn (insn_template, fuse_ops);
33664 }
33665
33666 else
33667 fatal_insn ("Unable to generate load offset for fusion", load_offset);
33668
33669 return "";
33670 }
33671 \f
33672 /* Analyze vector computations and remove unnecessary doubleword
33673 swaps (xxswapdi instructions). This pass is performed only
33674 for little-endian VSX code generation.
33675
33676 For this specific case, loads and stores of 4x32 and 2x64 vectors
33677 are inefficient. These are implemented using the lxvd2x and
33678 stxvd2x instructions, which invert the order of doublewords in
33679 a vector register. Thus the code generation inserts an xxswapdi
33680 after each such load, and prior to each such store. (For spill
33681 code after register assignment, an additional xxswapdi is inserted
33682 following each store in order to return a hard register to its
33683 unpermuted value.)
33684
33685 The extra xxswapdi instructions reduce performance. This can be
33686 particularly bad for vectorized code. The purpose of this pass
33687 is to reduce the number of xxswapdi instructions required for
33688 correctness.
33689
33690 The primary insight is that much code that operates on vectors
33691 does not care about the relative order of elements in a register,
33692 so long as the correct memory order is preserved. If we have
33693 a computation where all input values are provided by lxvd2x/xxswapdi
33694 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
33695 and all intermediate computations are pure SIMD (independent of
33696 element order), then all the xxswapdi's associated with the loads
33697 and stores may be removed.
33698
33699 This pass uses some of the infrastructure and logical ideas from
33700 the "web" pass in web.c. We create maximal webs of computations
33701 fitting the description above using union-find. Each such web is
33702 then optimized by removing its unnecessary xxswapdi instructions.
33703
33704 The pass is placed prior to global optimization so that we can
33705 perform the optimization in the safest and simplest way possible;
33706 that is, by replacing each xxswapdi insn with a register copy insn.
33707 Subsequent forward propagation will remove copies where possible.
33708
33709 There are some operations sensitive to element order for which we
33710 can still allow the operation, provided we modify those operations.
33711 These include CONST_VECTORs, for which we must swap the first and
33712 second halves of the constant vector; and SUBREGs, for which we
33713 must adjust the byte offset to account for the swapped doublewords.
33714 A remaining opportunity would be non-immediate-form splats, for
33715 which we should adjust the selected lane of the input. We should
33716 also make code generation adjustments for sum-across operations,
33717 since this is a common vectorizer reduction.
33718
33719 Because we run prior to the first split, we can see loads and stores
33720 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
33721 vector loads and stores that have not yet been split into a permuting
33722 load/store and a swap. (One way this can happen is with a builtin
33723 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
33724 than deleting a swap, we convert the load/store into a permuting
33725 load/store (which effectively removes the swap). */
33726
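/* A small before/after sketch in pseudo-assembly (vsr and gpr numbers
   are illustrative) for a vector copy within one optimizable web:

     before:  lxvd2x   vs0,0,r3          after:  lxvd2x  vs0,0,r3
              xxswapdi vs0,vs0                   stxvd2x vs0,0,r4
              xxswapdi vs0,vs0
              stxvd2x  vs0,0,r4

   The doubleword reversal performed by the load is undone by the one
   performed by the store, so both xxswapdi insns can be removed.  */
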
33727 /* Notes on Permutes
33728
33729 We do not currently handle computations that contain permutes. There
33730 is a general transformation that can be performed correctly, but it
33731 may introduce more expensive code than it replaces. To handle these
33732 would require a cost model to determine when to perform the optimization.
33733 This commentary records how this could be done if desired.
33734
33735 The most general permute is something like this (example for V16QI):
33736
33737 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
33738 (parallel [(const_int a0) (const_int a1)
33739 ...
33740 (const_int a14) (const_int a15)]))
33741
33742 where a0,...,a15 are in [0,31] and select elements from op1 and op2
33743 to produce the result.
33744
33745 Regardless of mode, we can convert the PARALLEL to a mask of 16
33746 byte-element selectors. Let's call this M, with M[i] representing
33747 the ith byte-element selector value. Then if we swap doublewords
33748 throughout the computation, we can get correct behavior by replacing
33749 M with M' as follows:
33750
33751 { M[i+8]+8 : i < 8, M[i+8] in [0,7] U [16,23]
33752 M'[i] = { M[i+8]-8 : i < 8, M[i+8] in [8,15] U [24,31]
33753 { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23]
33754 { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
33755
33756 This seems promising at first, since we are just replacing one mask
33757 with another. But certain masks are preferable to others. If M
33758 is a mask that matches a vmrghh pattern, for example, M' certainly
33759 will not. Instead of a single vmrghh, we would generate a load of
33760 M' and a vperm. So we would need to know how many xxswapdi's we can
33761 remove as a result of this transformation to determine if it's
33762 profitable; and preferably the logic would need to be aware of all
33763 the special preferable masks.
33764
33765 Another form of permute is an UNSPEC_VPERM, in which the mask is
33766 already in a register. In some cases, this mask may be a constant
33767 that we can discover with ud-chains, in which case the above
33768 transformation is ok. However, the common usage here is for the
33769 mask to be produced by an UNSPEC_LVSL, in which case the mask
33770 cannot be known at compile time. In such a case we would have to
33771 generate several instructions to compute M' as above at run time,
33772 and a cost model is needed again. */
33773
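/* As a worked instance of the table above: if M[8] = 3, then for i = 0
   we have i < 8 and M[i+8] in [0,7], so the first rule applies and
   M'[0] = M[8] + 8 = 11.  */
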
33774 /* This is based on the union-find logic in web.c. web_entry_base is
33775 defined in df.h. */
33776 class swap_web_entry : public web_entry_base
33777 {
33778 public:
33779 /* Pointer to the insn. */
33780 rtx_insn *insn;
33781 /* Set if insn contains a mention of a vector register. All other
33782 fields are undefined if this field is unset. */
33783 unsigned int is_relevant : 1;
33784 /* Set if insn is a load. */
33785 unsigned int is_load : 1;
33786 /* Set if insn is a store. */
33787 unsigned int is_store : 1;
33788 /* Set if insn is a doubleword swap. This can either be a register swap
33789 or a permuting load or store (test is_load and is_store for this). */
33790 unsigned int is_swap : 1;
33791 /* Set if the insn has a live-in use of a parameter register. */
33792 unsigned int is_live_in : 1;
33793 /* Set if the insn has a live-out def of a return register. */
33794 unsigned int is_live_out : 1;
33795 /* Set if the insn contains a subreg reference of a vector register. */
33796 unsigned int contains_subreg : 1;
33797 /* Set if the insn contains a 128-bit integer operand. */
33798 unsigned int is_128_int : 1;
33799 /* Set if this is a call-insn. */
33800 unsigned int is_call : 1;
33801 /* Set if this insn does not perform a vector operation for which
33802 element order matters, or if we know how to fix it up if it does.
33803 Undefined if is_swap is set. */
33804 unsigned int is_swappable : 1;
33805 /* A nonzero value indicates what kind of special handling for this
33806 insn is required if doublewords are swapped. Undefined if
33807 is_swappable is not set. */
33808 unsigned int special_handling : 3;
33809 /* Set if the web represented by this entry cannot be optimized. */
33810 unsigned int web_not_optimizable : 1;
33811 /* Set if this insn should be deleted. */
33812 unsigned int will_delete : 1;
33813 };
33814
33815 enum special_handling_values {
33816 SH_NONE = 0,
33817 SH_CONST_VECTOR,
33818 SH_SUBREG,
33819 SH_NOSWAP_LD,
33820 SH_NOSWAP_ST,
33821 SH_EXTRACT,
33822 SH_SPLAT
33823 };
33824
33825 /* Union INSN with all insns containing definitions that reach USE.
33826 Detect whether USE is live-in to the current function. */
33827 static void
33828 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
33829 {
33830 struct df_link *link = DF_REF_CHAIN (use);
33831
33832 if (!link)
33833 insn_entry[INSN_UID (insn)].is_live_in = 1;
33834
33835 while (link)
33836 {
33837 if (DF_REF_IS_ARTIFICIAL (link->ref))
33838 insn_entry[INSN_UID (insn)].is_live_in = 1;
33839
33840 if (DF_REF_INSN_INFO (link->ref))
33841 {
33842 rtx def_insn = DF_REF_INSN (link->ref);
33843 (void)unionfind_union (insn_entry + INSN_UID (insn),
33844 insn_entry + INSN_UID (def_insn));
33845 }
33846
33847 link = link->next;
33848 }
33849 }
33850
33851 /* Union INSN with all insns containing uses reached from DEF.
33852 Detect whether DEF is live-out from the current function. */
33853 static void
33854 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
33855 {
33856 struct df_link *link = DF_REF_CHAIN (def);
33857
33858 if (!link)
33859 insn_entry[INSN_UID (insn)].is_live_out = 1;
33860
33861 while (link)
33862 {
33863 /* This could be an eh use or some other artificial use;
33864 we treat these all the same (killing the optimization). */
33865 if (DF_REF_IS_ARTIFICIAL (link->ref))
33866 insn_entry[INSN_UID (insn)].is_live_out = 1;
33867
33868 if (DF_REF_INSN_INFO (link->ref))
33869 {
33870 rtx use_insn = DF_REF_INSN (link->ref);
33871 (void)unionfind_union (insn_entry + INSN_UID (insn),
33872 insn_entry + INSN_UID (use_insn));
33873 }
33874
33875 link = link->next;
33876 }
33877 }
33878
33879 /* Return 1 iff INSN is a load insn, including permuting loads that
33880 represent an lxvd2x instruction; else return 0. */
33881 static unsigned int
33882 insn_is_load_p (rtx insn)
33883 {
33884 rtx body = PATTERN (insn);
33885
33886 if (GET_CODE (body) == SET)
33887 {
33888 if (GET_CODE (SET_SRC (body)) == MEM)
33889 return 1;
33890
33891 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
33892 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
33893 return 1;
33894
33895 return 0;
33896 }
33897
33898 if (GET_CODE (body) != PARALLEL)
33899 return 0;
33900
33901 rtx set = XVECEXP (body, 0, 0);
33902
33903 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
33904 return 1;
33905
33906 return 0;
33907 }
33908
33909 /* Return 1 iff INSN is a store insn, including permuting stores that
33910 represent an stxvd2x instruction; else return 0. */
33911 static unsigned int
33912 insn_is_store_p (rtx insn)
33913 {
33914 rtx body = PATTERN (insn);
33915 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
33916 return 1;
33917 if (GET_CODE (body) != PARALLEL)
33918 return 0;
33919 rtx set = XVECEXP (body, 0, 0);
33920 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
33921 return 1;
33922 return 0;
33923 }
33924
33925 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
33926 a permuting load, or a permuting store. */
33927 static unsigned int
33928 insn_is_swap_p (rtx insn)
33929 {
33930 rtx body = PATTERN (insn);
33931 if (GET_CODE (body) != SET)
33932 return 0;
33933 rtx rhs = SET_SRC (body);
33934 if (GET_CODE (rhs) != VEC_SELECT)
33935 return 0;
33936 rtx parallel = XEXP (rhs, 1);
33937 if (GET_CODE (parallel) != PARALLEL)
33938 return 0;
33939 unsigned int len = XVECLEN (parallel, 0);
33940 if (len != 2 && len != 4 && len != 8 && len != 16)
33941 return 0;
33942 for (unsigned int i = 0; i < len / 2; ++i)
33943 {
33944 rtx op = XVECEXP (parallel, 0, i);
33945 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
33946 return 0;
33947 }
33948 for (unsigned int i = len / 2; i < len; ++i)
33949 {
33950 rtx op = XVECEXP (parallel, 0, i);
33951 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
33952 return 0;
33953 }
33954 return 1;
33955 }
33956
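/* For reference, a V4SI doubleword swap accepted above has the shape
   (register numbers are illustrative):

     (set (reg:V4SI 64)
          (vec_select:V4SI (reg:V4SI 65)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   Here len = 4, so the first half must select elements 2,3 and the
   second half elements 0,1.  */
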
33957 /* Return 1 iff OP is an operand that will not be affected by having
33958 vector doublewords swapped in memory. */
33959 static unsigned int
33960 rtx_is_swappable_p (rtx op, unsigned int *special)
33961 {
33962 enum rtx_code code = GET_CODE (op);
33963 int i, j;
33964 rtx parallel;
33965
33966 switch (code)
33967 {
33968 case LABEL_REF:
33969 case SYMBOL_REF:
33970 case CLOBBER:
33971 case REG:
33972 return 1;
33973
33974 case VEC_CONCAT:
33975 case ASM_INPUT:
33976 case ASM_OPERANDS:
33977 return 0;
33978
33979 case CONST_VECTOR:
33980 {
33981 *special = SH_CONST_VECTOR;
33982 return 1;
33983 }
33984
33985 case VEC_DUPLICATE:
33986 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
33987 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
33988 it represents a vector splat for which we can do special
33989 handling. */
33990 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
33991 return 1;
33992 else if (GET_CODE (XEXP (op, 0)) == REG
33993 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
33994 /* This catches V2DF and V2DI splat, at a minimum. */
33995 return 1;
33996 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
33997 /* If the duplicated item is from a select, defer to the select
33998 processing to see if we can change the lane for the splat. */
33999 return rtx_is_swappable_p (XEXP (op, 0), special);
34000 else
34001 return 0;
34002
34003 case VEC_SELECT:
34004 /* A vec_extract operation is ok if we change the lane. */
34005 if (GET_CODE (XEXP (op, 0)) == REG
34006 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
34007 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
34008 && XVECLEN (parallel, 0) == 1
34009 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
34010 {
34011 *special = SH_EXTRACT;
34012 return 1;
34013 }
34014 else
34015 return 0;
34016
34017 case UNSPEC:
34018 {
34019 /* Various operations are unsafe for this optimization, at least
34020 without significant additional work. Permutes are obviously
34021 problematic, as both the permute control vector and the ordering
34022 of the target values are invalidated by doubleword swapping.
34023 Vector pack and unpack modify the number of vector lanes.
34024 Merge-high/low will not operate correctly on swapped operands.
34025 Vector shifts across element boundaries are clearly uncool,
34026 as are vector select and concatenate operations. Vector
34027 sum-across instructions define one operand with a specific
34028 order-dependent element, so additional fixup code would be
34029 needed to make those work. Vector set and non-immediate-form
34030 vector splat are element-order sensitive. A few of these
34031 cases might be workable with special handling if required. */
34032 int val = XINT (op, 1);
34033 switch (val)
34034 {
34035 default:
34036 break;
34037 case UNSPEC_VMRGH_DIRECT:
34038 case UNSPEC_VMRGL_DIRECT:
34039 case UNSPEC_VPACK_SIGN_SIGN_SAT:
34040 case UNSPEC_VPACK_SIGN_UNS_SAT:
34041 case UNSPEC_VPACK_UNS_UNS_MOD:
34042 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
34043 case UNSPEC_VPACK_UNS_UNS_SAT:
34044 case UNSPEC_VPERM:
34045 case UNSPEC_VPERM_UNS:
34046 case UNSPEC_VPERMHI:
34047 case UNSPEC_VPERMSI:
34048 case UNSPEC_VPKPX:
34049 case UNSPEC_VSLDOI:
34050 case UNSPEC_VSLO:
34051 case UNSPEC_VSRO:
34052 case UNSPEC_VSUM2SWS:
34053 case UNSPEC_VSUM4S:
34054 case UNSPEC_VSUM4UBS:
34055 case UNSPEC_VSUMSWS:
34056 case UNSPEC_VSUMSWS_DIRECT:
34057 case UNSPEC_VSX_CONCAT:
34058 case UNSPEC_VSX_SET:
34059 case UNSPEC_VSX_SLDWI:
34060 case UNSPEC_VUNPACK_HI_SIGN:
34061 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
34062 case UNSPEC_VUNPACK_LO_SIGN:
34063 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
34064 case UNSPEC_VUPKHPX:
34065 case UNSPEC_VUPKHS_V4SF:
34066 case UNSPEC_VUPKHU_V4SF:
34067 case UNSPEC_VUPKLPX:
34068 case UNSPEC_VUPKLS_V4SF:
34069 case UNSPEC_VUPKLU_V4SF:
34070 /* The following could be handled as an idiom with XXSPLTW.
34071 These place a scalar in BE element zero, but the XXSPLTW
34072 will currently expect it in BE element 2 in a swapped
34073 region. When one of these feeds an XXSPLTW with no other
34074 defs/uses either way, we can avoid the lane change for
34075 XXSPLTW and things will be correct. TBD. */
34076 case UNSPEC_VSX_CVDPSPN:
34077 case UNSPEC_VSX_CVSPDP:
34078 case UNSPEC_VSX_CVSPDPN:
34079 return 0;
34080 case UNSPEC_VSPLT_DIRECT:
34081 *special = SH_SPLAT;
34082 return 1;
34083 }
34084 }
34085
34086 default:
34087 break;
34088 }
34089
34090 const char *fmt = GET_RTX_FORMAT (code);
34091 int ok = 1;
34092
34093 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34094 if (fmt[i] == 'e' || fmt[i] == 'u')
34095 {
34096 unsigned int special_op = SH_NONE;
34097 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
34098 /* Ensure we never have two kinds of special handling
34099 for the same insn. */
34100 if (*special != SH_NONE && special_op != SH_NONE
34101 && *special != special_op)
34102 return 0;
34103 *special = special_op;
34104 }
34105 else if (fmt[i] == 'E')
34106 for (j = 0; j < XVECLEN (op, i); ++j)
34107 {
34108 unsigned int special_op = SH_NONE;
34109 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
34110 /* Ensure we never have two kinds of special handling
34111 for the same insn. */
34112 if (*special != SH_NONE && special_op != SH_NONE
34113 && *special != special_op)
34114 return 0;
34115 *special = special_op;
34116 }
34117
34118 return ok;
34119 }
34120
34121 /* Return 1 iff INSN is an insn that will not be affected by
34122 having vector doublewords swapped in memory (in which case
34123 *SPECIAL is unchanged), or that can be modified to be correct
34124 if vector doublewords are swapped in memory (in which case
34125 *SPECIAL is changed to a value indicating how). */
34126 static unsigned int
34127 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
34128 unsigned int *special)
34129 {
34130 /* Calls are always bad. */
34131 if (GET_CODE (insn) == CALL_INSN)
34132 return 0;
34133
34134 /* Loads and stores seen here are not permuting, but we can still
34135 fix them up by converting them to permuting ones. Exceptions:
34136 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
34137 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
34138 for the SET source. */
34139 rtx body = PATTERN (insn);
34140 int i = INSN_UID (insn);
34141
34142 if (insn_entry[i].is_load)
34143 {
34144 if (GET_CODE (body) == SET)
34145 {
34146 *special = SH_NOSWAP_LD;
34147 return 1;
34148 }
34149 else
34150 return 0;
34151 }
34152
34153 if (insn_entry[i].is_store)
34154 {
34155 if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
34156 {
34157 *special = SH_NOSWAP_ST;
34158 return 1;
34159 }
34160 else
34161 return 0;
34162 }
34163
34164 /* Otherwise check the operands for vector lane violations. */
34165 return rtx_is_swappable_p (body, special);
34166 }
34167
34168 enum chain_purpose { FOR_LOADS, FOR_STORES };
34169
34170 /* Return true if the UD or DU chain headed by LINK is non-empty,
34171 and every entry on the chain references an insn that is a
34172 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
34173 register swap must have only permuting loads as reaching defs.
34174 If PURPOSE is FOR_STORES, each such register swap must have only
34175 register swaps or permuting stores as reached uses. */
34176 static bool
34177 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
34178 enum chain_purpose purpose)
34179 {
34180 if (!link)
34181 return false;
34182
34183 for (; link; link = link->next)
34184 {
34185 if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
34186 continue;
34187
34188 if (DF_REF_IS_ARTIFICIAL (link->ref))
34189 return false;
34190
34191 rtx reached_insn = DF_REF_INSN (link->ref);
34192 unsigned uid = INSN_UID (reached_insn);
34193 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
34194
34195 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
34196 || insn_entry[uid].is_store)
34197 return false;
34198
34199 if (purpose == FOR_LOADS)
34200 {
34201 df_ref use;
34202 FOR_EACH_INSN_INFO_USE (use, insn_info)
34203 {
34204 struct df_link *swap_link = DF_REF_CHAIN (use);
34205
34206 while (swap_link)
34207 {
34208 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34209 return false;
34210
34211 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
34212 unsigned uid2 = INSN_UID (swap_def_insn);
34213
34214 /* Only permuting loads are allowed. */
34215 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
34216 return false;
34217
34218 swap_link = swap_link->next;
34219 }
34220 }
34221 }
34222 else if (purpose == FOR_STORES)
34223 {
34224 df_ref def;
34225 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34226 {
34227 struct df_link *swap_link = DF_REF_CHAIN (def);
34228
34229 while (swap_link)
34230 {
34231 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34232 return false;
34233
34234 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
34235 unsigned uid2 = INSN_UID (swap_use_insn);
34236
34237 /* Permuting stores or register swaps are allowed. */
34238 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
34239 return false;
34240
34241 swap_link = swap_link->next;
34242 }
34243 }
34244 }
34245 }
34246
34247 return true;
34248 }
34249
34250 /* Mark the xxswapdi instructions associated with permuting loads and
34251 stores for removal. Note that we only flag them for deletion here,
34252 as there is a possibility of a swap being reached from multiple
34253 loads, etc. */
34254 static void
34255 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
34256 {
34257 rtx insn = insn_entry[i].insn;
34258 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34259
34260 if (insn_entry[i].is_load)
34261 {
34262 df_ref def;
34263 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34264 {
34265 struct df_link *link = DF_REF_CHAIN (def);
34266
34267 /* We know by now that these are swaps, so we can delete
34268 them confidently. */
34269 while (link)
34270 {
34271 rtx use_insn = DF_REF_INSN (link->ref);
34272 insn_entry[INSN_UID (use_insn)].will_delete = 1;
34273 link = link->next;
34274 }
34275 }
34276 }
34277 else if (insn_entry[i].is_store)
34278 {
34279 df_ref use;
34280 FOR_EACH_INSN_INFO_USE (use, insn_info)
34281 {
34282 /* Ignore uses for addressability. */
34283 machine_mode mode = GET_MODE (DF_REF_REG (use));
34284 if (!VECTOR_MODE_P (mode))
34285 continue;
34286
34287 struct df_link *link = DF_REF_CHAIN (use);
34288
34289 /* We know by now that these are swaps, so we can delete
34290 them confidently. */
34291 while (link)
34292 {
34293 rtx def_insn = DF_REF_INSN (link->ref);
34294 insn_entry[INSN_UID (def_insn)].will_delete = 1;
34295 link = link->next;
34296 }
34297 }
34298 }
34299 }
34300
34301 /* OP is either a CONST_VECTOR or an expression containing one.
34302 Swap the first half of the vector with the second in the first
34303 case. Recurse to find it in the second. */
34304 static void
34305 swap_const_vector_halves (rtx op)
34306 {
34307 int i;
34308 enum rtx_code code = GET_CODE (op);
34309 if (GET_CODE (op) == CONST_VECTOR)
34310 {
34311 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
34312 for (i = 0; i < half_units; ++i)
34313 {
34314 rtx temp = CONST_VECTOR_ELT (op, i);
34315 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
34316 CONST_VECTOR_ELT (op, i + half_units) = temp;
34317 }
34318 }
34319 else
34320 {
34321 int j;
34322 const char *fmt = GET_RTX_FORMAT (code);
34323 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34324 if (fmt[i] == 'e' || fmt[i] == 'u')
34325 swap_const_vector_halves (XEXP (op, i));
34326 else if (fmt[i] == 'E')
34327 for (j = 0; j < XVECLEN (op, i); ++j)
34328 swap_const_vector_halves (XVECEXP (op, i, j));
34329 }
34330 }
34331
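/* E.g. a V4SI constant { 0, 1, 2, 3 } becomes { 2, 3, 0, 1 }: elements
   i and i + half_units are exchanged for i in 0..1.  */
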
34332 /* Find all subregs of a vector expression that perform a narrowing,
34333 and adjust the subreg index to account for doubleword swapping. */
34334 static void
34335 adjust_subreg_index (rtx op)
34336 {
34337 enum rtx_code code = GET_CODE (op);
34338 if (code == SUBREG
34339 && (GET_MODE_SIZE (GET_MODE (op))
34340 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
34341 {
34342 unsigned int index = SUBREG_BYTE (op);
34343 if (index < 8)
34344 index += 8;
34345 else
34346 index -= 8;
34347 SUBREG_BYTE (op) = index;
34348 }
34349
34350 const char *fmt = GET_RTX_FORMAT (code);
34351 int i, j;
34352 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34353 if (fmt[i] == 'e' || fmt[i] == 'u')
34354 adjust_subreg_index (XEXP (op, i));
34355 else if (fmt[i] == 'E')
34356 for (j = 0; j < XVECLEN (op, i); ++j)
34357 adjust_subreg_index (XVECEXP (op, i, j));
34358 }
34359
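/* E.g. a narrowing (subreg:DF (reg:V2DF x) 0) becomes
   (subreg:DF (reg:V2DF x) 8) and vice versa, since the doubleword that
   held bytes 0..7 now holds bytes 8..15.  */
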
34360 /* Convert the non-permuting load INSN to a permuting one. */
34361 static void
34362 permute_load (rtx_insn *insn)
34363 {
34364 rtx body = PATTERN (insn);
34365 rtx mem_op = SET_SRC (body);
34366 rtx tgt_reg = SET_DEST (body);
34367 machine_mode mode = GET_MODE (tgt_reg);
34368 int n_elts = GET_MODE_NUNITS (mode);
34369 int half_elts = n_elts / 2;
34370 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34371 int i, j;
34372 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34373 XVECEXP (par, 0, i) = GEN_INT (j);
34374 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34375 XVECEXP (par, 0, i) = GEN_INT (j);
34376 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
34377 SET_SRC (body) = sel;
34378 INSN_CODE (insn) = -1; /* Force re-recognition. */
34379 df_insn_rescan (insn);
34380
34381 if (dump_file)
34382 fprintf (dump_file, "Replacing load %d with permuted load\n",
34383 INSN_UID (insn));
34384 }
34385
34386 /* Convert the non-permuting store INSN to a permuting one. */
34387 static void
34388 permute_store (rtx_insn *insn)
34389 {
34390 rtx body = PATTERN (insn);
34391 rtx src_reg = SET_SRC (body);
34392 machine_mode mode = GET_MODE (src_reg);
34393 int n_elts = GET_MODE_NUNITS (mode);
34394 int half_elts = n_elts / 2;
34395 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34396 int i, j;
34397 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34398 XVECEXP (par, 0, i) = GEN_INT (j);
34399 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34400 XVECEXP (par, 0, i) = GEN_INT (j);
34401 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
34402 SET_SRC (body) = sel;
34403 INSN_CODE (insn) = -1; /* Force re-recognition. */
34404 df_insn_rescan (insn);
34405
34406 if (dump_file)
34407 fprintf (dump_file, "Replacing store %d with permuted store\n",
34408 INSN_UID (insn));
34409 }
34410
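/* After conversion, a V4SI load has the shape (register numbers are
   illustrative):

     (set (reg:V4SI 64)
          (vec_select:V4SI (mem:V4SI addr)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   which matches the permuting load pattern directly, so no separate
   xxswapdi is required.  */
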
34411 /* Given OP that contains a vector extract operation, adjust the index
34412 of the extracted lane to account for the doubleword swap. */
34413 static void
34414 adjust_extract (rtx_insn *insn)
34415 {
34416 rtx src = SET_SRC (PATTERN (insn));
34417 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
34418 account for that. */
34419 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
34420 rtx par = XEXP (sel, 1);
34421 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
34422 int lane = INTVAL (XVECEXP (par, 0, 0));
34423 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34424 XVECEXP (par, 0, 0) = GEN_INT (lane);
34425 INSN_CODE (insn) = -1; /* Force re-recognition. */
34426 df_insn_rescan (insn);
34427
34428 if (dump_file)
34429 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
34430 }
34431
34432 /* Given OP that contains a vector direct-splat operation, adjust the index
34433 of the source lane to account for the doubleword swap. */
34434 static void
34435 adjust_splat (rtx_insn *insn)
34436 {
34437 rtx body = PATTERN (insn);
34438 rtx unspec = XEXP (body, 1);
34439 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
34440 int lane = INTVAL (XVECEXP (unspec, 0, 1));
34441 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34442 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
34443 INSN_CODE (insn) = -1; /* Force re-recognition. */
34444 df_insn_rescan (insn);
34445
34446 if (dump_file)
34447 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
34448 }
34449
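/* The lane arithmetic in adjust_extract and adjust_splat works the
   same way: e.g. for a V4SI extract of lane 1, half_elts = 2 and the
   adjusted lane is 1 + 2 = 3, while lanes in the upper half move down
   by 2 instead.  */
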
34450 /* The insn described by INSN_ENTRY[I] can be swapped, but only
34451 with special handling. Take care of that here. */
34452 static void
34453 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
34454 {
34455 rtx_insn *insn = insn_entry[i].insn;
34456 rtx body = PATTERN (insn);
34457
34458 switch (insn_entry[i].special_handling)
34459 {
34460 default:
34461 gcc_unreachable ();
34462 case SH_CONST_VECTOR:
34463 {
34464 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
34465 gcc_assert (GET_CODE (body) == SET);
34466 rtx rhs = SET_SRC (body);
34467 swap_const_vector_halves (rhs);
34468 if (dump_file)
34469 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
34470 break;
34471 }
34472 case SH_SUBREG:
34473 /* A subreg of the same size is already safe. For subregs that
34474 select a smaller portion of a reg, adjust the index for
34475 swapped doublewords. */
34476 adjust_subreg_index (body);
34477 if (dump_file)
34478 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
34479 break;
34480 case SH_NOSWAP_LD:
34481 /* Convert a non-permuting load to a permuting one. */
34482 permute_load (insn);
34483 break;
34484 case SH_NOSWAP_ST:
34485 /* Convert a non-permuting store to a permuting one. */
34486 permute_store (insn);
34487 break;
34488 case SH_EXTRACT:
34489 /* Change the lane on an extract operation. */
34490 adjust_extract (insn);
34491 break;
34492 case SH_SPLAT:
34493 /* Change the lane on a direct-splat operation. */
34494 adjust_splat (insn);
34495 break;
34496 }
34497 }
34498
34499 /* Find the insn from the Ith table entry, which is known to be a
34500 register swap Y = SWAP(X). Replace it with a copy Y = X. */
34501 static void
34502 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
34503 {
34504 rtx_insn *insn = insn_entry[i].insn;
34505 rtx body = PATTERN (insn);
34506 rtx src_reg = XEXP (SET_SRC (body), 0);
34507 rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg);
34508 rtx_insn *new_insn = emit_insn_before (copy, insn);
34509 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
34510 df_insn_rescan (new_insn);
34511
34512 if (dump_file)
34513 {
34514 unsigned int new_uid = INSN_UID (new_insn);
34515 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
34516 }
34517
34518 df_insn_delete (insn);
34519 remove_insn (insn);
34520 insn->set_deleted ();
34521 }
34522
34523 /* Dump the swap table to DUMP_FILE. */
34524 static void
34525 dump_swap_insn_table (swap_web_entry *insn_entry)
34526 {
34527 int e = get_max_uid ();
34528 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
34529
34530 for (int i = 0; i < e; ++i)
34531 if (insn_entry[i].is_relevant)
34532 {
34533 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
34534 fprintf (dump_file, "%6d %6d ", i,
34535 pred_entry && pred_entry->insn
34536 ? INSN_UID (pred_entry->insn) : 0);
34537 if (insn_entry[i].is_load)
34538 fputs ("load ", dump_file);
34539 if (insn_entry[i].is_store)
34540 fputs ("store ", dump_file);
34541 if (insn_entry[i].is_swap)
34542 fputs ("swap ", dump_file);
34543 if (insn_entry[i].is_live_in)
34544 fputs ("live-in ", dump_file);
34545 if (insn_entry[i].is_live_out)
34546 fputs ("live-out ", dump_file);
34547 if (insn_entry[i].contains_subreg)
34548 fputs ("subreg ", dump_file);
34549 if (insn_entry[i].is_128_int)
34550 fputs ("int128 ", dump_file);
34551 if (insn_entry[i].is_call)
34552 fputs ("call ", dump_file);
34553 if (insn_entry[i].is_swappable)
34554 {
34555 fputs ("swappable ", dump_file);
34556 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
34557 fputs ("special:constvec ", dump_file);
34558 else if (insn_entry[i].special_handling == SH_SUBREG)
34559 fputs ("special:subreg ", dump_file);
34560 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
34561 fputs ("special:load ", dump_file);
34562 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
34563 fputs ("special:store ", dump_file);
34564 else if (insn_entry[i].special_handling == SH_EXTRACT)
34565 fputs ("special:extract ", dump_file);
34566 else if (insn_entry[i].special_handling == SH_SPLAT)
34567 fputs ("special:splat ", dump_file);
34568 }
34569 if (insn_entry[i].web_not_optimizable)
34570 fputs ("unoptimizable ", dump_file);
34571 if (insn_entry[i].will_delete)
34572 fputs ("delete ", dump_file);
34573 fputs ("\n", dump_file);
34574 }
34575 fputs ("\n", dump_file);
34576 }
34577
34578 /* Main entry point for this pass. */
34579 unsigned int
34580 rs6000_analyze_swaps (function *fun)
34581 {
34582 swap_web_entry *insn_entry;
34583 basic_block bb;
34584 rtx_insn *insn;
34585
34586 /* Dataflow analysis for use-def chains. */
34587 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
34588 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
34589 df_analyze ();
34590 df_set_flags (DF_DEFER_INSN_RESCAN);
34591
34592 /* Allocate structure to represent webs of insns. */
34593 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
34594
34595 /* Walk the insns to gather basic data. */
34596 FOR_ALL_BB_FN (bb, fun)
34597 FOR_BB_INSNS (bb, insn)
34598 {
34599 unsigned int uid = INSN_UID (insn);
34600 if (NONDEBUG_INSN_P (insn))
34601 {
34602 insn_entry[uid].insn = insn;
34603
34604 if (GET_CODE (insn) == CALL_INSN)
34605 insn_entry[uid].is_call = 1;
34606
34607 /* Walk the uses and defs to see if we mention vector regs.
34608 Record any constraints on optimization of such mentions. */
34609 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34610 df_ref mention;
34611 FOR_EACH_INSN_INFO_USE (mention, insn_info)
34612 {
34613 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34614 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34615
34616 /* If a use gets its value from a call insn, it will be
34617 a hard register and will look like (reg:V4SI 3 3).
34618 The df analysis creates two mentions for GPR3 and GPR4,
34619 both DImode. We must recognize this and treat it as a
34620 vector mention to ensure the call is unioned with this
34621 use. */
34622 if (mode == DImode && DF_REF_INSN_INFO (mention))
34623 {
34624 rtx feeder = DF_REF_INSN (mention);
34625 /* FIXME: It is pretty hard to get from the df mention
34626 to the mode of the use in the insn. We arbitrarily
34627 pick a vector mode here, even though the use might
34628 be a real DImode. We can be too conservative
34629 (create a web larger than necessary) because of
34630 this, so consider eventually fixing this. */
34631 if (GET_CODE (feeder) == CALL_INSN)
34632 mode = V4SImode;
34633 }
34634
34635 if (VECTOR_MODE_P (mode))
34636 {
34637 insn_entry[uid].is_relevant = 1;
34638 if (mode == TImode || mode == V1TImode)
34639 insn_entry[uid].is_128_int = 1;
34640 if (DF_REF_INSN_INFO (mention))
34641 insn_entry[uid].contains_subreg
34642 = !rtx_equal_p (DF_REF_REG (mention),
34643 DF_REF_REAL_REG (mention));
34644 union_defs (insn_entry, insn, mention);
34645 }
34646 }
34647 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
34648 {
34649 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34650 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34651
34652 /* If we're loading up a hard vector register for a call,
34653 it looks like (set (reg:V4SI 9 9) (...)). The df
34654 analysis creates two mentions for GPR9 and GPR10, both
34655 DImode. So relying on the mode from the mentions
34656 isn't sufficient to ensure we union the call into the
34657 web with the parameter setup code. */
34658 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
34659 && VECTOR_MODE_P (GET_MODE (SET_DEST (PATTERN (insn)))))
34660 mode = GET_MODE (SET_DEST (PATTERN (insn)));
34661
34662 if (VECTOR_MODE_P (mode))
34663 {
34664 insn_entry[uid].is_relevant = 1;
34665 if (mode == TImode || mode == V1TImode)
34666 insn_entry[uid].is_128_int = 1;
34667 if (DF_REF_INSN_INFO (mention))
34668 insn_entry[uid].contains_subreg
34669 = !rtx_equal_p (DF_REF_REG (mention),
34670 DF_REF_REAL_REG (mention));
34671 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
34672 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
34673 insn_entry[uid].is_live_out = 1;
34674 union_uses (insn_entry, insn, mention);
34675 }
34676 }
34677
34678 if (insn_entry[uid].is_relevant)
34679 {
34680 /* Determine if this is a load or store. */
34681 insn_entry[uid].is_load = insn_is_load_p (insn);
34682 insn_entry[uid].is_store = insn_is_store_p (insn);
34683
34684 /* Determine if this is a doubleword swap. If not,
34685 determine whether it can legally be swapped. */
34686 if (insn_is_swap_p (insn))
34687 insn_entry[uid].is_swap = 1;
34688 else
34689 {
34690 unsigned int special = SH_NONE;
34691 insn_entry[uid].is_swappable
34692 = insn_is_swappable_p (insn_entry, insn, &special);
34693 if (special != SH_NONE && insn_entry[uid].contains_subreg)
34694 insn_entry[uid].is_swappable = 0;
34695 else if (special != SH_NONE)
34696 insn_entry[uid].special_handling = special;
34697 else if (insn_entry[uid].contains_subreg)
34698 insn_entry[uid].special_handling = SH_SUBREG;
34699 }
34700 }
34701 }
34702 }
34703
34704 if (dump_file)
34705 {
34706 fprintf (dump_file, "\nSwap insn entry table when first built\n");
34707 dump_swap_insn_table (insn_entry);
34708 }
34709
34710 /* Record unoptimizable webs. */
34711 unsigned e = get_max_uid (), i;
34712 for (i = 0; i < e; ++i)
34713 {
34714 if (!insn_entry[i].is_relevant)
34715 continue;
34716
34717 swap_web_entry *root
34718 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
34719
34720 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
34721 || (insn_entry[i].contains_subreg
34722 && insn_entry[i].special_handling != SH_SUBREG)
34723 || insn_entry[i].is_128_int || insn_entry[i].is_call
34724 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
34725 root->web_not_optimizable = 1;
34726
34727 /* If we have loads or stores that aren't permuting then the
34728 optimization isn't appropriate. */
34729 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
34730 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
34731 root->web_not_optimizable = 1;
34732
34733 /* If we have permuting loads or stores that are not accompanied
34734 by a register swap, the optimization isn't appropriate. */
34735 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
34736 {
34737 rtx insn = insn_entry[i].insn;
34738 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34739 df_ref def;
34740
34741 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34742 {
34743 struct df_link *link = DF_REF_CHAIN (def);
34744
34745 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
34746 {
34747 root->web_not_optimizable = 1;
34748 break;
34749 }
34750 }
34751 }
34752 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
34753 {
34754 rtx insn = insn_entry[i].insn;
34755 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34756 df_ref use;
34757
34758 FOR_EACH_INSN_INFO_USE (use, insn_info)
34759 {
34760 struct df_link *link = DF_REF_CHAIN (use);
34761
34762 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
34763 {
34764 root->web_not_optimizable = 1;
34765 break;
34766 }
34767 }
34768 }
34769 }
34770
34771 if (dump_file)
34772 {
34773 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
34774 dump_swap_insn_table (insn_entry);
34775 }
34776
34777 /* For each load and store in an optimizable web (which implies
34778 the loads and stores are permuting), find the associated
34779 register swaps and mark them for removal. Due to various
34780 optimizations we may mark the same swap more than once. Also
34781 perform special handling for swappable insns that require it. */
34782 for (i = 0; i < e; ++i)
34783 if ((insn_entry[i].is_load || insn_entry[i].is_store)
34784 && insn_entry[i].is_swap)
34785 {
34786 swap_web_entry* root_entry
34787 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34788 if (!root_entry->web_not_optimizable)
34789 mark_swaps_for_removal (insn_entry, i);
34790 }
34791 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
34792 {
34793 swap_web_entry* root_entry
34794 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34795 if (!root_entry->web_not_optimizable)
34796 handle_special_swappables (insn_entry, i);
34797 }
34798
34799 /* Now delete the swaps marked for removal. */
34800 for (i = 0; i < e; ++i)
34801 if (insn_entry[i].will_delete)
34802 replace_swap_with_copy (insn_entry, i);
34803
34804 /* Clean up. */
34805 free (insn_entry);
34806 return 0;
34807 }
34808
34809 const pass_data pass_data_analyze_swaps =
34810 {
34811 RTL_PASS, /* type */
34812 "swaps", /* name */
34813 OPTGROUP_NONE, /* optinfo_flags */
34814 TV_NONE, /* tv_id */
34815 0, /* properties_required */
34816 0, /* properties_provided */
34817 0, /* properties_destroyed */
34818 0, /* todo_flags_start */
34819 TODO_df_finish, /* todo_flags_finish */
34820 };
34821
34822 class pass_analyze_swaps : public rtl_opt_pass
34823 {
34824 public:
34825 pass_analyze_swaps (gcc::context *ctxt)
34826 : rtl_opt_pass (pass_data_analyze_swaps, ctxt)
34827 {}
34828
34829 /* opt_pass methods: */
34830 virtual bool gate (function *)
34831 {
34832 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
34833 && rs6000_optimize_swaps);
34834 }
34835
34836 virtual unsigned int execute (function *fun)
34837 {
34838 return rs6000_analyze_swaps (fun);
34839 }
34840
34841 }; // class pass_analyze_swaps
34842
34843 rtl_opt_pass *
34844 make_pass_analyze_swaps (gcc::context *ctxt)
34845 {
34846 return new pass_analyze_swaps (ctxt);
34847 }
34848
34849 #ifdef RS6000_GLIBC_ATOMIC_FENV
34850 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
34851 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
34852 #endif
34853
34854 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
34855
34856 static void
34857 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
34858 {
34859 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
34860 {
34861 #ifdef RS6000_GLIBC_ATOMIC_FENV
34862 if (atomic_hold_decl == NULL_TREE)
34863 {
34864 atomic_hold_decl
34865 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
34866 get_identifier ("__atomic_feholdexcept"),
34867 build_function_type_list (void_type_node,
34868 double_ptr_type_node,
34869 NULL_TREE));
34870 TREE_PUBLIC (atomic_hold_decl) = 1;
34871 DECL_EXTERNAL (atomic_hold_decl) = 1;
34872 }
34873
34874 if (atomic_clear_decl == NULL_TREE)
34875 {
34876 atomic_clear_decl
34877 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
34878 get_identifier ("__atomic_feclearexcept"),
34879 build_function_type_list (void_type_node,
34880 NULL_TREE));
34881 TREE_PUBLIC (atomic_clear_decl) = 1;
34882 DECL_EXTERNAL (atomic_clear_decl) = 1;
34883 }
34884
34885 tree const_double = build_qualified_type (double_type_node,
34886 TYPE_QUAL_CONST);
34887 tree const_double_ptr = build_pointer_type (const_double);
34888 if (atomic_update_decl == NULL_TREE)
34889 {
34890 atomic_update_decl
34891 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
34892 get_identifier ("__atomic_feupdateenv"),
34893 build_function_type_list (void_type_node,
34894 const_double_ptr,
34895 NULL_TREE));
34896 TREE_PUBLIC (atomic_update_decl) = 1;
34897 DECL_EXTERNAL (atomic_update_decl) = 1;
34898 }
34899
34900 tree fenv_var = create_tmp_var (double_type_node, NULL);
34901 mark_addressable (fenv_var);
34902 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
34903
34904 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
34905 *clear = build_call_expr (atomic_clear_decl, 0);
34906 *update = build_call_expr (atomic_update_decl, 1,
34907 fold_convert (const_double_ptr, fenv_addr));
34908 #endif
34909 return;
34910 }
34911
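  /* Hard-float path: the dynamic FP environment lives in the FPSCR.
     The mffs builtin reads the FPSCR into a double and mtfsf writes it
     back, so the HOLD/CLEAR/UPDATE sequences below are built directly
     as GENERIC trees around those two builtins.  */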
34912 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
34913 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
34914 tree call_mffs = build_call_expr (mffs, 0);
34915
34916 /* Generates the equivalent of feholdexcept (&fenv_var)
34917
34918 *fenv_var = __builtin_mffs ();
34919 double fenv_hold;
34920 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
34921 __builtin_mtfsf (0xff, fenv_hold); */
34922
34923 /* Mask to clear everything except for the rounding modes and non-IEEE
34924 arithmetic flag. */
34925 const unsigned HOST_WIDE_INT hold_exception_mask =
34926 HOST_WIDE_INT_C (0xffffffff00000007);
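  /* In the mffs image the 32-bit FPSCR occupies the low word; its three
     least significant bits are FPSCR[NI] (non-IEEE mode) and FPSCR[RN]
     (rounding control), which is exactly what the 0x7 above keeps.  */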
34927
34928 tree fenv_var = create_tmp_var (double_type_node, NULL);
34929
34930 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
34931
34932 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
34933 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
34934 build_int_cst (uint64_type_node,
34935 hold_exception_mask));
34936
34937 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
34938 fenv_llu_and);
34939
34940 tree hold_mtfsf = build_call_expr (mtfsf, 2,
34941 build_int_cst (unsigned_type_node, 0xff),
34942 fenv_hold_mtfsf);
34943
34944 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
34945
34946 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
34947
34948 double fenv_clear = __builtin_mffs ();
34949 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
34950 __builtin_mtfsf (0xff, fenv_clear); */
34951
34952 /* Mask to clear the exception flags; it zeroes the entire low FPSCR
34953 word, so the rounding modes and non-IEEE flag are rewritten as well. */
34954 const unsigned HOST_WIDE_INT clear_exception_mask =
34955 HOST_WIDE_INT_C (0xffffffff00000000);
34956
34957 tree fenv_clear = create_tmp_var (double_type_node, NULL);
34958
34959 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
34960
34961 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
34962 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
34963 fenv_clean_llu,
34964 build_int_cst (uint64_type_node,
34965 clear_exception_mask));
34966
34967 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
34968 fenv_clear_llu_and);
34969
34970 tree clear_mtfsf = build_call_expr (mtfsf, 2,
34971 build_int_cst (unsigned_type_node, 0xff),
34972 fenv_clear_mtfsf);
34973
34974 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
34975
34976 /* Generates the equivalent of feupdateenv (&fenv_var)
34977
34978 double old_fenv = __builtin_mffs ();
34979 double fenv_update;
34980 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
34981 (*(uint64_t*)fenv_var & 0x1ff80fff);
34982 __builtin_mtfsf (0xff, fenv_update); */
34983
34984 const unsigned HOST_WIDE_INT update_exception_mask =
34985 HOST_WIDE_INT_C (0xffffffff1fffff00);
34986 const unsigned HOST_WIDE_INT new_exception_mask =
34987 HOST_WIDE_INT_C (0x1ff80fff);
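  /* The IOR built below merges the exception flags raised since HOLD
     (taken from old_fenv via update_exception_mask) with the saved
     control bits and previously raised flags (taken from fenv_var via
     new_exception_mask), matching feupdateenv's re-raise semantics.  */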
34988
34989 tree old_fenv = create_tmp_var (double_type_node, NULL);
34990 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
34991
34992 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
34993 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
34994 build_int_cst (uint64_type_node,
34995 update_exception_mask));
34996
34997 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
34998 build_int_cst (uint64_type_node,
34999 new_exception_mask));
35000
35001 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
35002 old_llu_and, new_llu_and);
35003
35004 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
35005 new_llu_mask);
35006
35007 tree update_mtfsf = build_call_expr (mtfsf, 2,
35008 build_int_cst (unsigned_type_node, 0xff),
35009 fenv_update_mtfsf);
35010
35011 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
35012 }
35013
35014 \f
35015 struct gcc_target targetm = TARGET_INITIALIZER;
35016
35017 #include "gt-rs6000.h"