/* gcc/config/rs6000/rs6000-call.cc — from the gcc.gnu.org git mirror
   (commit: "PowerPC: Add support for 1,024 bit DMR registers.").  */
1 /* Subroutines used to generate function calls and handle built-in
2 instructions on IBM RS/6000.
3 Copyright (C) 1991-2023 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "memmodel.h"
30 #include "gimple.h"
31 #include "cfghooks.h"
32 #include "cfgloop.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "ira.h"
40 #include "recog.h"
41 #include "cgraph.h"
42 #include "diagnostic-core.h"
43 #include "insn-attr.h"
44 #include "flags.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "attribs.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "print-tree.h"
51 #include "varasm.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "output.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "gimplify.h"
58 #include "gimple-iterator.h"
59 #include "gimple-fold.h"
60 #include "ssa.h"
61 #include "tree-ssa-propagate.h"
62 #include "builtins.h"
63 #include "tree-vector-builder.h"
64 #include "ppc-auxv.h"
65 #include "targhooks.h"
66 #include "opts.h"
67
68 #include "rs6000-internal.h"
69
/* Provide sane fallbacks when the target headers did not define these.  */
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
#endif

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

/* Nonzero if we can use a floating-point register to pass this arg:
   the mode is a scalar (non-vector) float, a free FPR argument register
   remains in CUM, and hard float is enabled.  */
#define USE_FP_FOR_ARG_P(CUM,MODE)		\
  (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE)	\
   && (CUM)->fregno <= FP_ARG_MAX_REG		\
   && TARGET_HARD_FLOAT)

/* Nonzero if we can use an AltiVec register to pass this arg: the mode
   is an AltiVec/VSX vector mode, a free vector argument register remains
   in CUM, the AltiVec ABI is in effect, and the argument is named
   (unnamed vector args go elsewhere under this ABI).  */
#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED)	\
  (ALTIVEC_OR_VSX_VECTOR_MODE (MODE)		\
   && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG	\
   && TARGET_ALTIVEC_ABI			\
   && (NAMED))
96
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   or vector type.  If a non-floating point or vector type is found, or
   if a floating point or vector type that doesn't match a non-VOIDmode
   *MODEP is found, then return -1, otherwise return the count in the
   sub-tree.

   There have been some ABI snafus along the way with C++.  Modify
   EMPTY_BASE_SEEN to a nonzero value iff a C++ empty base class makes
   an appearance; separate flag bits indicate whether or not such a
   field is marked "no unique address".  Modify ZERO_WIDTH_BF_SEEN
   to 1 iff a C++ zero-length bitfield makes an appearance, but
   in this case otherwise treat this as still being a homogeneous
   aggregate.  */

static int
rs6000_aggregate_candidate (const_tree type, machine_mode *modep,
			    int *empty_base_seen, int *zero_width_bf_seen)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      /* A scalar float is one element, provided it agrees with any
	 element mode already established in *MODEP.  */
      mode = TYPE_MODE (type);
      if (!SCALAR_FLOAT_MODE_P (mode))
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      /* A complex float counts as two elements of its component mode.  */
      mode = TYPE_MODE (TREE_TYPE (type));
      if (!SCALAR_FLOAT_MODE_P (mode))
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
	return -1;

      /* Use V4SImode as representative of all 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	/* Recurse on the element type, then scale by the number of
	   array elements derived from the index domain.  */
	count = rs6000_aggregate_candidate (TREE_TYPE (type), modep,
					    empty_base_seen,
					    zero_width_bf_seen);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	/* Sum the element counts of all fields; every field must share
	   the same element mode for the record to qualify.  */
	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
	      {
		/* GCC 11 and earlier generated incorrect code in a rare
		   corner case for C++.  When a RECORD_TYPE looks like a
		   homogeneous aggregate, except that it also contains
		   one or more zero-width bit fields, these earlier
		   compilers would incorrectly pass the fields in FPRs
		   or VSRs.  This occurred because the front end wrongly
		   removed these bitfields from the RECORD_TYPE.  In
		   GCC 12 and later, the front end flaw was corrected.
		   We want to diagnose this case.  To do this, we pretend
		   that we don't see the zero-width bit fields (hence
		   the continue statement here), but pass back a flag
		   indicating what happened.  The caller then diagnoses
		   the issue and rejects the RECORD_TYPE as a homogeneous
		   aggregate.  */
		*zero_width_bf_seen = 1;
		continue;
	      }

	    if (DECL_FIELD_ABI_IGNORED (field))
	      {
		/* C++ empty base / [[no_unique_address]] field: skip it
		   but record which flavor was seen for psABI warnings.  */
		if (lookup_attribute ("no_unique_address",
				      DECL_ATTRIBUTES (field)))
		  *empty_base_seen |= 2;
		else
		  *empty_base_seen |= 1;
		continue;
	      }

	    sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep,
						    empty_base_seen,
						    zero_width_bf_seen);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	/* A union contributes the element count of its widest member.  */
	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep,
						    empty_base_seen,
						    zero_width_bf_seen);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
311
/* If an argument, whose type is described by TYPE and MODE, is a homogeneous
   float or vector aggregate that shall be passed in FP/vector registers
   according to the ELFv2 ABI, return the homogeneous element mode in
   *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.

   Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE.

   ELT_MODE and N_ELTS may each be NULL when the caller only needs the
   boolean answer.  Emits -Wpsabi notes (once per type) when the answer
   changed across GCC releases due to C++ empty-base or zero-width
   bit-field ABI fixes.  */

bool
rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
				       machine_mode *elt_mode,
				       int *n_elts)
{
  /* Note that we do not accept complex types at the top level as
     homogeneous aggregates; these types are handled via the
     targetm.calls.split_complex_arg mechanism.  Complex types
     can be elements of homogeneous aggregates, however.  */
  if (TARGET_HARD_FLOAT && DEFAULT_ABI == ABI_ELFv2 && type
      && AGGREGATE_TYPE_P (type))
    {
      machine_mode field_mode = VOIDmode;
      int empty_base_seen = 0;
      int zero_width_bf_seen = 0;
      int field_count = rs6000_aggregate_candidate (type, &field_mode,
						    &empty_base_seen,
						    &zero_width_bf_seen);

      if (field_count > 0)
	{
	  /* Vector elements occupy a full 16-byte VSR; scalar floats a
	     full 8-byte FPR.  */
	  int reg_size = ALTIVEC_OR_VSX_VECTOR_MODE (field_mode) ? 16 : 8;
	  int field_size = ROUND_UP (GET_MODE_SIZE (field_mode), reg_size);

	  /* The ELFv2 ABI allows homogeneous aggregates to occupy
	     up to AGGR_ARG_NUM_REG registers.  */
	  if (field_count * field_size <= AGGR_ARG_NUM_REG * reg_size)
	    {
	      if (elt_mode)
		*elt_mode = field_mode;
	      if (n_elts)
		*n_elts = field_count;
	      if (empty_base_seen && warn_psabi)
		{
		  /* Warn only once per distinct (main variant) type.  */
		  static unsigned last_reported_type_uid;
		  unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
		  if (uid != last_reported_type_uid)
		    {
		      const char *url
			= CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
		      if (empty_base_seen & 1)
			inform (input_location,
				"parameter passing for argument of type %qT "
				"when C++17 is enabled changed to match C++14 "
				"%{in GCC 10.1%}", type, url);
		      else
			inform (input_location,
				"parameter passing for argument of type %qT "
				"with %<[[no_unique_address]]%> members "
				"changed %{in GCC 10.1%}", type, url);
		      last_reported_type_uid = uid;
		    }
		}
	      if (zero_width_bf_seen && warn_psabi)
		{
		  static unsigned last_reported_type_uid;
		  unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
		  if (uid != last_reported_type_uid)
		    {
		      inform (input_location,
			      "ELFv2 parameter passing for an argument "
			      "containing zero-width bit fields but that is "
			      "otherwise a homogeneous aggregate was "
			      "corrected in GCC 12");
		      last_reported_type_uid = uid;
		    }
		  /* Zero-width bit fields disqualify the aggregate; fall
		     back to the non-homogeneous answer.  */
		  if (elt_mode)
		    *elt_mode = mode;
		  if (n_elts)
		    *n_elts = 1;
		  return false;
		}
	      return true;
	    }
	}
    }

  if (elt_mode)
    *elt_mode = mode;
  if (n_elts)
    *n_elts = 1;
  return false;
}
402
/* Return a nonzero value to say to return the function value in
   memory, just as large structures are always returned.  TYPE will be
   the data type of the value, and FNTYPE will be the type of the
   function doing the returning, or @code{NULL} for libcalls.

   The AIX ABI for the RS/6000 specifies that all structures are
   returned in memory.  The Darwin ABI does the same.

   For the Darwin 64 Bit ABI, a function result can be returned in
   registers or in memory, depending on the size of the return data
   type.  If it is returned in registers, the value occupies the same
   registers as it would if it were the first and only function
   argument.  Otherwise, the function places its result in memory at
   the location pointed to by GPR3.

   The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
   but a draft put them in memory, and GCC used to implement the draft
   instead of the final standard.  Therefore, aix_struct_return
   controls this instead of DEFAULT_ABI; V.4 targets needing backward
   compatibility can change DRAFT_V4_STRUCT_RET to override the
   default, and -m switches get the final word.  See
   rs6000_option_override_internal for more details.

   The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
   long double support is enabled.  These values are returned in memory.

   int_size_in_bytes returns -1 for variable size objects, which go in
   memory always.  The cast to unsigned makes -1 > 8.  */

bool
rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* We do not allow MMA types being used as return values.  Only report
     the invalid return value usage the first time we encounter it.  */
  if (cfun
      && !cfun->machine->mma_return_type_error
      && TREE_TYPE (cfun->decl) == fntype
      && (TYPE_MODE (type) == OOmode || TYPE_MODE (type) == XOmode
	  || TYPE_MODE (type) == TDOmode))
    {
      /* Record we have now handled function CFUN, so the next time we
	 are called, we do not re-report the same error.  */
      cfun->machine->mma_return_type_error = true;
      if (TYPE_CANONICAL (type) != NULL_TREE)
	type = TYPE_CANONICAL (type);
      error ("invalid use of MMA type %qs as a function return value",
	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
    }

  /* For the Darwin64 ABI, test if we can fit the return value in regs.  */
  if (TARGET_MACHO
      && rs6000_darwin64_abi
      && TREE_CODE (type) == RECORD_TYPE
      && int_size_in_bytes (type) > 0)
    {
      CUMULATIVE_ARGS valcum;
      rtx valret;

      valcum.words = 0;
      valcum.fregno = FP_ARG_MIN_REG;
      valcum.vregno = ALTIVEC_ARG_MIN_REG;
      /* Do a trial code generation as if this were going to be passed
	 as an argument; if any part goes in memory, we return NULL.  */
      valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
      if (valret)
	return false;
      /* Otherwise fall through to more conventional ABI rules.  */
    }

  /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */
  if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
					     NULL, NULL))
    return false;

  /* The ELFv2 ABI returns aggregates up to 16B in registers */
  if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
      && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
    return false;

  if (AGGREGATE_TYPE_P (type)
      && (aix_struct_return
	  || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
    return true;

  /* Allow -maltivec -mabi=no-altivec without warning.  Altivec vector
     modes only exist for GCC vector types if -maltivec.  */
  if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
      && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
    return false;

  /* Return synthetic vectors in memory.  */
  if (TREE_CODE (type) == VECTOR_TYPE
      && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
    {
      /* Warn once per compilation about this ABI extension.  */
      static bool warned_for_return_big_vectors = false;
      if (!warned_for_return_big_vectors)
	{
	  warning (OPT_Wpsabi, "GCC vector returned by reference: "
		   "non-standard ABI extension with no compatibility "
		   "guarantee");
	  warned_for_return_big_vectors = true;
	}
      return true;
    }

  if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
      && FLOAT128_IEEE_P (TYPE_MODE (type)))
    return true;

  return false;
}
514
515 /* Specify whether values returned in registers should be at the most
516 significant end of a register. We want aggregates returned by
517 value to match the way aggregates are passed to functions. */
518
519 bool
520 rs6000_return_in_msb (const_tree valtype)
521 {
522 return (DEFAULT_ABI == ABI_ELFv2
523 && BYTES_BIG_ENDIAN
524 && AGGREGATE_TYPE_P (valtype)
525 && (rs6000_function_arg_padding (TYPE_MODE (valtype), valtype)
526 == PAD_UPWARD));
527 }
528
529 #ifdef HAVE_AS_GNU_ATTRIBUTE
530 /* Return TRUE if a call to function FNDECL may be one that
531 potentially affects the function calling ABI of the object file. */
532
533 static bool
534 call_ABI_of_interest (tree fndecl)
535 {
536 if (rs6000_gnu_attr && symtab->state == EXPANSION)
537 {
538 struct cgraph_node *c_node;
539
540 /* Libcalls are always interesting. */
541 if (fndecl == NULL_TREE)
542 return true;
543
544 /* Any call to an external function is interesting. */
545 if (DECL_EXTERNAL (fndecl))
546 return true;
547
548 /* Interesting functions that we are emitting in this object file. */
549 c_node = cgraph_node::get (fndecl);
550 c_node = c_node->ultimate_alias_target ();
551 return !c_node->only_called_directly_p ();
552 }
553 return false;
554 }
555 #endif
556
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.

   For incoming args we set the number of arguments in the prototype large
   so we never return a PARALLEL.

   INCOMING is nonzero when processing the callee side, LIBCALL marks a
   library call, N_NAMED_ARGS is the count of named (non-variadic)
   parameters, and FNDECL (may be NULL) is the callee declaration used
   for longcall/locality decisions.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
		      rtx libname ATTRIBUTE_UNUSED, int incoming,
		      int libcall, int n_named_args,
		      tree fndecl,
		      machine_mode return_mode ATTRIBUTE_UNUSED)
{
  static CUMULATIVE_ARGS zero_cumulative;

  *cum = zero_cumulative;
  cum->words = 0;
  cum->fregno = FP_ARG_MIN_REG;
  cum->vregno = ALTIVEC_ARG_MIN_REG;
  cum->prototype = (fntype && prototype_p (fntype));
  cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
		      ? CALL_LIBCALL : CALL_NORMAL);
  cum->sysv_gregno = GP_ARG_MIN_REG;
  cum->stdarg = stdarg_p (fntype);
  cum->libcall = libcall;

  cum->nargs_prototype = 0;
  if (incoming || cum->prototype)
    cum->nargs_prototype = n_named_args;

  /* Check for a longcall attribute.  */
  if ((!fntype && rs6000_default_long_calls)
      || (fntype
	  && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
	  && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
    cum->call_cookie |= CALL_LONG;
  else if (DEFAULT_ABI != ABI_DARWIN)
    {
      /* Without an explicit longcall attribute, decide based on whether
	 the callee binds locally and on the PLT settings.  */
      bool is_local = (fndecl
		       && !DECL_EXTERNAL (fndecl)
		       && !DECL_WEAK (fndecl)
		       && (*targetm.binds_local_p) (fndecl));
      if (is_local)
	;
      else if (flag_plt)
	{
	  /* PLT calls are the default; "noplt" forces a long call.  */
	  if (fntype
	      && lookup_attribute ("noplt", TYPE_ATTRIBUTES (fntype)))
	    cum->call_cookie |= CALL_LONG;
	}
      else
	{
	  /* -fno-plt: long call unless "plt" attribute opts back in.  */
	  if (!(fntype
		&& lookup_attribute ("plt", TYPE_ATTRIBUTES (fntype))))
	    cum->call_cookie |= CALL_LONG;
	}
    }

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args:");
      if (fntype)
	{
	  tree ret_type = TREE_TYPE (fntype);
	  fprintf (stderr, " ret code = %s,",
		   get_tree_code_name (TREE_CODE (ret_type)));
	}

      if (cum->call_cookie & CALL_LONG)
	fprintf (stderr, " longcall,");

      fprintf (stderr, " proto = %d, nargs = %d\n",
	       cum->prototype, cum->nargs_prototype);
    }

#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* Track float/long-double/vector usage so the right .gnu_attribute
     markers get emitted into the object file.  */
  if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
    {
      cum->escapes = call_ABI_of_interest (fndecl);
      if (cum->escapes)
	{
	  tree return_type;

	  if (fntype)
	    {
	      return_type = TREE_TYPE (fntype);
	      return_mode = TYPE_MODE (return_type);
	    }
	  else
	    return_type = lang_hooks.types.type_for_mode (return_mode, 0);

	  if (return_type != NULL)
	    {
	      /* Look through transparent aggregates to the real type.  */
	      if (TREE_CODE (return_type) == RECORD_TYPE
		  && TYPE_TRANSPARENT_AGGR (return_type))
		{
		  return_type = TREE_TYPE (first_field (return_type));
		  return_mode = TYPE_MODE (return_type);
		}
	      if (AGGREGATE_TYPE_P (return_type)
		  && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
		      <= 8))
		rs6000_returns_struct = true;
	    }
	  if (SCALAR_FLOAT_MODE_P (return_mode))
	    {
	      rs6000_passes_float = true;
	      if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
		  && (FLOAT128_IBM_P (return_mode)
		      || FLOAT128_IEEE_P (return_mode)
		      || (return_type != NULL
			  && (TYPE_MAIN_VARIANT (return_type)
			      == long_double_type_node))))
		rs6000_passes_long_double = true;
	    }
	  if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode))
	    rs6000_passes_vector = true;
	}
    }
#endif

  /* Diagnose returning a vector value when AltiVec code generation is
     disabled but the AltiVec ABI is in effect.  */
  if (fntype
      && !TARGET_ALTIVEC
      && TARGET_ALTIVEC_ABI
      && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
    {
      error ("cannot return value in vector register because"
	     " altivec instructions are disabled, use %qs"
	     " to enable them", "-maltivec");
    }
}
689 \f
690
691 /* On rs6000, function arguments are promoted, as are function return
692 values. */
693
694 machine_mode
695 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
696 machine_mode mode,
697 int *punsignedp ATTRIBUTE_UNUSED,
698 const_tree, int for_return ATTRIBUTE_UNUSED)
699 {
700 if (GET_MODE_CLASS (mode) == MODE_INT
701 && GET_MODE_SIZE (mode) < (TARGET_32BIT ? 4 : 8))
702 mode = TARGET_32BIT ? SImode : DImode;
703
704 return mode;
705 }
706
707 /* Return true if TYPE must be passed on the stack and not in registers. */
708
709 bool
710 rs6000_must_pass_in_stack (const function_arg_info &arg)
711 {
712 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
713 return must_pass_in_stack_var_size (arg);
714 else
715 return must_pass_in_stack_var_size_or_pad (arg);
716 }
717
718 static inline bool
719 is_complex_IBM_long_double (machine_mode mode)
720 {
721 return mode == ICmode || (mode == TCmode && FLOAT128_IBM_P (TCmode));
722 }
723
724 /* Whether ABI_V4 passes MODE args to a function in floating point
725 registers. */
726
727 static bool
728 abi_v4_pass_in_fpr (machine_mode mode, bool named)
729 {
730 if (!TARGET_HARD_FLOAT)
731 return false;
732 if (mode == DFmode)
733 return true;
734 if (mode == SFmode && named)
735 return true;
736 /* ABI_V4 passes complex IBM long double in 8 gprs.
737 Stupid, but we can't change the ABI now. */
738 if (is_complex_IBM_long_double (mode))
739 return false;
740 if (FLOAT128_2REG_P (mode))
741 return true;
742 if (DECIMAL_FLOAT_MODE_P (mode))
743 return true;
744 return false;
745 }
746
/* Implement TARGET_FUNCTION_ARG_PADDING.

   For the AIX ABI structs are always stored left shifted in their
   argument slot.  Returns the pad_direction for an argument of the
   given MODE and TYPE (TYPE may be NULL for libcalls).  */

pad_direction
rs6000_function_arg_padding (machine_mode mode, const_tree type)
{
  /* Defaults for targets that do not define these knobs.  */
#ifndef AGGREGATE_PADDING_FIXED
#define AGGREGATE_PADDING_FIXED 0
#endif
#ifndef AGGREGATES_PAD_UPWARD_ALWAYS
#define AGGREGATES_PAD_UPWARD_ALWAYS 0
#endif

  if (!AGGREGATE_PADDING_FIXED)
    {
      /* GCC used to pass structures of the same size as integer types as
	 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
	 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
	 passed padded downward, except that -mstrict-align further
	 muddied the water in that multi-component structures of 2 and 4
	 bytes in size were passed padded upward.

	 The following arranges for best compatibility with previous
	 versions of gcc, but removes the -mstrict-align dependency.  */
      if (BYTES_BIG_ENDIAN)
	{
	  HOST_WIDE_INT size = 0;

	  if (mode == BLKmode)
	    {
	      /* Only constant-size aggregates get a size here; variable
		 sized ones keep size == 0 and pad upward below.  */
	      if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
		size = int_size_in_bytes (type);
	    }
	  else
	    size = GET_MODE_SIZE (mode);

	  if (size == 1 || size == 2 || size == 4)
	    return PAD_DOWNWARD;
	}
      return PAD_UPWARD;
    }

  if (AGGREGATES_PAD_UPWARD_ALWAYS)
    {
      if (type != 0 && AGGREGATE_TYPE_P (type))
	return PAD_UPWARD;
    }

  /* Fall back to the default.  */
  return default_function_arg_padding (mode, type);
}
800
/* If defined, a C expression that gives the alignment boundary, in bits,
   of an argument with the specified mode and type.  If it is not defined,
   PARM_BOUNDARY is used for all arguments.

   V.4 wants long longs and doubles to be double word aligned.  Just
   testing the mode size is a boneheaded way to do this as it means
   that other types such as complex int are also double word aligned.
   However, we're stuck with this because changing the ABI might break
   existing library interfaces.

   Quadword align Altivec/VSX vectors.
   Quadword align large synthetic vector types.  */

unsigned int
rs6000_function_arg_boundary (machine_mode mode, const_tree type)
{
  machine_mode elt_mode;
  int n_elts;

  /* For ELFv2 homogeneous aggregates, alignment follows the element
     mode rather than the aggregate's BLKmode.  */
  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);

  if (DEFAULT_ABI == ABI_V4
      && (GET_MODE_SIZE (mode) == 8
	  || (TARGET_HARD_FLOAT
	      && !is_complex_IBM_long_double (mode)
	      && FLOAT128_2REG_P (mode))))
    return 64;
  else if (FLOAT128_VECTOR_P (mode))
    return 128;
  else if (type && TREE_CODE (type) == VECTOR_TYPE
	   && int_size_in_bytes (type) >= 8
	   && int_size_in_bytes (type) < 16)
    return 64;
  else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
	   || (type && TREE_CODE (type) == VECTOR_TYPE
	       && int_size_in_bytes (type) >= 16))
    return 128;

  /* Aggregate types that need > 8 byte alignment are quadword-aligned
     in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
     -mcompat-align-parm is used.  */
  if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
       || DEFAULT_ABI == ABI_ELFv2)
      && type && TYPE_ALIGN (type) > 64)
    {
      /* "Aggregate" means any AGGREGATE_TYPE except for single-element
	 or homogeneous float/vector aggregates here.  We already handled
	 vector aggregates above, but still need to check for float here. */
      if (AGGREGATE_TYPE_P (type)
	  && !SCALAR_FLOAT_MODE_P (elt_mode))
	return 128;
    }

  /* Similar for the Darwin64 ABI.  Note that for historical reasons we
     implement the "aggregate type" check as a BLKmode check here; this
     means certain aggregate types are in fact not aligned.  */
  if (TARGET_MACHO && rs6000_darwin64_abi
      && mode == BLKmode
      && type && TYPE_ALIGN (type) > 64)
    return 128;

  return PARM_BOUNDARY;
}
864
865 /* The offset in words to the start of the parameter save area. */
866
867 static unsigned int
868 rs6000_parm_offset (void)
869 {
870 return (DEFAULT_ABI == ABI_V4 ? 2
871 : DEFAULT_ABI == ABI_ELFv2 ? 4
872 : 6);
873 }
874
875 /* For a function parm of MODE and TYPE, return the starting word in
876 the parameter area. NWORDS of the parameter area are already used. */
877
878 static unsigned int
879 rs6000_parm_start (machine_mode mode, const_tree type,
880 unsigned int nwords)
881 {
882 unsigned int align;
883
884 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
885 return nwords + (-(rs6000_parm_offset () + nwords) & align);
886 }
887
888 /* Compute the size (in words) of a function argument. */
889
890 static unsigned long
891 rs6000_arg_size (machine_mode mode, const_tree type)
892 {
893 unsigned long size;
894
895 if (mode != BLKmode)
896 size = GET_MODE_SIZE (mode);
897 else
898 size = int_size_in_bytes (type);
899
900 if (TARGET_32BIT)
901 return (size + 3) >> 2;
902 else
903 return (size + 7) >> 3;
904 }
905 \f
/* Use this to flush pending int fields.  Advances CUM->words past any
   integer fields accumulated since CUM->intoffset was set, up to
   BITPOS.  FINAL is nonzero for the last flush of an argument.  */

static void
rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
					  HOST_WIDE_INT bitpos, int final)
{
  unsigned int startbit, endbit;
  int intregs, intoffset;

  /* Handle the situations where a float is taking up the first half
     of the GPR, and the other half is empty (typically due to
     alignment restrictions).  We can detect this by a 8-byte-aligned
     int field, or by seeing that this is the final flush for this
     argument.  Count the word and continue on.  */
  if (cum->floats_in_gpr == 1
      && (cum->intoffset % 64 == 0
	  || (cum->intoffset == -1 && final)))
    {
      cum->words++;
      cum->floats_in_gpr = 0;
    }

  /* Nothing pending to flush.  */
  if (cum->intoffset == -1)
    return;

  intoffset = cum->intoffset;
  cum->intoffset = -1;
  cum->floats_in_gpr = 0;

  if (intoffset % BITS_PER_WORD != 0)
    {
      unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
      if (!int_mode_for_size (bits, 0).exists ())
	{
	  /* We couldn't find an appropriate mode, which happens,
	     e.g., in packed structs when there are 3 bytes to load.
	     Back intoffset back to the beginning of the word in this
	     case.  */
	  intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
	}
    }

  /* Count the whole words spanned by [intoffset, bitpos).  */
  startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
  endbit = ROUND_UP (bitpos, BITS_PER_WORD);
  intregs = (endbit - startbit) / BITS_PER_WORD;
  cum->words += intregs;
  /* words should be unsigned.  */
  if ((unsigned) cum->words < (endbit / BITS_PER_WORD))
    {
      int pad = (endbit / BITS_PER_WORD) - cum->words;
      cum->words += pad;
    }
}
959
/* The darwin64 ABI calls for us to recurse down through structs,
   looking for elements passed in registers.  Unfortunately, we have
   to track int register count here also because of misalignments
   in powerpc alignment mode.  Advances the counters in CUM for every
   field of TYPE, whose fields start at bit STARTBITPOS.  */

static void
rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
					    const_tree type,
					    HOST_WIDE_INT startbitpos)
{
  tree f;

  for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
    if (TREE_CODE (f) == FIELD_DECL)
      {
	HOST_WIDE_INT bitpos = startbitpos;
	tree ftype = TREE_TYPE (f);
	machine_mode mode;
	if (ftype == error_mark_node)
	  continue;
	mode = TYPE_MODE (ftype);

	if (DECL_SIZE (f) != 0
	    && tree_fits_uhwi_p (bit_position (f)))
	  bitpos += int_bit_position (f);

	/* ??? FIXME: else assume zero offset.  */

	if (TREE_CODE (ftype) == RECORD_TYPE)
	  rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
	else if (USE_FP_FOR_ARG_P (cum, mode))
	  {
	    unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
	    rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
	    cum->fregno += n_fpregs;
	    /* Single-precision floats present a special problem for
	       us, because they are smaller than an 8-byte GPR, and so
	       the structure-packing rules combined with the standard
	       varargs behavior mean that we want to pack float/float
	       and float/int combinations into a single register's
	       space.  This is complicated by the arg advance flushing,
	       which works on arbitrarily large groups of int-type
	       fields.  */
	    if (mode == SFmode)
	      {
		if (cum->floats_in_gpr == 1)
		  {
		    /* Two floats in a word; count the word and reset
		       the float count.  */
		    cum->words++;
		    cum->floats_in_gpr = 0;
		  }
		else if (bitpos % 64 == 0)
		  {
		    /* A float at the beginning of an 8-byte word;
		       count it and put off adjusting cum->words until
		       we see if a arg advance flush is going to do it
		       for us.  */
		    cum->floats_in_gpr++;
		  }
		else
		  {
		    /* The float is at the end of a word, preceded
		       by integer fields, so the arg advance flush
		       just above has already set cum->words and
		       everything is taken care of.  */
		  }
	      }
	    else
	      cum->words += n_fpregs;
	  }
	else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
	  {
	    /* A vector field consumes one vector register and two
	       64-bit words of the parameter area.  */
	    rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
	    cum->vregno++;
	    cum->words += 2;
	  }
	else if (cum->intoffset == -1)
	  /* Start (or extend) a run of pending integer fields; the
	     next flush will advance cum->words past them.  */
	  cum->intoffset = bitpos;
      }
}
1041
1042 /* Check for an item that needs to be considered specially under the darwin 64
1043 bit ABI. These are record types where the mode is BLK or the structure is
1044 8 bytes in size. */
1045 int
1046 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
1047 {
1048 return rs6000_darwin64_abi
1049 && ((mode == BLKmode
1050 && TREE_CODE (type) == RECORD_TYPE
1051 && int_size_in_bytes (type) > 0)
1052 || (type && TREE_CODE (type) == RECORD_TYPE
1053 && int_size_in_bytes (type) == 8)) ? 1 : 0;
1054 }
1055
1056 /* Update the data in CUM to advance over an argument
1057 of mode MODE and data type TYPE.
1058 (TYPE is null for libcalls where that information may not be available.)
1059
1060 Note that for args passed by reference, function_arg will be called
1061 with MODE and TYPE set to that of the pointer to the arg, not the arg
1062 itself. */
1063
1064 static void
1065 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
1066 const_tree type, bool named, int depth)
1067 {
1068 machine_mode elt_mode;
1069 int n_elts;
1070
1071 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
1072
1073 /* Only tick off an argument if we're not recursing. */
1074 if (depth == 0)
1075 cum->nargs_prototype--;
1076
1077 #ifdef HAVE_AS_GNU_ATTRIBUTE
1078 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
1079 && cum->escapes)
1080 {
1081 if (SCALAR_FLOAT_MODE_P (mode))
1082 {
1083 rs6000_passes_float = true;
1084 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
1085 && (FLOAT128_IBM_P (mode)
1086 || FLOAT128_IEEE_P (mode)
1087 || (type != NULL
1088 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
1089 rs6000_passes_long_double = true;
1090 }
1091 if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
1092 rs6000_passes_vector = true;
1093 }
1094 #endif
1095
1096 if (TARGET_ALTIVEC_ABI
1097 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
1098 || (type && TREE_CODE (type) == VECTOR_TYPE
1099 && int_size_in_bytes (type) == 16)))
1100 {
1101 bool stack = false;
1102
1103 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
1104 {
1105 cum->vregno += n_elts;
1106
1107 /* If we are not splitting Complex IEEE128 args then account for the
1108 fact that they are passed in 2 VSX regs. */
1109 if (!targetm.calls.split_complex_arg && type
1110 && TREE_CODE (type) == COMPLEX_TYPE && elt_mode == KCmode)
1111 cum->vregno++;
1112
1113 if (!TARGET_ALTIVEC)
1114 error ("cannot pass argument in vector register because"
1115 " altivec instructions are disabled, use %qs"
1116 " to enable them", "-maltivec");
1117
1118 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
1119 even if it is going to be passed in a vector register.
1120 Darwin does the same for variable-argument functions. */
1121 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
1122 && TARGET_64BIT)
1123 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
1124 stack = true;
1125 }
1126 else
1127 stack = true;
1128
1129 if (stack)
1130 {
1131 int align;
1132
1133 /* Vector parameters must be 16-byte aligned. In 32-bit
1134 mode this means we need to take into account the offset
1135 to the parameter save area. In 64-bit mode, they just
1136 have to start on an even word, since the parameter save
1137 area is 16-byte aligned. */
1138 if (TARGET_32BIT)
1139 align = -(rs6000_parm_offset () + cum->words) & 3;
1140 else
1141 align = cum->words & 1;
1142 cum->words += align + rs6000_arg_size (mode, type);
1143
1144 if (TARGET_DEBUG_ARG)
1145 {
1146 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
1147 cum->words, align);
1148 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
1149 cum->nargs_prototype, cum->prototype,
1150 GET_MODE_NAME (mode));
1151 }
1152 }
1153 }
1154 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
1155 {
1156 int size = int_size_in_bytes (type);
1157 /* Variable sized types have size == -1 and are
1158 treated as if consisting entirely of ints.
1159 Pad to 16 byte boundary if needed. */
1160 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
1161 && (cum->words % 2) != 0)
1162 cum->words++;
1163 /* For varargs, we can just go up by the size of the struct. */
1164 if (!named)
1165 cum->words += (size + 7) / 8;
1166 else
1167 {
1168 /* It is tempting to say int register count just goes up by
1169 sizeof(type)/8, but this is wrong in a case such as
1170 { int; double; int; } [powerpc alignment]. We have to
1171 grovel through the fields for these too. */
1172 cum->intoffset = 0;
1173 cum->floats_in_gpr = 0;
1174 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
1175 rs6000_darwin64_record_arg_advance_flush (cum,
1176 size * BITS_PER_UNIT, 1);
1177 }
1178 if (TARGET_DEBUG_ARG)
1179 {
1180 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
1181 cum->words, TYPE_ALIGN (type), size);
1182 fprintf (stderr,
1183 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
1184 cum->nargs_prototype, cum->prototype,
1185 GET_MODE_NAME (mode));
1186 }
1187 }
1188 else if (DEFAULT_ABI == ABI_V4)
1189 {
1190 if (abi_v4_pass_in_fpr (mode, named))
1191 {
1192 /* _Decimal128 must use an even/odd register pair. This assumes
1193 that the register number is odd when fregno is odd. */
1194 if (mode == TDmode && (cum->fregno % 2) == 1)
1195 cum->fregno++;
1196
1197 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
1198 <= FP_ARG_V4_MAX_REG)
1199 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
1200 else
1201 {
1202 cum->fregno = FP_ARG_V4_MAX_REG + 1;
1203 if (mode == DFmode || FLOAT128_IBM_P (mode)
1204 || mode == DDmode || mode == TDmode)
1205 cum->words += cum->words & 1;
1206 cum->words += rs6000_arg_size (mode, type);
1207 }
1208 }
1209 else
1210 {
1211 int n_words = rs6000_arg_size (mode, type);
1212 int gregno = cum->sysv_gregno;
1213
1214 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
1215 As does any other 2 word item such as complex int due to a
1216 historical mistake. */
1217 if (n_words == 2)
1218 gregno += (1 - gregno) & 1;
1219
1220 /* Multi-reg args are not split between registers and stack. */
1221 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
1222 {
1223 /* Long long is aligned on the stack. So are other 2 word
1224 items such as complex int due to a historical mistake. */
1225 if (n_words == 2)
1226 cum->words += cum->words & 1;
1227 cum->words += n_words;
1228 }
1229
1230 /* Note: continuing to accumulate gregno past when we've started
1231 spilling to the stack indicates the fact that we've started
1232 spilling to the stack to expand_builtin_saveregs. */
1233 cum->sysv_gregno = gregno + n_words;
1234 }
1235
1236 if (TARGET_DEBUG_ARG)
1237 {
1238 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
1239 cum->words, cum->fregno);
1240 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
1241 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
1242 fprintf (stderr, "mode = %4s, named = %d\n",
1243 GET_MODE_NAME (mode), named);
1244 }
1245 }
1246 else
1247 {
1248 int n_words = rs6000_arg_size (mode, type);
1249 int start_words = cum->words;
1250 int align_words = rs6000_parm_start (mode, type, start_words);
1251
1252 cum->words = align_words + n_words;
1253
1254 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
1255 {
1256 /* _Decimal128 must be passed in an even/odd float register pair.
1257 This assumes that the register number is odd when fregno is
1258 odd. */
1259 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
1260 cum->fregno++;
1261 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
1262 }
1263
1264 if (TARGET_DEBUG_ARG)
1265 {
1266 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
1267 cum->words, cum->fregno);
1268 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
1269 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
1270 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
1271 named, align_words - start_words, depth);
1272 }
1273 }
1274 }
1275
1276 void
1277 rs6000_function_arg_advance (cumulative_args_t cum,
1278 const function_arg_info &arg)
1279 {
1280 rs6000_function_arg_advance_1 (get_cumulative_args (cum),
1281 arg.mode, arg.type, arg.named, 0);
1282 }
1283
/* A subroutine of rs6000_darwin64_record_arg.  Assign the bits of the
   structure between cum->intoffset and bitpos to integer registers,
   appending (reg, byte-offset) EXPR_LIST entries to RVEC at index *K.
   A no-op when no integer run is pending (cum->intoffset == -1).  */

static void
rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
				  HOST_WIDE_INT bitpos, rtx rvec[], int *k)
{
  machine_mode mode;
  unsigned int regno;
  unsigned int startbit, endbit;
  int this_regno, intregs, intoffset;
  rtx reg;

  /* Nothing accumulated to flush.  */
  if (cum->intoffset == -1)
    return;

  /* Consume the pending run; reset so the caller can start a new one.  */
  intoffset = cum->intoffset;
  cum->intoffset = -1;

  /* If this is the trailing part of a word, try to only load that
     much into the register.  Otherwise load the whole register.  Note
     that in the latter case we may pick up unwanted bits.  It's not a
     problem at the moment but may wish to revisit.  */

  if (intoffset % BITS_PER_WORD != 0)
    {
      unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
      if (!int_mode_for_size (bits, 0).exists (&mode))
	{
	  /* We couldn't find an appropriate mode, which happens,
	     e.g., in packed structs when there are 3 bytes to load.
	     Back intoffset back to the beginning of the word in this
	     case.  */
	  intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
	  mode = word_mode;
	}
    }
  else
    mode = word_mode;

  /* Number of whole words covered by [intoffset, bitpos), and the
     first parameter-word index they occupy.  */
  startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
  endbit = ROUND_UP (bitpos, BITS_PER_WORD);
  intregs = (endbit - startbit) / BITS_PER_WORD;
  this_regno = cum->words + intoffset / BITS_PER_WORD;

  /* If the run overflows the remaining GPRs, the argument must also be
     placed on the stack.  */
  if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
    cum->use_stack = 1;

  intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
  if (intregs <= 0)
    return;

  intoffset /= BITS_PER_UNIT;
  do
    {
      regno = GP_ARG_MIN_REG + this_regno;
      reg = gen_rtx_REG (mode, regno);
      rvec[(*k)++] =
	gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));

      this_regno += 1;
      /* Round intoffset up to the next word boundary; after the first
	 (possibly partial) word, all subsequent loads are full words.  */
      intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
      mode = word_mode;
      intregs -= 1;
    }
  while (intregs > 0);
}
1351
1352 /* Recursive workhorse for the following. */
1353
1354 static void
1355 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
1356 HOST_WIDE_INT startbitpos, rtx rvec[],
1357 int *k)
1358 {
1359 tree f;
1360
1361 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
1362 if (TREE_CODE (f) == FIELD_DECL)
1363 {
1364 HOST_WIDE_INT bitpos = startbitpos;
1365 tree ftype = TREE_TYPE (f);
1366 machine_mode mode;
1367 if (ftype == error_mark_node)
1368 continue;
1369 mode = TYPE_MODE (ftype);
1370
1371 if (DECL_SIZE (f) != 0
1372 && tree_fits_uhwi_p (bit_position (f)))
1373 bitpos += int_bit_position (f);
1374
1375 /* ??? FIXME: else assume zero offset. */
1376
1377 if (TREE_CODE (ftype) == RECORD_TYPE)
1378 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
1379 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
1380 {
1381 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
1382 #if 0
1383 switch (mode)
1384 {
1385 case E_SCmode: mode = SFmode; break;
1386 case E_DCmode: mode = DFmode; break;
1387 case E_TCmode: mode = TFmode; break;
1388 default: break;
1389 }
1390 #endif
1391 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
1392 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
1393 {
1394 gcc_assert (cum->fregno == FP_ARG_MAX_REG
1395 && (mode == TFmode || mode == TDmode));
1396 /* Long double or _Decimal128 split over regs and memory. */
1397 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
1398 cum->use_stack=1;
1399 }
1400 rvec[(*k)++]
1401 = gen_rtx_EXPR_LIST (VOIDmode,
1402 gen_rtx_REG (mode, cum->fregno++),
1403 GEN_INT (bitpos / BITS_PER_UNIT));
1404 if (FLOAT128_2REG_P (mode))
1405 cum->fregno++;
1406 }
1407 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
1408 {
1409 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
1410 rvec[(*k)++]
1411 = gen_rtx_EXPR_LIST (VOIDmode,
1412 gen_rtx_REG (mode, cum->vregno++),
1413 GEN_INT (bitpos / BITS_PER_UNIT));
1414 }
1415 else if (cum->intoffset == -1)
1416 cum->intoffset = bitpos;
1417 }
1418 }
1419
/* For the darwin64 ABI, we want to construct a PARALLEL consisting of
   the register(s) to be used for each field and subfield of a struct
   being passed by value, along with the offset of where the
   register's value may be found in the block.  FP fields go in FP
   register, vector fields go in vector registers, and everything
   else goes in int registers, packed as in memory.

   This code is also used for function return values.  RETVAL indicates
   whether this is the case.

   Much of this is taken from the SPARC V9 port, which has a similar
   calling convention.  */

rtx
rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
			    bool named, bool retval)
{
  rtx rvec[FIRST_PSEUDO_REGISTER];
  int k = 1, kbase = 1;
  HOST_WIDE_INT typesize = int_size_in_bytes (type);
  /* This is a copy; modifications are not visible to our caller.  */
  CUMULATIVE_ARGS copy_cum = *orig_cum;
  CUMULATIVE_ARGS *cum = &copy_cum;

  /* Pad to 16 byte boundary if needed.  */
  if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
      && (cum->words % 2) != 0)
    cum->words++;

  /* Reset the per-record scan state used by the recurse/flush pair.  */
  cum->intoffset = 0;
  cum->use_stack = 0;
  cum->named = named;

  /* Put entries into rvec[] for individual FP and vector fields, and
     for the chunks of memory that go in int regs.  Note we start at
     element 1; 0 is reserved for an indication of using memory, and
     may or may not be filled in below. */
  rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
  /* Flush any trailing run of integer-typed bits.  */
  rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);

  /* If any part of the struct went on the stack put all of it there.
     This hack is because the generic code for
     FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
     parts of the struct are not at the beginning.  */
  if (cum->use_stack)
    {
      if (retval)
	return NULL_RTX;	/* doesn't go in registers at all  */
      /* Include slot 0: the magic (NULL_RTX, 0) entry marks "also in
	 memory" for the middle end.  */
      kbase = 0;
      rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
    }
  if (k > 1 || cum->use_stack)
    return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
  else
    return NULL_RTX;
}
1476
/* Determine where to place an argument in 64-bit mode with 32-bit ABI.
   ALIGN_WORDS is the (already aligned) parameter-word offset of the
   argument.  Returns NULL_RTX when the argument is entirely in memory,
   a single REG when it fits in one GPR, or a PARALLEL describing the
   SImode pieces (plus an optional leading memory marker).  */

static rtx
rs6000_mixed_function_arg (machine_mode mode, const_tree type,
			   int align_words)
{
  int n_units;
  int i, k;
  rtx rvec[GP_ARG_NUM_REG + 1];

  /* No argument GPRs remain: all of it goes to memory.  */
  if (align_words >= GP_ARG_NUM_REG)
    return NULL_RTX;

  n_units = rs6000_arg_size (mode, type);

  /* Optimize the simple case where the arg fits in one gpr, except in
     the case of BLKmode due to assign_parms assuming that registers are
     BITS_PER_WORD wide.  */
  if (n_units == 0
      || (n_units == 1 && mode != BLKmode))
    return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);

  k = 0;
  if (align_words + n_units > GP_ARG_NUM_REG)
    /* Not all of the arg fits in gprs.  Say that it goes in memory too,
       using a magic NULL_RTX component.
       This is not strictly correct.  Only some of the arg belongs in
       memory, not all of it.  However, the normal scheme using
       function_arg_partial_nregs can result in unusual subregs, eg.
       (subreg:SI (reg:DF) 4), which are not handled well.  The code to
       store the whole arg to memory is often more efficient than code
       to store pieces, and we know that space is available in the right
       place for the whole arg.  */
    rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);

  /* Emit one SImode register per word at increasing 4-byte offsets,
     stopping at the last GPR or the end of the argument.  */
  i = 0;
  do
    {
      rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
      rtx off = GEN_INT (i++ * 4);
      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
    }
  while (++align_words < GP_ARG_NUM_REG && --n_units != 0);

  return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
}
1523
/* We have an argument of MODE and TYPE that goes into FPRs or VRs,
   but must also be copied into the parameter save area starting at
   offset ALIGN_WORDS.  Fill in RVEC with the elements corresponding
   to the GPRs and/or memory.  Return the number of elements used.  */

static int
rs6000_psave_function_arg (machine_mode mode, const_tree type,
			   int align_words, rtx *rvec)
{
  int k = 0;

  if (align_words < GP_ARG_NUM_REG)
    {
      int n_words = rs6000_arg_size (mode, type);

      /* Describe word by word if the arg spills to memory, has no single
	 machine mode (BLKmode), or in the 32-bit-ABI-on-64-bit-HW case
	 where GPR width and word width differ.  */
      if (align_words + n_words > GP_ARG_NUM_REG
	  || mode == BLKmode
	  || (TARGET_32BIT && TARGET_POWERPC64))
	{
	  /* If this is partially on the stack, then we only
	     include the portion actually in registers here.  */
	  machine_mode rmode = TARGET_32BIT ? SImode : DImode;
	  int i = 0;

	  if (align_words + n_words > GP_ARG_NUM_REG)
	    {
	      /* Not all of the arg fits in gprs.  Say that it goes in memory
		 too, using a magic NULL_RTX component.  Also see comment in
		 rs6000_mixed_function_arg for why the normal
		 function_arg_partial_nregs scheme doesn't work in this case. */
	      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
	    }

	  /* One word-mode register per word, at increasing offsets.  */
	  do
	    {
	      rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
	      rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
	      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
	    }
	  while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
	}
      else
	{
	  /* The whole arg fits in gprs.  */
	  rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
	  rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
	}
    }
  else
    {
      /* It's entirely in memory.  */
      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
    }

  return k;
}
1580
1581 /* RVEC is a vector of K components of an argument of mode MODE.
1582 Construct the final function_arg return value from it. */
1583
1584 static rtx
1585 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
1586 {
1587 gcc_assert (k >= 1);
1588
1589 /* Avoid returning a PARALLEL in the trivial cases. */
1590 if (k == 1)
1591 {
1592 if (XEXP (rvec[0], 0) == NULL_RTX)
1593 return NULL_RTX;
1594
1595 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
1596 return XEXP (rvec[0], 0);
1597 }
1598
1599 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
1600 }
1601
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.  It is
   not modified in this routine.
   ARG is a description of the argument.

   On RS/6000 the first eight words of non-FP are normally in registers
   and the rest are pushed.  Under AIX, the first 13 FP args are in registers.
   Under V.4, the first 8 FP args are in registers.

   If this is floating-point and no prototype is specified, we use
   both an FP and integer register (or possibly FP reg and stack).  Library
   functions (when CALL_LIBCALL is set) always have the proper types for args,
   so we can pass the FP value just in one register.  emit_library_function
   doesn't support PARALLEL anyway.

   Note that for args passed by reference, function_arg will be called
   with ARG describing the pointer to the arg, not the arg itself.  */

rtx
rs6000_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  tree type = arg.type;
  machine_mode mode = arg.mode;
  bool named = arg.named;
  enum rs6000_abi abi = DEFAULT_ABI;
  machine_mode elt_mode;
  int n_elts;

  /* We do not allow MMA types being used as function arguments.  */
  if (mode == OOmode || mode == XOmode)
    {
      if (TYPE_CANONICAL (type) != NULL_TREE)
	type = TYPE_CANONICAL (type);
      error ("invalid use of MMA operand of type %qs as a function parameter",
	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
      return NULL_RTX;
    }

  /* Likewise, dense math (TDOmode) operands may not be passed.  */
  if (mode == TDOmode)
    {
      if (TYPE_CANONICAL (type) != NULL_TREE)
	type = TYPE_CANONICAL (type);
      error ("invalid use of dense math operand of type %qs as a function "
	     "parameter",
	     IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))));
      return NULL_RTX;
    }

  /* Return a marker to indicate whether CR1 needs to set or clear the
     bit that V.4 uses to say fp args were passed in registers.
     Assume that we don't need the marker for software floating point,
     or compiler generated library calls.  */
  if (arg.end_marker_p ())
    {
      if (abi == ABI_V4
	  && (cum->call_cookie & CALL_LIBCALL) == 0
	  && (cum->stdarg
	      || (cum->nargs_prototype < 0
		  && (cum->prototype || TARGET_NO_PROTOTYPE)))
	  && TARGET_HARD_FLOAT)
	return GEN_INT (cum->call_cookie
			| ((cum->fregno == FP_ARG_MIN_REG)
			   ? CALL_V4_SET_FP_ARGS
			   : CALL_V4_CLEAR_FP_ARGS));

      return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
    }

  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);

  if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
    {
      rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
      if (rslt != NULL_RTX)
	return rslt;
      /* Else fall through to usual handling.  */
    }

  if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
    {
      rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
      rtx r, off;
      int i, k = 0;

      /* Do we also need to pass this argument in the parameter save area?
	 Library support functions for IEEE 128-bit are assumed to not need the
	 value passed both in GPRs and in vector registers.  */
      if (TARGET_64BIT && !cum->prototype
	  && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
	{
	  /* Vector save-area slots start on an even word.  */
	  int align_words = ROUND_UP (cum->words, 2);
	  k = rs6000_psave_function_arg (mode, type, align_words, rvec);
	}

      /* Describe where this argument goes in the vector registers.  */
      for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
	{
	  r = gen_rtx_REG (elt_mode, cum->vregno + i);
	  off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
	  rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
	}

      return rs6000_finish_function_arg (mode, rvec, k);
    }
  else if (TARGET_ALTIVEC_ABI
	   && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
	       || (type && TREE_CODE (type) == VECTOR_TYPE
		   && int_size_in_bytes (type) == 16)))
    {
      if (named || abi == ABI_V4)
	return NULL_RTX;
      else
	{
	  /* Vector parameters to varargs functions under AIX or Darwin
	     get passed in memory and possibly also in GPRs.  */
	  int align, align_words, n_words;
	  machine_mode part_mode;

	  /* Vector parameters must be 16-byte aligned.  In 32-bit
	     mode this means we need to take into account the offset
	     to the parameter save area.  In 64-bit mode, they just
	     have to start on an even word, since the parameter save
	     area is 16-byte aligned.  */
	  if (TARGET_32BIT)
	    align = -(rs6000_parm_offset () + cum->words) & 3;
	  else
	    align = cum->words & 1;
	  align_words = cum->words + align;

	  /* Out of registers?  Memory, then.  */
	  if (align_words >= GP_ARG_NUM_REG)
	    return NULL_RTX;

	  if (TARGET_32BIT && TARGET_POWERPC64)
	    return rs6000_mixed_function_arg (mode, type, align_words);

	  /* The vector value goes in GPRs.  Only the part of the
	     value in GPRs is reported here.  */
	  part_mode = mode;
	  n_words = rs6000_arg_size (mode, type);
	  if (align_words + n_words > GP_ARG_NUM_REG)
	    /* Fortunately, there are only two possibilities, the value
	       is either wholly in GPRs or half in GPRs and half not.  */
	    part_mode = DImode;

	  return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
	}
    }

  else if (abi == ABI_V4)
    {
      if (abi_v4_pass_in_fpr (mode, named))
	{
	  /* _Decimal128 must use an even/odd register pair.  This assumes
	     that the register number is odd when fregno is odd.  */
	  if (mode == TDmode && (cum->fregno % 2) == 1)
	    cum->fregno++;

	  if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
	      <= FP_ARG_V4_MAX_REG)
	    return gen_rtx_REG (mode, cum->fregno);
	  else
	    return NULL_RTX;
	}
      else
	{
	  int n_words = rs6000_arg_size (mode, type);
	  int gregno = cum->sysv_gregno;

	  /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
	     As does any other 2 word item such as complex int due to a
	     historical mistake.  */
	  if (n_words == 2)
	    gregno += (1 - gregno) & 1;

	  /* Multi-reg args are not split between registers and stack.  */
	  if (gregno + n_words - 1 > GP_ARG_MAX_REG)
	    return NULL_RTX;

	  if (TARGET_32BIT && TARGET_POWERPC64)
	    return rs6000_mixed_function_arg (mode, type,
					      gregno - GP_ARG_MIN_REG);
	  return gen_rtx_REG (mode, gregno);
	}
    }
  else
    {
      int align_words = rs6000_parm_start (mode, type, cum->words);

      /* _Decimal128 must be passed in an even/odd float register pair.
	 This assumes that the register number is odd when fregno is odd.  */
      if (elt_mode == TDmode && (cum->fregno % 2) == 1)
	cum->fregno++;

      if (USE_FP_FOR_ARG_P (cum, elt_mode)
	  && !(TARGET_AIX && !TARGET_ELF
	       && type != NULL && AGGREGATE_TYPE_P (type)))
	{
	  rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
	  rtx r, off;
	  int i, k = 0;
	  unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
	  int fpr_words;

	  /* Do we also need to pass this argument in the parameter
	     save area?  */
	  if (type && (cum->nargs_prototype <= 0
		       || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
			   && TARGET_XL_COMPAT
			   && align_words >= GP_ARG_NUM_REG)))
	    k = rs6000_psave_function_arg (mode, type, align_words, rvec);

	  /* Describe where this argument goes in the fprs.  */
	  for (i = 0; i < n_elts
	       && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
	    {
	      /* Check if the argument is split over registers and memory.
		 This can only ever happen for long double or _Decimal128;
		 complex types are handled via split_complex_arg.  */
	      machine_mode fmode = elt_mode;
	      if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
		{
		  gcc_assert (FLOAT128_2REG_P (fmode));
		  fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
		}

	      r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
	      off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
	      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
	    }

	  /* If there were not enough FPRs to hold the argument, the rest
	     usually goes into memory.  However, if the current position
	     is still within the register parameter area, a portion may
	     actually have to go into GPRs.

	     Note that it may happen that the portion of the argument
	     passed in the first "half" of the first GPR was already
	     passed in the last FPR as well.

	     For unnamed arguments, we already set up GPRs to cover the
	     whole argument in rs6000_psave_function_arg, so there is
	     nothing further to do at this point.  */
	  fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
	  if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
	      && cum->nargs_prototype > 0)
	    {
	      machine_mode rmode = TARGET_32BIT ? SImode : DImode;
	      int n_words = rs6000_arg_size (mode, type);

	      align_words += fpr_words;
	      n_words -= fpr_words;

	      /* Remaining words go into GPRs at increasing offsets.  */
	      do
		{
		  r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
		  off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
		  rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
		}
	      while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
	    }

	  return rs6000_finish_function_arg (mode, rvec, k);
	}
      else if (align_words < GP_ARG_NUM_REG)
	{
	  if (TARGET_32BIT && TARGET_POWERPC64)
	    return rs6000_mixed_function_arg (mode, type, align_words);

	  return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
	}
      else
	return NULL_RTX;
    }
}
1882 \f
/* For an arg passed partly in registers and partly in memory, this is
   the number of bytes passed in registers.  For args passed entirely in
   registers or entirely in memory, zero.  When an arg is described by a
   PARALLEL, perhaps using more than one register type, this function
   returns the number of bytes used by the first element of the PARALLEL.  */

int
rs6000_arg_partial_bytes (cumulative_args_t cum_v,
			  const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  bool passed_in_gprs = true;
  int ret = 0;
  int align_words;
  machine_mode elt_mode;
  int n_elts;

  rs6000_discover_homogeneous_aggregate (arg.mode, arg.type,
					 &elt_mode, &n_elts);

  /* Under V.4 an argument is never split between registers and stack
     (see rs6000_function_arg), so no partial bytes.  */
  if (DEFAULT_ABI == ABI_V4)
    return 0;

  if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, arg.named))
    {
      /* If we are passing this arg in the fixed parameter save area (gprs or
	 memory) as well as VRs, we do not use the partial bytes mechanism;
	 instead, rs6000_function_arg will return a PARALLEL including a memory
	 element as necessary.  Library support functions for IEEE 128-bit are
	 assumed to not need the value passed both in GPRs and in vector
	 registers.  */
      if (TARGET_64BIT && !cum->prototype
	  && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
	return 0;

      /* Otherwise, we pass in VRs only.  Check for partial copies.  */
      passed_in_gprs = false;
      if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
	ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
    }

  /* In this complicated case we just disable the partial_nregs code.  */
  if (TARGET_MACHO && rs6000_darwin64_struct_check_p (arg.mode, arg.type))
    return 0;

  align_words = rs6000_parm_start (arg.mode, arg.type, cum->words);

  if (USE_FP_FOR_ARG_P (cum, elt_mode)
      && !(TARGET_AIX && !TARGET_ELF && arg.aggregate_type_p ()))
    {
      unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;

      /* If we are passing this arg in the fixed parameter save area
	 (gprs or memory) as well as FPRs, we do not use the partial
	 bytes mechanism; instead, rs6000_function_arg will return a
	 PARALLEL including a memory element as necessary.  */
      if (arg.type
	  && (cum->nargs_prototype <= 0
	      || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
		  && TARGET_XL_COMPAT
		  && align_words >= GP_ARG_NUM_REG)))
	return 0;

      /* Otherwise, we pass in FPRs only.  Check for partial copies.  */
      passed_in_gprs = false;
      if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
	{
	  /* Compute number of bytes / words passed in FPRs.  If there
	     is still space available in the register parameter area
	     *after* that amount, a part of the argument will be passed
	     in GPRs.  In that case, the total amount passed in any
	     registers is equal to the amount that would have been passed
	     in GPRs if everything were passed there, so we fall back to
	     the GPR code below to compute the appropriate value.  */
	  int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
		     * MIN (8, GET_MODE_SIZE (elt_mode)));
	  int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);

	  if (align_words + fpr_words < GP_ARG_NUM_REG)
	    passed_in_gprs = true;
	  else
	    ret = fpr;
	}
    }

  /* GPR case: count only the bytes that land in GPRs when the argument
     straddles the end of the register parameter area.  */
  if (passed_in_gprs
      && align_words < GP_ARG_NUM_REG
      && GP_ARG_NUM_REG < align_words + rs6000_arg_size (arg.mode, arg.type))
    ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);

  if (ret != 0 && TARGET_DEBUG_ARG)
    fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);

  return ret;
}
1978 \f
1979 /* A C expression that indicates when an argument must be passed by
1980 reference. If nonzero for an argument, a copy of that argument is
1981 made in memory and a pointer to the argument is passed instead of
1982 the argument itself. The pointer is passed in whatever way is
1983 appropriate for passing a pointer to that type.
1984
1985 Under V.4, aggregates and long double are passed by reference.
1986
1987 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
1988 reference unless the AltiVec vector extension ABI is in force.
1989
1990 As an extension to all ABIs, variable sized types are passed by
1991 reference. */
1992
1993 bool
1994 rs6000_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
1995 {
1996 if (!arg.type)
1997 return 0;
1998
1999 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
2000 && FLOAT128_IEEE_P (TYPE_MODE (arg.type)))
2001 {
2002 if (TARGET_DEBUG_ARG)
2003 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
2004 return 1;
2005 }
2006
2007 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (arg.type))
2008 {
2009 if (TARGET_DEBUG_ARG)
2010 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
2011 return 1;
2012 }
2013
2014 if (int_size_in_bytes (arg.type) < 0)
2015 {
2016 if (TARGET_DEBUG_ARG)
2017 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
2018 return 1;
2019 }
2020
2021 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
2022 modes only exist for GCC vector types if -maltivec. */
2023 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (arg.mode))
2024 {
2025 if (TARGET_DEBUG_ARG)
2026 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
2027 return 1;
2028 }
2029
2030 /* Pass synthetic vectors in memory. */
2031 if (TREE_CODE (arg.type) == VECTOR_TYPE
2032 && int_size_in_bytes (arg.type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
2033 {
2034 static bool warned_for_pass_big_vectors = false;
2035 if (TARGET_DEBUG_ARG)
2036 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
2037 if (!warned_for_pass_big_vectors)
2038 {
2039 warning (OPT_Wpsabi, "GCC vector passed by reference: "
2040 "non-standard ABI extension with no compatibility "
2041 "guarantee");
2042 warned_for_pass_big_vectors = true;
2043 }
2044 return 1;
2045 }
2046
2047 return 0;
2048 }
2049
2050 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
2051 already processes. Return true if the parameter must be passed
2052 (fully or partially) on the stack. */
2053
static bool
rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
{
  int unsignedp;
  rtx entry_parm;

  /* Catch errors.  */
  if (type == NULL || type == error_mark_node)
    return true;

  /* Handle types with no storage requirement.  */
  if (TYPE_MODE (type) == VOIDmode)
    return false;

  /* Handle complex types: the two components are passed as two scalar
     arguments, and each recursive call also advances ARGS_SO_FAR (via
     rs6000_function_arg_advance at the bottom of this function).  The
     duplicated call below is therefore intentional, not a typo.  If
     the first component needs the stack we return true without
     advancing past the second, which is fine because callers stop
     scanning at the first true.  */
  if (TREE_CODE (type) == COMPLEX_TYPE)
    return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
	    || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));

  /* Handle transparent aggregates: pass as their (single) first field.  */
  if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
      && TYPE_TRANSPARENT_AGGR (type))
    type = TREE_TYPE (first_field (type));

  /* See if this arg was passed by invisible reference; if so, ARG is
     rewritten to describe the pointer instead.  */
  function_arg_info arg (type, /*named=*/true);
  apply_pass_by_reference_rules (get_cumulative_args (args_so_far), arg);

  /* Find mode as it is passed by the ABI.  */
  unsignedp = TYPE_UNSIGNED (type);
  arg.mode = promote_mode (arg.type, arg.mode, &unsignedp);

  /* If we must pass in stack, we need a stack.  */
  if (rs6000_must_pass_in_stack (arg))
    return true;

  /* If there is no incoming register, we need a stack.  */
  entry_parm = rs6000_function_arg (args_so_far, arg);
  if (entry_parm == NULL)
    return true;

  /* Likewise if we need to pass both in registers and on the stack.
     A PARALLEL with a NULL first element is how the backend encodes
     "part in memory".  */
  if (GET_CODE (entry_parm) == PARALLEL
      && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
    return true;

  /* Also true if we're partially in registers and partially not.  */
  if (rs6000_arg_partial_bytes (args_so_far, arg) != 0)
    return true;

  /* Fully in registers: advance ARGS_SO_FAR past this argument (a
     side effect callers rely on) and report no stack needed.  */
  rs6000_function_arg_advance (args_so_far, arg);
  return false;
}
2108
2109 /* Return true if FUN has no prototype, has a variable argument
2110 list, or passes any parameter in memory. */
2111
static bool
rs6000_function_parms_need_stack (tree fun, bool incoming)
{
  tree fntype, result;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;

  if (!fun)
    /* Must be a libcall, all of which only use reg parms.  */
    return false;

  /* FUN may be a function type or a decl; normalize to the type.  */
  fntype = fun;
  if (!TYPE_P (fun))
    fntype = TREE_TYPE (fun);

  /* Varargs functions need the parameter save area.  Unprototyped
     calls (outgoing only) must also assume the worst.  */
  if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
    return true;

  INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
  args_so_far = pack_cumulative_args (&args_so_far_v);

  /* When incoming, we will have been passed the function decl.
     It is necessary to use the decl to handle K&R style functions,
     where TYPE_ARG_TYPES may not be available.  */
  if (incoming)
    {
      gcc_assert (DECL_P (fun));
      result = DECL_RESULT (fun);
    }
  else
    result = TREE_TYPE (fntype);

  /* An aggregate return value is passed via a hidden pointer argument,
     which consumes argument-register space; account for it first by
     processing a pointer-to-result as if it were the first parm.  */
  if (result && aggregate_value_p (result, fntype))
    {
      if (!TYPE_P (result))
	result = TREE_TYPE (result);
      result = build_pointer_type (result);
      rs6000_parm_needs_stack (args_so_far, result);
    }

  if (incoming)
    {
      tree parm;

      /* NOTE(review): DECL_ARGUMENTS chains end in NULL_TREE; the
	 void_list_node test looks vestigial (it belongs with
	 TYPE_ARG_TYPES lists) — harmless, but confirm before relying
	 on it.  */
      for (parm = DECL_ARGUMENTS (fun);
	   parm && parm != void_list_node;
	   parm = TREE_CHAIN (parm))
	if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
	  return true;
    }
  else
    {
      function_args_iterator args_iter;
      tree arg_type;

      FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
	if (rs6000_parm_needs_stack (args_so_far, arg_type))
	  return true;
    }

  return false;
}
2175
2176 /* Return the size of the REG_PARM_STACK_SPACE are for FUN. This is
2177 usually a constant depending on the ABI. However, in the ELFv2 ABI
2178 the register parameter area is optional when calling a function that
2179 has a prototype is scope, has no variable argument list, and passes
2180 all parameters in registers. */
2181
2182 int
2183 rs6000_reg_parm_stack_space (tree fun, bool incoming)
2184 {
2185 int reg_parm_stack_space;
2186
2187 switch (DEFAULT_ABI)
2188 {
2189 default:
2190 reg_parm_stack_space = 0;
2191 break;
2192
2193 case ABI_AIX:
2194 case ABI_DARWIN:
2195 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
2196 break;
2197
2198 case ABI_ELFv2:
2199 /* ??? Recomputing this every time is a bit expensive. Is there
2200 a place to cache this information? */
2201 if (rs6000_function_parms_need_stack (fun, incoming))
2202 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
2203 else
2204 reg_parm_stack_space = 0;
2205 break;
2206 }
2207
2208 return reg_parm_stack_space;
2209 }
2210
2211 static void
2212 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
2213 {
2214 int i;
2215 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
2216
2217 if (nregs == 0)
2218 return;
2219
2220 for (i = 0; i < nregs; i++)
2221 {
2222 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
2223 if (reload_completed)
2224 {
2225 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
2226 tem = NULL_RTX;
2227 else
2228 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
2229 i * GET_MODE_SIZE (reg_mode));
2230 }
2231 else
2232 tem = replace_equiv_address (tem, XEXP (tem, 0));
2233
2234 gcc_assert (tem);
2235
2236 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
2237 }
2238 }
2239 \f
2240 /* Perform any needed actions needed for a function that is receiving a
2241 variable number of arguments.
2242
2243 CUM is as above.
2244
2245 ARG is the last named argument.
2246
2247 PRETEND_SIZE is a variable that should be set to the amount of stack
2248 that must be pushed by the prolog to pretend that our caller pushed
2249 it.
2250
2251 Normally, this macro will push all remaining incoming registers on the
2252 stack and set PRETEND_SIZE to the length of the registers pushed. */
2253
void
setup_incoming_varargs (cumulative_args_t cum,
			const function_arg_info &arg,
			int *pretend_size ATTRIBUTE_UNUSED, int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  int reg_size = TARGET_32BIT ? 4 : 8;
  rtx save_area = NULL_RTX, mem;
  int first_reg_offset;
  alias_set_type set;

  /* Skip the last named argument, so NEXT_CUM describes where the
     first anonymous argument arrives.  For a "(...)"-only prototype
     there is no named argument to skip.  */
  next_cum = *get_cumulative_args (cum);
  if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)))
    rs6000_function_arg_advance_1 (&next_cum, arg.mode, arg.type, arg.named,
				   0);

  if (DEFAULT_ABI == ABI_V4)
    {
      /* Index of the first unconsumed GPR within the argument GPRs.  */
      first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;

      if (! no_rtl)
	{
	  int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
	  HOST_WIDE_INT offset = 0;

	  /* Try to optimize the size of the varargs save area.
	     The ABI requires that ap.reg_save_area is doubleword
	     aligned, but we don't need to allocate space for all
	     the bytes, only those to which we actually will save
	     anything.  */
	  if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
	    gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
	  if (TARGET_HARD_FLOAT
	      && next_cum.fregno <= FP_ARG_V4_MAX_REG
	      && cfun->va_list_fpr_size)
	    {
	      /* When GPRs are saved too, the FPR block begins after
		 the skipped (already-consumed) FPR slots.  */
	      if (gpr_reg_num)
		fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
			   * UNITS_PER_FP_WORD;
	      if (cfun->va_list_fpr_size
		  < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
		fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
	      else
		fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
			    * UNITS_PER_FP_WORD;
	    }
	  if (gpr_reg_num)
	    {
	      /* Doubleword-align the start of the GPR save block.  */
	      offset = -((first_reg_offset * reg_size) & ~7);
	      if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
		{
		  gpr_reg_num = cfun->va_list_gpr_size;
		  if (reg_size == 4 && (first_reg_offset & 1))
		    gpr_reg_num++;
		}
	      gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
	    }
	  else if (fpr_size)
	    /* No GPRs saved: place the FPR block where va_arg expects
	       it, i.e. past the full (unallocated) GPR area.  */
	    offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
		       * UNITS_PER_FP_WORD
		     - (int) (GP_ARG_NUM_REG * reg_size);

	  if (gpr_size + fpr_size)
	    {
	      rtx reg_save_area
		= assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
	      gcc_assert (MEM_P (reg_save_area));
	      reg_save_area = XEXP (reg_save_area, 0);
	      /* Fold the slot's frame offset into OFFSET so SAVE_AREA
		 can be expressed off virtual_stack_vars_rtx directly.  */
	      if (GET_CODE (reg_save_area) == PLUS)
		{
		  gcc_assert (XEXP (reg_save_area, 0)
			      == virtual_stack_vars_rtx);
		  gcc_assert (CONST_INT_P (XEXP (reg_save_area, 1)));
		  offset += INTVAL (XEXP (reg_save_area, 1));
		}
	      else
		gcc_assert (reg_save_area == virtual_stack_vars_rtx);
	    }

	  /* Remembered for rs6000_va_start when it sets reg_save_area.  */
	  cfun->machine->varargs_save_offset = offset;
	  save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
	}
    }
  else
    {
      first_reg_offset = next_cum.words;
      save_area = crtl->args.internal_arg_pointer;

      if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
	  && targetm.calls.must_pass_in_stack (arg))
	first_reg_offset += rs6000_arg_size (TYPE_MODE (arg.type), arg.type);
    }

  /* Spill the remaining argument GPRs into the save area.  */
  set = get_varargs_alias_set ();
  if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
      && cfun->va_list_gpr_size)
    {
      int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;

      if (va_list_gpr_counter_field)
	/* V4 va_list_gpr_size counts number of registers needed.  */
	n_gpr = cfun->va_list_gpr_size;
      else
	/* char * va_list instead counts number of bytes needed.  */
	n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;

      if (nregs > n_gpr)
	nregs = n_gpr;

      mem = gen_rtx_MEM (BLKmode,
			 plus_constant (Pmode, save_area,
					first_reg_offset * reg_size));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
				  nregs);
    }

  /* Save FP registers if needed.  */
  if (DEFAULT_ABI == ABI_V4
      && TARGET_HARD_FLOAT
      && ! no_rtl
      && next_cum.fregno <= FP_ARG_V4_MAX_REG
      && cfun->va_list_fpr_size)
    {
      int fregno = next_cum.fregno, nregs;
      rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
      rtx lab = gen_label_rtx ();
      int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
					       * UNITS_PER_FP_WORD);

      /* Conditionally skip the FPR stores based on CR1.
	 NOTE(review): presumably this tests the caller-set
	 "FP args passed" condition bit from the V.4 calling
	 sequence — confirm the branch polarity against the ABI.  */
      emit_jump_insn
	(gen_rtx_SET (pc_rtx,
		      gen_rtx_IF_THEN_ELSE (VOIDmode,
					    gen_rtx_NE (VOIDmode, cr1,
							const0_rtx),
					    gen_rtx_LABEL_REF (VOIDmode, lab),
					    pc_rtx)));

      for (nregs = 0;
	   fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
	   fregno++, off += UNITS_PER_FP_WORD, nregs++)
	{
	  mem = gen_rtx_MEM (TARGET_HARD_FLOAT ? DFmode : SFmode,
			     plus_constant (Pmode, save_area, off));
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (
			       TARGET_HARD_FLOAT ? DFmode : SFmode));
	  emit_move_insn (mem, gen_rtx_REG (
			       TARGET_HARD_FLOAT ? DFmode : SFmode, fregno));
	}

      emit_label (lab);
    }
}
2413
2414 /* Create the va_list data type. */
2415
2416 tree
2417 rs6000_build_builtin_va_list (void)
2418 {
2419 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
2420
2421 /* For AIX, prefer 'char *' because that's what the system
2422 header files like. */
2423 if (DEFAULT_ABI != ABI_V4)
2424 return build_pointer_type (char_type_node);
2425
2426 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2427 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
2428 get_identifier ("__va_list_tag"), record);
2429
2430 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
2431 unsigned_char_type_node);
2432 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
2433 unsigned_char_type_node);
2434 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
2435 every user file. */
2436 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2437 get_identifier ("reserved"), short_unsigned_type_node);
2438 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2439 get_identifier ("overflow_arg_area"),
2440 ptr_type_node);
2441 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
2442 get_identifier ("reg_save_area"),
2443 ptr_type_node);
2444
2445 va_list_gpr_counter_field = f_gpr;
2446 va_list_fpr_counter_field = f_fpr;
2447
2448 DECL_FIELD_CONTEXT (f_gpr) = record;
2449 DECL_FIELD_CONTEXT (f_fpr) = record;
2450 DECL_FIELD_CONTEXT (f_res) = record;
2451 DECL_FIELD_CONTEXT (f_ovf) = record;
2452 DECL_FIELD_CONTEXT (f_sav) = record;
2453
2454 TYPE_STUB_DECL (record) = type_decl;
2455 TYPE_NAME (record) = type_decl;
2456 TYPE_FIELDS (record) = f_gpr;
2457 DECL_CHAIN (f_gpr) = f_fpr;
2458 DECL_CHAIN (f_fpr) = f_res;
2459 DECL_CHAIN (f_res) = f_ovf;
2460 DECL_CHAIN (f_ovf) = f_sav;
2461
2462 layout_type (record);
2463
2464 /* The correct type is an array type of one element. */
2465 return build_array_type (record, build_index_type (size_zero_node));
2466 }
2467
2468 /* Implement va_start. */
2469
void
rs6000_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only SVR4 needs something special.  */
  if (DEFAULT_ABI != ABI_V4)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* Fields of the V4 __va_list_tag record, in declaration order
     (built by rs6000_build_builtin_va_list).  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_res = DECL_CHAIN (f_fpr);
  f_ovf = DECL_CHAIN (f_res);
  f_sav = DECL_CHAIN (f_ovf);

  /* Build COMPONENT_REFs for each field of *valist.  */
  valist = build_simple_mem_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
	       GP_ARG_NUM_REG);
  n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
	       FP_ARG_NUM_REG);

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
	     HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
	     words, n_gpr, n_fpr);

  /* Initialize the gpr counter only if some va_arg actually reads it;
     cfun->va_list_gpr_size is zero otherwise.  */
  if (cfun->va_list_gpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
		  build_int_cst (NULL_TREE, n_gpr));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Likewise the fpr counter.  */
  if (cfun->va_list_fpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
		  build_int_cst (NULL_TREE, n_fpr));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

#ifdef HAVE_AS_GNU_ATTRIBUTE
      /* Record that the function passes FP values, for the assembler's
	 .gnu_attribute ABI tagging.  */
      if (call_ABI_of_interest (cfun->decl))
	rs6000_passes_float = true;
#endif
    }

  /* Find the overflow area: the incoming arg pointer plus the space
     consumed by the named arguments.  */
  t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* If there were no va_arg invocations, don't set up the register
     save area.  */
  if (!cfun->va_list_gpr_size
      && !cfun->va_list_fpr_size
      && n_gpr < GP_ARG_NUM_REG
      && n_fpr < FP_ARG_V4_MAX_REG)
    return;

  /* Find the register save area, at the offset recorded earlier by
     setup_incoming_varargs.  */
  t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
  if (cfun->machine->varargs_save_offset)
    t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
2556
2557 /* Implement va_arg. */
2558
tree
rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
			gimple_seq *post_p)
{
  tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, reg, t, u;
  int size, rsize, n_reg, sav_ofs, sav_scale;
  tree lab_false, lab_over, addr;
  int align;
  tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  int regalign = 0;
  gimple *stmt;

  /* Pass-by-reference arguments: fetch the pointer with a recursive
     call, then dereference it.  */
  if (pass_va_arg_by_reference (type))
    {
      t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
      return build_va_arg_indirect_ref (t);
    }

  /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
     earlier version of gcc, with the property that it always applied alignment
     adjustments to the va-args (even for zero-sized types).  The cheapest way
     to deal with this is to replicate the effect of the part of
     std_gimplify_va_arg_expr that carries out the align adjust, for the case
     of relevance.
     We don't need to check for pass-by-reference because of the test above.
     We can return a simplifed answer, since we know there's no offset to add.  */

  if (((TARGET_MACHO
	&& rs6000_darwin64_abi)
       || DEFAULT_ABI == ABI_ELFv2
       || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
      && integer_zerop (TYPE_SIZE (type)))
    {
      unsigned HOST_WIDE_INT align, boundary;
      tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
      align = PARM_BOUNDARY / BITS_PER_UNIT;
      boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
      if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
	boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
      boundary /= BITS_PER_UNIT;
      if (boundary > align)
	{
	  tree t ;
	  /* This updates arg ptr by the amount that would be necessary
	     to align the zero-sized (but not zero-alignment) item.  */
	  t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
		      fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
	  gimplify_and_add (t, pre_p);

	  /* Round valist_tmp down to a multiple of BOUNDARY (the
	     add of boundary-1 above plus this mask rounds up).  */
	  t = fold_convert (sizetype, valist_tmp);
	  t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
		      fold_convert (TREE_TYPE (valist),
				    fold_build2 (BIT_AND_EXPR, sizetype, t,
						 size_int (-boundary))));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
	  gimplify_and_add (t, pre_p);
	}
      /* Since it is zero-sized there's no increment for the item itself.  */
      valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
      return build_va_arg_indirect_ref (valist_tmp);
    }

  if (DEFAULT_ABI != ABI_V4)
    {
      /* Small complex values are split into their two components, each
	 fetched with the scalar rules.  */
      if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
	{
	  tree elem_type = TREE_TYPE (type);
	  machine_mode elem_mode = TYPE_MODE (elem_type);
	  int elem_size = GET_MODE_SIZE (elem_mode);

	  if (elem_size < UNITS_PER_WORD)
	    {
	      tree real_part, imag_part;
	      gimple_seq post = NULL;

	      real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
						  &post);
	      /* Copy the value into a temporary, lest the formal temporary
		 be reused out from under us.  */
	      real_part = get_initialized_tmp_var (real_part, pre_p, &post);
	      gimple_seq_add_seq (pre_p, post);

	      imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
						  post_p);

	      return build2 (COMPLEX_EXPR, type, real_part, imag_part);
	    }
	}

      /* Everything else on the AIX-style ABIs uses the generic code.  */
      return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
    }

  /* V.4 from here on: walk the __va_list_tag record fields in
     declaration order (built by rs6000_build_builtin_va_list).  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_res = DECL_CHAIN (f_fpr);
  f_ovf = DECL_CHAIN (f_res);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* SIZE in bytes, RSIZE in 32-bit words rounded up; PAD is the slack
     used below to right-align the value on big endian.  */
  size = int_size_in_bytes (type);
  rsize = (size + 3) / 4;
  int pad = 4 * rsize - size;
  align = 1;

  machine_mode mode = TYPE_MODE (type);
  if (abi_v4_pass_in_fpr (mode, false))
    {
      /* FP args go in FP registers, if present.  */
      reg = fpr;
      n_reg = (size + 7) / 8;
      sav_ofs = (TARGET_HARD_FLOAT ? 8 : 4) * 4;
      sav_scale = (TARGET_HARD_FLOAT ? 8 : 4);
      if (mode != SFmode && mode != SDmode)
	align = 8;
    }
  else
    {
      /* Otherwise into GP registers.  */
      reg = gpr;
      n_reg = rsize;
      sav_ofs = 0;
      sav_scale = 4;
      if (n_reg == 2)
	align = 8;
    }

  /* Pull the value out of the saved registers....  */

  lab_over = NULL;
  addr = create_tmp_var (ptr_type_node, "addr");

  /* AltiVec vectors never go in registers when -mabi=altivec.  */
  if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
    align = 16;
  else
    {
      lab_false = create_artificial_label (input_location);
      lab_over = create_artificial_label (input_location);

      /* Long long is aligned in the registers.  As are any other 2 gpr
	 item such as complex int due to a historical mistake.  */
      u = reg;
      if (n_reg == 2 && reg == gpr)
	{
	  /* Round the gpr counter up to an even value (reg += reg & 1,
	     as a post-increment so U is the aligned value).  */
	  regalign = 1;
	  u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
		      build_int_cst (TREE_TYPE (reg), n_reg - 1));
	  u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
		      unshare_expr (reg), u);
	}
      /* _Decimal128 is passed in even/odd fpr pairs; the stored
	 reg number is 0 for f1, so we want to make it odd.  */
      else if (reg == fpr && mode == TDmode)
	{
	  t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
		      build_int_cst (TREE_TYPE (reg), 1));
	  u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
	}

      /* If the (possibly aligned) counter U is past the last register
	 that could hold the whole value (8 argument registers total),
	 branch to the overflow path at lab_false.  */
      t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
      t = build2 (GE_EXPR, boolean_type_node, u, t);
      u = build1 (GOTO_EXPR, void_type_node, lab_false);
      t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
      gimplify_and_add (t, pre_p);

      /* Register path: addr = sav + sav_ofs + (reg post-incremented by
	 n_reg) * sav_scale.  */
      t = sav;
      if (sav_ofs)
	t = fold_build_pointer_plus_hwi (sav, sav_ofs);

      u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
		  build_int_cst (TREE_TYPE (reg), n_reg));
      u = fold_convert (sizetype, u);
      u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
      t = fold_build_pointer_plus (t, u);

      /* _Decimal32 varargs are located in the second word of the 64-bit
	 FP register for 32-bit binaries.  */
      if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
	t = fold_build_pointer_plus_hwi (t, size);

      /* Args are passed right-aligned.  */
      if (BYTES_BIG_ENDIAN)
	t = fold_build_pointer_plus_hwi (t, pad);

      gimplify_assign (addr, t, pre_p);

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      stmt = gimple_build_label (lab_false);
      gimple_seq_add_stmt (pre_p, stmt);

      if ((n_reg == 2 && !regalign) || n_reg > 2)
	{
	  /* Ensure that we don't find any more args in regs.
	     Alignment has taken care of for special cases.  */
	  gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
	}
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  t = ovf;
  if (align != 1)
    {
      t = fold_build_pointer_plus_hwi (t, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  /* Args are passed right-aligned.  */
  if (BYTES_BIG_ENDIAN)
    t = fold_build_pointer_plus_hwi (t, pad);

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  gimplify_assign (unshare_expr (addr), t, pre_p);

  /* Advance the overflow pointer past the value just fetched.  */
  t = fold_build_pointer_plus_hwi (t, size);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  /* Join point of the register and overflow paths.  */
  if (lab_over)
    {
      stmt = gimple_build_label (lab_over);
      gimple_seq_add_stmt (pre_p, stmt);
    }

  if (STRICT_ALIGNMENT
      && (TYPE_ALIGN (type)
	  > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
    {
      /* The value (of type complex double, for example) may not be
	 aligned in memory in the saved registers, so copy via a
	 temporary.  (This is the same code as used for SPARC.)  */
      tree tmp = create_tmp_var (type, "va_arg_tmp");
      tree dest_addr = build_fold_addr_expr (tmp);

      tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				   3, dest_addr, addr, size_int (rsize * 4));
      TREE_ADDRESSABLE (tmp) = 1;

      gimplify_and_add (copy, pre_p);
      addr = dest_addr;
    }

  addr = fold_convert (ptrtype, addr);
  return build_va_arg_indirect_ref (addr);
}
2815
2816 /* Return the permutation index for the swapping on the given vector mode.
2817 Note that the permutation index is correspondingly generated by endianness,
2818 it should be used by direct vector permutation. */
2819
2820 rtx
2821 swap_endian_selector_for_mode (machine_mode mode)
2822 {
2823 unsigned int swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
2824 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
2825 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
2826 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
2827
2828 unsigned int *swaparray, i;
2829 rtx perm[16];
2830
2831 switch (mode)
2832 {
2833 case E_V1TImode:
2834 swaparray = swap1;
2835 break;
2836 case E_V2DFmode:
2837 case E_V2DImode:
2838 swaparray = swap2;
2839 break;
2840 case E_V4SFmode:
2841 case E_V4SImode:
2842 swaparray = swap4;
2843 break;
2844 case E_V8HImode:
2845 swaparray = swap8;
2846 break;
2847 default:
2848 gcc_unreachable ();
2849 }
2850
2851 for (i = 0; i < 16; ++i)
2852 if (BYTES_BIG_ENDIAN)
2853 perm[i] = GEN_INT (swaparray[i]);
2854 else
2855 /* Generates the reversed perm for little endian. */
2856 perm[i] = GEN_INT (~swaparray[i] & 0x0000001f);
2857
2858 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
2859 gen_rtvec_v (16, perm)));
2860 }
2861
2862 /* Return the internal arg pointer used for function incoming
2863 arguments. When -fsplit-stack, the arg pointer is r12 so we need
2864 to copy it to a pseudo in order for it to be preserved over calls
2865 and suchlike. We'd really like to use a pseudo here for the
2866 internal arg pointer but data-flow analysis is not prepared to
2867 accept pseudos as live at the beginning of a function. */
2868
2869 rtx
2870 rs6000_internal_arg_pointer (void)
2871 {
2872 if (flag_split_stack
2873 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
2874 == NULL))
2875
2876 {
2877 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
2878 {
2879 rtx pat;
2880
2881 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
2882 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
2883
2884 /* Put the pseudo initialization right after the note at the
2885 beginning of the function. */
2886 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
2887 gen_rtx_REG (Pmode, 12));
2888 push_topmost_sequence ();
2889 emit_insn_after (pat, get_insns ());
2890 pop_topmost_sequence ();
2891 }
2892 rtx ret = plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
2893 FIRST_PARM_OFFSET (current_function_decl));
2894 return copy_to_reg (ret);
2895 }
2896 return virtual_incoming_args_rtx;
2897 }
2898
2899 \f
2900 /* A C compound statement that outputs the assembler code for a thunk
2901 function, used to implement C++ virtual function calls with
2902 multiple inheritance. The thunk acts as a wrapper around a virtual
2903 function, adjusting the implicit object parameter before handing
2904 control off to the real function.
2905
2906 First, emit code to add the integer DELTA to the location that
2907 contains the incoming first argument. Assume that this argument
2908 contains a pointer, and is the one used to pass the `this' pointer
2909 in C++. This is the incoming argument *before* the function
2910 prologue, e.g. `%o0' on a sparc. The addition must preserve the
2911 values of all other incoming arguments.
2912
2913 After the addition, emit code to jump to FUNCTION, which is a
2914 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
2915 not touch the return address. Hence returning from FUNCTION will
2916 return to whoever called the current `thunk'.
2917
2918 The effect must be as if FUNCTION had been called directly with the
2919 adjusted first argument. This macro is responsible for emitting
2920 all of the code for a thunk function; output_function_prologue()
2921 and output_function_epilogue() are not invoked.
2922
2923 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
2924 been extracted from it.) It might possibly be useful on some
2925 targets, but probably not.
2926
2927 If you do not define this macro, the target-independent code in the
2928 C++ frontend will generate a less efficient heavyweight thunk that
2929 calls FUNCTION instead of jumping to it. The generic approach does
2930 not support varargs. */
2931
/* Output the assembler code for an MI thunk: adjust the incoming
   `this' pointer by DELTA (and, if VCALL_OFFSET is nonzero, by a
   further adjustment loaded from the vtable), then sibcall FUNCTION.
   The insns are emitted at the RTL level and pushed through final ()
   by hand, bypassing the normal pass pipeline; see the block comment
   above for the full contract of this target hook.  */

void
rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
			HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
			tree function)
{
  const char *fnname = get_fnname_from_decl (thunk_fndecl);
  rtx this_rtx, funexp;
  rtx_insn *insn;

  /* Pretend reload and the epilogue pass have already run: we hand
     the emitted insns (which use hard registers) straight to
     final () below.  Both flags are reset at the end.  */
  reload_completed = 1;
  epilogue_completed = 1;

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Find the "this" pointer.  If the function returns a structure,
     the structure return pointer is in r3, which pushes the `this'
     argument over to r4.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    this_rtx = gen_rtx_REG (Pmode, 4);
  else
    this_rtx = gen_rtx_REG (Pmode, 3);

  /* Apply the constant offset, if required.  */
  if (delta)
    emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      /* r12 is used as a scratch register here; it is call-clobbered
	 and not used for parameter passing, so the thunk may freely
	 overwrite it.  */
      rtx tmp = gen_rtx_REG (Pmode, 12);

      /* tmp = vtable pointer = *this.  */
      emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
      if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
	{
	  /* VCALL_OFFSET does not fit in the signed 16-bit
	     displacement of a D-form load, so materialize the address
	     with an explicit add, then load through it.  */
	  emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
	  emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
	}
      else
	{
	  /* The offset fits; fold it into the load's address.  */
	  rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);

	  emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
	}
      /* this += *(vtable + vcall_offset).  */
      emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
    }

  /* Generate a tail call to the target function.  Make sure the
     target decl has been announced to the assembler first.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);

  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, const0_rtx));
  SIBLING_CALL_P (insn) = 1;
  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  */
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk_fndecl, fnname);

  /* Undo the pretence set up above so normal compilation of
     subsequent functions is unaffected.  */
  reload_completed = 0;
  epilogue_completed = 0;
}
This page took 0.160882 seconds and 5 git commands to generate.