gcc.gnu.org Git - gcc.git/blame - gcc/config/aarch64/aarch64.c
[AArch64] Simplify frame pointer logic
bdb7bf8a 1/* Machine description for AArch64 architecture.
85ec4feb 2 Copyright (C) 2009-2018 Free Software Foundation, Inc.
43e9d192
IB
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
8fcc61f8
RS
21#define IN_TARGET_CODE 1
22
43e9d192 23#include "config.h"
01736018 24#define INCLUDE_STRING
43e9d192
IB
25#include "system.h"
26#include "coretypes.h"
c7131fb2 27#include "backend.h"
e11c4407
AM
28#include "target.h"
29#include "rtl.h"
c7131fb2 30#include "tree.h"
e73cf9a2 31#include "memmodel.h"
c7131fb2 32#include "gimple.h"
e11c4407
AM
33#include "cfghooks.h"
34#include "cfgloop.h"
c7131fb2 35#include "df.h"
e11c4407
AM
36#include "tm_p.h"
37#include "stringpool.h"
314e6352 38#include "attribs.h"
e11c4407
AM
39#include "optabs.h"
40#include "regs.h"
41#include "emit-rtl.h"
42#include "recog.h"
43#include "diagnostic.h"
43e9d192 44#include "insn-attr.h"
40e23961 45#include "alias.h"
40e23961 46#include "fold-const.h"
d8a2d370
DN
47#include "stor-layout.h"
48#include "calls.h"
49#include "varasm.h"
43e9d192 50#include "output.h"
36566b39 51#include "flags.h"
36566b39 52#include "explow.h"
43e9d192
IB
53#include "expr.h"
54#include "reload.h"
43e9d192 55#include "langhooks.h"
5a2c8331 56#include "opts.h"
2d6bc7fa 57#include "params.h"
45b0be94 58#include "gimplify.h"
43e9d192 59#include "dwarf2.h"
61d371eb 60#include "gimple-iterator.h"
8990e73a 61#include "tree-vectorizer.h"
d1bcc29f 62#include "aarch64-cost-tables.h"
0ee859b5 63#include "dumpfile.h"
9b2b7279 64#include "builtins.h"
8baff86e 65#include "rtl-iter.h"
9bbe08fe 66#include "tm-constrs.h"
d03f7e44 67#include "sched-int.h"
d78006d9 68#include "target-globals.h"
a3eb8a52 69#include "common/common-target.h"
43cacb12 70#include "cfgrtl.h"
51b86113
DM
71#include "selftest.h"
72#include "selftest-rtl.h"
43cacb12 73#include "rtx-vector-builder.h"
43e9d192 74
994c5d85 75/* This file should be included last. */
d58627a0
RS
76#include "target-def.h"
77
28514dda
YZ
78/* Defined for convenience. */
79#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
80
43e9d192
IB
81/* Classifies an address.
82
83 ADDRESS_REG_IMM
84 A simple base register plus immediate offset.
85
86 ADDRESS_REG_WB
87 A base register indexed by immediate offset with writeback.
88
89 ADDRESS_REG_REG
90 A base register indexed by (optionally scaled) register.
91
92 ADDRESS_REG_UXTW
93 A base register indexed by (optionally scaled) zero-extended register.
94
95 ADDRESS_REG_SXTW
96 A base register indexed by (optionally scaled) sign-extended register.
97
98 ADDRESS_LO_SUM
99 A LO_SUM rtx with a base register and "LO12" symbol relocation.
100
101 ADDRESS_SYMBOLIC:
102 A constant symbolic address, in pc-relative literal pool. */
103
104enum aarch64_address_type {
105 ADDRESS_REG_IMM,
106 ADDRESS_REG_WB,
107 ADDRESS_REG_REG,
108 ADDRESS_REG_UXTW,
109 ADDRESS_REG_SXTW,
110 ADDRESS_LO_SUM,
111 ADDRESS_SYMBOLIC
112};
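/* Some illustrative assembler forms (assumed examples, for orientation
   only; the actual RTL shapes are checked by aarch64_classify_address
   elsewhere in this file) of addresses in each class:

     ADDRESS_REG_IMM    [x0, #16]
     ADDRESS_REG_WB     [x0, #16]!  or  [x0], #16
     ADDRESS_REG_REG    [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   [x0, w1, uxtw #2]
     ADDRESS_REG_SXTW   [x0, w1, sxtw #2]
     ADDRESS_LO_SUM     [x0, #:lo12:sym]   (after "adrp x0, sym")
     ADDRESS_SYMBOLIC   ldr x0, .Lpool     (pc-relative literal load)  */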
113
114struct aarch64_address_info {
115 enum aarch64_address_type type;
116 rtx base;
117 rtx offset;
dc640181 118 poly_int64 const_offset;
43e9d192
IB
119 int shift;
120 enum aarch64_symbol_type symbol_type;
121};
122
b187677b 123/* Information about a legitimate vector immediate operand. */
48063b9d
IB
124struct simd_immediate_info
125{
b187677b
RS
126 enum insn_type { MOV, MVN };
127 enum modifier_type { LSL, MSL };
128
129 simd_immediate_info () {}
130 simd_immediate_info (scalar_float_mode, rtx);
131 simd_immediate_info (scalar_int_mode, unsigned HOST_WIDE_INT,
132 insn_type = MOV, modifier_type = LSL,
133 unsigned int = 0);
43cacb12 134 simd_immediate_info (scalar_mode, rtx, rtx);
b187677b
RS
135
136 /* The mode of the elements. */
137 scalar_mode elt_mode;
138
43cacb12
RS
139 /* The value of each element if all elements are the same, or the
140 first value if the constant is a series. */
48063b9d 141 rtx value;
b187677b 142
43cacb12
RS
143 /* The value of the step if the constant is a series, null otherwise. */
144 rtx step;
145
b187677b
RS
146 /* The instruction to use to move the immediate into a vector. */
147 insn_type insn;
148
149 /* The kind of shift modifier to use, and the number of bits to shift.
150 This is (LSL, 0) if no shift is needed. */
151 modifier_type modifier;
152 unsigned int shift;
48063b9d
IB
153};
154
b187677b
RS
155/* Construct a floating-point immediate in which each element has mode
156 ELT_MODE_IN and value VALUE_IN. */
157inline simd_immediate_info
158::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in)
43cacb12 159 : elt_mode (elt_mode_in), value (value_in), step (NULL_RTX), insn (MOV),
b187677b
RS
160 modifier (LSL), shift (0)
161{}
162
163/* Construct an integer immediate in which each element has mode ELT_MODE_IN
164 and value VALUE_IN. The other parameters are as for the structure
165 fields. */
166inline simd_immediate_info
167::simd_immediate_info (scalar_int_mode elt_mode_in,
168 unsigned HOST_WIDE_INT value_in,
169 insn_type insn_in, modifier_type modifier_in,
170 unsigned int shift_in)
171 : elt_mode (elt_mode_in), value (gen_int_mode (value_in, elt_mode_in)),
43cacb12
RS
172 step (NULL_RTX), insn (insn_in), modifier (modifier_in), shift (shift_in)
173{}
174
175/* Construct an integer immediate in which each element has mode ELT_MODE_IN
176 and where element I is equal to VALUE_IN + I * STEP_IN. */
177inline simd_immediate_info
178::simd_immediate_info (scalar_mode elt_mode_in, rtx value_in, rtx step_in)
179 : elt_mode (elt_mode_in), value (value_in), step (step_in), insn (MOV),
180 modifier (LSL), shift (0)
b187677b
RS
181{}
182
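/* A hypothetical illustration (assumed, for orientation only): a V4SImode
   constant whose four elements are all 0x5500 could be described as
   simd_immediate_info (SImode, 0x55, MOV, LSL, 8), matching
   "movi v0.4s, #0x55, lsl #8", while an SVE series constant such as
   "index z0.s, #1, #2" would use the (scalar_mode, rtx, rtx) constructor
   with value 1 and step 2.  */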
43e9d192
IB
183/* The current code model. */
184enum aarch64_code_model aarch64_cmodel;
185
43cacb12
RS
186/* The number of 64-bit elements in an SVE vector. */
187poly_uint16 aarch64_sve_vg;
188
43e9d192
IB
189#ifdef HAVE_AS_TLS
190#undef TARGET_HAVE_TLS
191#define TARGET_HAVE_TLS 1
192#endif
193
ef4bddc2
RS
194static bool aarch64_composite_type_p (const_tree, machine_mode);
195static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
43e9d192 196 const_tree,
ef4bddc2 197 machine_mode *, int *,
43e9d192
IB
198 bool *);
199static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
200static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
43e9d192 201static void aarch64_override_options_after_change (void);
ef4bddc2 202static bool aarch64_vector_mode_supported_p (machine_mode);
ef4bddc2 203static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
7df76747
N
204static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
205 const_tree type,
206 int misalignment,
207 bool is_packed);
43cacb12 208static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
c348cab0 209static bool aarch64_print_ldpstp_address (FILE *, machine_mode, rtx);
88b08073 210
0c6caaf8
RL
211/* Major revision number of the ARM Architecture implemented by the target. */
212unsigned aarch64_architecture_version;
213
43e9d192 214/* The processor for which instructions should be scheduled. */
02fdbd5b 215enum aarch64_processor aarch64_tune = cortexa53;
43e9d192 216
43e9d192
IB
217/* Mask to specify which instruction scheduling options should be used. */
218unsigned long aarch64_tune_flags = 0;
219
1be34295 220/* Global flag for PC relative loads. */
9ee6540a 221bool aarch64_pcrelative_literal_loads;
1be34295 222
d6cb6d6a
WD
223/* Global flag for whether frame pointer is enabled. */
224bool aarch64_use_frame_pointer;
225
8dec06f2
JG
226/* Support for command line parsing of boolean flags in the tuning
227 structures. */
228struct aarch64_flag_desc
229{
230 const char* name;
231 unsigned int flag;
232};
233
ed9fa8d2 234#define AARCH64_FUSION_PAIR(name, internal_name) \
8dec06f2
JG
235 { name, AARCH64_FUSE_##internal_name },
236static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
237{
238 { "none", AARCH64_FUSE_NOTHING },
239#include "aarch64-fusion-pairs.def"
240 { "all", AARCH64_FUSE_ALL },
241 { NULL, AARCH64_FUSE_NOTHING }
242};
8dec06f2 243
a339a01c 244#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
8dec06f2
JG
245 { name, AARCH64_EXTRA_TUNE_##internal_name },
246static const struct aarch64_flag_desc aarch64_tuning_flags[] =
247{
248 { "none", AARCH64_EXTRA_TUNE_NONE },
249#include "aarch64-tuning-flags.def"
250 { "all", AARCH64_EXTRA_TUNE_ALL },
251 { NULL, AARCH64_EXTRA_TUNE_NONE }
252};
8dec06f2 253
43e9d192
IB
254/* Tuning parameters. */
255
43e9d192
IB
256static const struct cpu_addrcost_table generic_addrcost_table =
257{
67747367 258 {
2fae724a 259 1, /* hi */
bd95e655
JG
260 0, /* si */
261 0, /* di */
2fae724a 262 1, /* ti */
67747367 263 },
bd95e655
JG
264 0, /* pre_modify */
265 0, /* post_modify */
266 0, /* register_offset */
783879e6
EM
267 0, /* register_sextend */
268 0, /* register_zextend */
bd95e655 269 0 /* imm_offset */
43e9d192
IB
270};
271
5ec1ae3b
EM
272static const struct cpu_addrcost_table exynosm1_addrcost_table =
273{
274 {
275 0, /* hi */
276 0, /* si */
277 0, /* di */
278 2, /* ti */
279 },
280 0, /* pre_modify */
281 0, /* post_modify */
282 1, /* register_offset */
283 1, /* register_sextend */
284 2, /* register_zextend */
285 0, /* imm_offset */
286};
287
381e27aa
PT
288static const struct cpu_addrcost_table xgene1_addrcost_table =
289{
381e27aa 290 {
bd95e655
JG
291 1, /* hi */
292 0, /* si */
293 0, /* di */
294 1, /* ti */
381e27aa 295 },
bd95e655
JG
296 1, /* pre_modify */
297 0, /* post_modify */
298 0, /* register_offset */
783879e6
EM
299 1, /* register_sextend */
300 1, /* register_zextend */
bd95e655 301 0, /* imm_offset */
381e27aa
PT
302};
303
d1261ac6 304static const struct cpu_addrcost_table thunderx2t99_addrcost_table =
ad611a4c
VP
305{
306 {
5f407e57
AP
307 1, /* hi */
308 1, /* si */
309 1, /* di */
ad611a4c
VP
310 2, /* ti */
311 },
312 0, /* pre_modify */
313 0, /* post_modify */
314 2, /* register_offset */
315 3, /* register_sextend */
316 3, /* register_zextend */
317 0, /* imm_offset */
318};
319
43e9d192
IB
320static const struct cpu_regmove_cost generic_regmove_cost =
321{
bd95e655 322 1, /* GP2GP */
3969c510
WD
323 /* Avoid the use of slow int<->fp moves for spilling by setting
324 their cost higher than memmov_cost. */
bd95e655
JG
325 5, /* GP2FP */
326 5, /* FP2GP */
327 2 /* FP2FP */
43e9d192
IB
328};
329
e4a9c55a
WD
330static const struct cpu_regmove_cost cortexa57_regmove_cost =
331{
bd95e655 332 1, /* GP2GP */
e4a9c55a
WD
333 /* Avoid the use of slow int<->fp moves for spilling by setting
334 their cost higher than memmov_cost. */
bd95e655
JG
335 5, /* GP2FP */
336 5, /* FP2GP */
337 2 /* FP2FP */
e4a9c55a
WD
338};
339
340static const struct cpu_regmove_cost cortexa53_regmove_cost =
341{
bd95e655 342 1, /* GP2GP */
e4a9c55a
WD
343 /* Avoid the use of slow int<->fp moves for spilling by setting
344 their cost higher than memmov_cost. */
bd95e655
JG
345 5, /* GP2FP */
346 5, /* FP2GP */
347 2 /* FP2FP */
e4a9c55a
WD
348};
349
5ec1ae3b
EM
350static const struct cpu_regmove_cost exynosm1_regmove_cost =
351{
352 1, /* GP2GP */
353 /* Avoid the use of slow int<->fp moves for spilling by setting
 354 their cost higher than memmov_cost (actually 4 and 9). */
355 9, /* GP2FP */
356 9, /* FP2GP */
357 1 /* FP2FP */
358};
359
d1bcc29f
AP
360static const struct cpu_regmove_cost thunderx_regmove_cost =
361{
bd95e655
JG
362 2, /* GP2GP */
363 2, /* GP2FP */
364 6, /* FP2GP */
365 4 /* FP2FP */
d1bcc29f
AP
366};
367
381e27aa
PT
368static const struct cpu_regmove_cost xgene1_regmove_cost =
369{
bd95e655 370 1, /* GP2GP */
381e27aa
PT
371 /* Avoid the use of slow int<->fp moves for spilling by setting
372 their cost higher than memmov_cost. */
bd95e655
JG
373 8, /* GP2FP */
374 8, /* FP2GP */
375 2 /* FP2FP */
381e27aa
PT
376};
377
ee446d9f
JW
378static const struct cpu_regmove_cost qdf24xx_regmove_cost =
379{
380 2, /* GP2GP */
381 /* Avoid the use of int<->fp moves for spilling. */
382 6, /* GP2FP */
383 6, /* FP2GP */
384 4 /* FP2FP */
385};
386
d1261ac6 387static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
ad611a4c
VP
388{
389 1, /* GP2GP */
390 /* Avoid the use of int<->fp moves for spilling. */
391 8, /* GP2FP */
392 8, /* FP2GP */
393 4 /* FP2FP */
394};
395
8990e73a 396/* Generic costs for vector insn classes. */
8990e73a
TB
397static const struct cpu_vector_cost generic_vector_cost =
398{
cd8ae5ed
AP
399 1, /* scalar_int_stmt_cost */
400 1, /* scalar_fp_stmt_cost */
bd95e655
JG
401 1, /* scalar_load_cost */
402 1, /* scalar_store_cost */
cd8ae5ed
AP
403 1, /* vec_int_stmt_cost */
404 1, /* vec_fp_stmt_cost */
c428f91c 405 2, /* vec_permute_cost */
bd95e655
JG
406 1, /* vec_to_scalar_cost */
407 1, /* scalar_to_vec_cost */
408 1, /* vec_align_load_cost */
409 1, /* vec_unalign_load_cost */
410 1, /* vec_unalign_store_cost */
411 1, /* vec_store_cost */
412 3, /* cond_taken_branch_cost */
413 1 /* cond_not_taken_branch_cost */
8990e73a
TB
414};
415
c3f20327
AP
416/* ThunderX costs for vector insn classes. */
417static const struct cpu_vector_cost thunderx_vector_cost =
418{
cd8ae5ed
AP
419 1, /* scalar_int_stmt_cost */
420 1, /* scalar_fp_stmt_cost */
c3f20327
AP
421 3, /* scalar_load_cost */
422 1, /* scalar_store_cost */
cd8ae5ed 423 4, /* vec_int_stmt_cost */
b29d7591 424 1, /* vec_fp_stmt_cost */
c3f20327
AP
425 4, /* vec_permute_cost */
426 2, /* vec_to_scalar_cost */
427 2, /* scalar_to_vec_cost */
428 3, /* vec_align_load_cost */
7e87a3d9
AP
429 5, /* vec_unalign_load_cost */
430 5, /* vec_unalign_store_cost */
c3f20327
AP
431 1, /* vec_store_cost */
432 3, /* cond_taken_branch_cost */
433 3 /* cond_not_taken_branch_cost */
434};
435
60bff090 436/* Cortex-A57 costs for vector insn classes. */
60bff090
JG
437static const struct cpu_vector_cost cortexa57_vector_cost =
438{
cd8ae5ed
AP
439 1, /* scalar_int_stmt_cost */
440 1, /* scalar_fp_stmt_cost */
bd95e655
JG
441 4, /* scalar_load_cost */
442 1, /* scalar_store_cost */
cd8ae5ed
AP
443 2, /* vec_int_stmt_cost */
444 2, /* vec_fp_stmt_cost */
c428f91c 445 3, /* vec_permute_cost */
bd95e655
JG
446 8, /* vec_to_scalar_cost */
447 8, /* scalar_to_vec_cost */
db4a1c18
WD
448 4, /* vec_align_load_cost */
449 4, /* vec_unalign_load_cost */
bd95e655
JG
450 1, /* vec_unalign_store_cost */
451 1, /* vec_store_cost */
452 1, /* cond_taken_branch_cost */
453 1 /* cond_not_taken_branch_cost */
60bff090
JG
454};
455
5ec1ae3b
EM
456static const struct cpu_vector_cost exynosm1_vector_cost =
457{
cd8ae5ed
AP
458 1, /* scalar_int_stmt_cost */
459 1, /* scalar_fp_stmt_cost */
5ec1ae3b
EM
460 5, /* scalar_load_cost */
461 1, /* scalar_store_cost */
cd8ae5ed
AP
462 3, /* vec_int_stmt_cost */
463 3, /* vec_fp_stmt_cost */
c428f91c 464 3, /* vec_permute_cost */
5ec1ae3b
EM
465 3, /* vec_to_scalar_cost */
466 3, /* scalar_to_vec_cost */
467 5, /* vec_align_load_cost */
468 5, /* vec_unalign_load_cost */
469 1, /* vec_unalign_store_cost */
470 1, /* vec_store_cost */
471 1, /* cond_taken_branch_cost */
472 1 /* cond_not_taken_branch_cost */
473};
474
381e27aa 475/* X-Gene 1 costs for vector insn classes. */
381e27aa
PT
476static const struct cpu_vector_cost xgene1_vector_cost =
477{
cd8ae5ed
AP
478 1, /* scalar_int_stmt_cost */
479 1, /* scalar_fp_stmt_cost */
bd95e655
JG
480 5, /* scalar_load_cost */
481 1, /* scalar_store_cost */
cd8ae5ed
AP
482 2, /* vec_int_stmt_cost */
483 2, /* vec_fp_stmt_cost */
c428f91c 484 2, /* vec_permute_cost */
bd95e655
JG
485 4, /* vec_to_scalar_cost */
486 4, /* scalar_to_vec_cost */
487 10, /* vec_align_load_cost */
488 10, /* vec_unalign_load_cost */
489 2, /* vec_unalign_store_cost */
490 2, /* vec_store_cost */
491 2, /* cond_taken_branch_cost */
492 1 /* cond_not_taken_branch_cost */
381e27aa
PT
493};
494
ad611a4c 495/* Costs for vector insn classes for Vulcan (ThunderX2 T99). */
d1261ac6 496static const struct cpu_vector_cost thunderx2t99_vector_cost =
ad611a4c 497{
cd8ae5ed
AP
498 1, /* scalar_int_stmt_cost */
499 6, /* scalar_fp_stmt_cost */
ad611a4c
VP
500 4, /* scalar_load_cost */
501 1, /* scalar_store_cost */
cd8ae5ed
AP
502 5, /* vec_int_stmt_cost */
503 6, /* vec_fp_stmt_cost */
ad611a4c
VP
504 3, /* vec_permute_cost */
505 6, /* vec_to_scalar_cost */
506 5, /* scalar_to_vec_cost */
507 8, /* vec_align_load_cost */
508 8, /* vec_unalign_load_cost */
509 4, /* vec_unalign_store_cost */
510 4, /* vec_store_cost */
511 2, /* cond_taken_branch_cost */
512 1 /* cond_not_taken_branch_cost */
513};
514
b9066f5a
MW
515/* Generic costs for branch instructions. */
516static const struct cpu_branch_cost generic_branch_cost =
517{
9094d4a4
WD
518 1, /* Predictable. */
519 3 /* Unpredictable. */
b9066f5a
MW
520};
521
9acc9cbe
EM
522/* Generic approximation modes. */
523static const cpu_approx_modes generic_approx_modes =
524{
79a2bc2d 525 AARCH64_APPROX_NONE, /* division */
98daafa0 526 AARCH64_APPROX_NONE, /* sqrt */
9acc9cbe
EM
527 AARCH64_APPROX_NONE /* recip_sqrt */
528};
529
530/* Approximation modes for Exynos M1. */
531static const cpu_approx_modes exynosm1_approx_modes =
532{
79a2bc2d 533 AARCH64_APPROX_NONE, /* division */
98daafa0 534 AARCH64_APPROX_ALL, /* sqrt */
9acc9cbe
EM
535 AARCH64_APPROX_ALL /* recip_sqrt */
536};
537
538/* Approximation modes for X-Gene 1. */
539static const cpu_approx_modes xgene1_approx_modes =
540{
79a2bc2d 541 AARCH64_APPROX_NONE, /* division */
98daafa0 542 AARCH64_APPROX_NONE, /* sqrt */
9acc9cbe
EM
543 AARCH64_APPROX_ALL /* recip_sqrt */
544};
545
9d2c6e2e
MK
546/* Generic prefetch settings (which disable prefetch). */
547static const cpu_prefetch_tune generic_prefetch_tune =
548{
549 0, /* num_slots */
550 -1, /* l1_cache_size */
551 -1, /* l1_cache_line_size */
16b2cafd
MK
552 -1, /* l2_cache_size */
553 -1 /* default_opt_level */
9d2c6e2e
MK
554};
555
556static const cpu_prefetch_tune exynosm1_prefetch_tune =
557{
558 0, /* num_slots */
559 -1, /* l1_cache_size */
560 64, /* l1_cache_line_size */
16b2cafd
MK
561 -1, /* l2_cache_size */
562 -1 /* default_opt_level */
9d2c6e2e
MK
563};
564
565static const cpu_prefetch_tune qdf24xx_prefetch_tune =
566{
70c51b58
MK
567 4, /* num_slots */
568 32, /* l1_cache_size */
9d2c6e2e 569 64, /* l1_cache_line_size */
725e2110 570 512, /* l2_cache_size */
47811d7d 571 -1 /* default_opt_level */
9d2c6e2e
MK
572};
573
f1e247d0
AP
574static const cpu_prefetch_tune thunderxt88_prefetch_tune =
575{
576 8, /* num_slots */
577 32, /* l1_cache_size */
578 128, /* l1_cache_line_size */
579 16*1024, /* l2_cache_size */
580 3 /* default_opt_level */
581};
582
583static const cpu_prefetch_tune thunderx_prefetch_tune =
584{
585 8, /* num_slots */
586 32, /* l1_cache_size */
587 128, /* l1_cache_line_size */
588 -1, /* l2_cache_size */
589 -1 /* default_opt_level */
590};
591
9d2c6e2e
MK
592static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
593{
f1e247d0
AP
594 8, /* num_slots */
595 32, /* l1_cache_size */
9d2c6e2e 596 64, /* l1_cache_line_size */
f1e247d0 597 256, /* l2_cache_size */
16b2cafd 598 -1 /* default_opt_level */
9d2c6e2e
MK
599};
600
43e9d192
IB
601static const struct tune_params generic_tunings =
602{
4e2cd668 603 &cortexa57_extra_costs,
43e9d192
IB
604 &generic_addrcost_table,
605 &generic_regmove_cost,
8990e73a 606 &generic_vector_cost,
b9066f5a 607 &generic_branch_cost,
9acc9cbe 608 &generic_approx_modes,
bd95e655
JG
609 4, /* memmov_cost */
610 2, /* issue_rate */
e0701ef0 611 (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
0b82a5a2 612 8, /* function_align. */
6b13482b
WD
613 4, /* jump_align. */
614 8, /* loop_align. */
cee66c68
WD
615 2, /* int_reassoc_width. */
616 4, /* fp_reassoc_width. */
50093a33
WD
617 1, /* vec_reassoc_width. */
618 2, /* min_div_recip_mul_sf. */
dfba575f 619 2, /* min_div_recip_mul_df. */
50487d79 620 0, /* max_case_values. */
3b4c0f7e 621 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
622 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
623 &generic_prefetch_tune
43e9d192
IB
624};
625
1c72a3ca
JG
626static const struct tune_params cortexa35_tunings =
627{
628 &cortexa53_extra_costs,
629 &generic_addrcost_table,
630 &cortexa53_regmove_cost,
631 &generic_vector_cost,
aca97ef8 632 &generic_branch_cost,
9acc9cbe 633 &generic_approx_modes,
1c72a3ca
JG
634 4, /* memmov_cost */
635 1, /* issue_rate */
0bc24338 636 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
1c72a3ca 637 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
d4407370 638 16, /* function_align. */
9779b2e8 639 4, /* jump_align. */
d4407370 640 8, /* loop_align. */
1c72a3ca
JG
641 2, /* int_reassoc_width. */
642 4, /* fp_reassoc_width. */
643 1, /* vec_reassoc_width. */
644 2, /* min_div_recip_mul_sf. */
645 2, /* min_div_recip_mul_df. */
646 0, /* max_case_values. */
1c72a3ca 647 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
648 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
649 &generic_prefetch_tune
1c72a3ca
JG
650};
651
984239ad
KT
652static const struct tune_params cortexa53_tunings =
653{
654 &cortexa53_extra_costs,
655 &generic_addrcost_table,
e4a9c55a 656 &cortexa53_regmove_cost,
984239ad 657 &generic_vector_cost,
aca97ef8 658 &generic_branch_cost,
9acc9cbe 659 &generic_approx_modes,
bd95e655
JG
660 4, /* memmov_cost */
661 2, /* issue_rate */
00a8574a 662 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
e9a3a175 663 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
d4407370 664 16, /* function_align. */
9779b2e8 665 4, /* jump_align. */
d4407370 666 8, /* loop_align. */
cee66c68
WD
667 2, /* int_reassoc_width. */
668 4, /* fp_reassoc_width. */
50093a33
WD
669 1, /* vec_reassoc_width. */
670 2, /* min_div_recip_mul_sf. */
dfba575f 671 2, /* min_div_recip_mul_df. */
50487d79 672 0, /* max_case_values. */
2d6bc7fa 673 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
674 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
675 &generic_prefetch_tune
984239ad
KT
676};
677
4fd92af6
KT
678static const struct tune_params cortexa57_tunings =
679{
680 &cortexa57_extra_costs,
a39d4348 681 &generic_addrcost_table,
e4a9c55a 682 &cortexa57_regmove_cost,
60bff090 683 &cortexa57_vector_cost,
aca97ef8 684 &generic_branch_cost,
9acc9cbe 685 &generic_approx_modes,
bd95e655
JG
686 4, /* memmov_cost */
687 3, /* issue_rate */
00a8574a 688 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
e9a3a175 689 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
0b82a5a2 690 16, /* function_align. */
9779b2e8 691 4, /* jump_align. */
d4407370 692 8, /* loop_align. */
cee66c68
WD
693 2, /* int_reassoc_width. */
694 4, /* fp_reassoc_width. */
50093a33
WD
695 1, /* vec_reassoc_width. */
696 2, /* min_div_recip_mul_sf. */
dfba575f 697 2, /* min_div_recip_mul_df. */
50487d79 698 0, /* max_case_values. */
2d6bc7fa 699 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
700 (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags. */
701 &generic_prefetch_tune
dfba575f
JG
702};
703
704static const struct tune_params cortexa72_tunings =
705{
706 &cortexa57_extra_costs,
a39d4348 707 &generic_addrcost_table,
dfba575f
JG
708 &cortexa57_regmove_cost,
709 &cortexa57_vector_cost,
aca97ef8 710 &generic_branch_cost,
9acc9cbe 711 &generic_approx_modes,
dfba575f
JG
712 4, /* memmov_cost */
713 3, /* issue_rate */
00a8574a 714 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
dfba575f
JG
715 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
716 16, /* function_align. */
9779b2e8 717 4, /* jump_align. */
d4407370 718 8, /* loop_align. */
dfba575f
JG
719 2, /* int_reassoc_width. */
720 4, /* fp_reassoc_width. */
721 1, /* vec_reassoc_width. */
722 2, /* min_div_recip_mul_sf. */
723 2, /* min_div_recip_mul_df. */
50487d79 724 0, /* max_case_values. */
0bc24338 725 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
726 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
727 &generic_prefetch_tune
4fd92af6
KT
728};
729
4fb570c4
KT
730static const struct tune_params cortexa73_tunings =
731{
732 &cortexa57_extra_costs,
a39d4348 733 &generic_addrcost_table,
4fb570c4
KT
734 &cortexa57_regmove_cost,
735 &cortexa57_vector_cost,
aca97ef8 736 &generic_branch_cost,
4fb570c4
KT
737 &generic_approx_modes,
738 4, /* memmov_cost. */
739 2, /* issue_rate. */
740 (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
741 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */
742 16, /* function_align. */
9779b2e8 743 4, /* jump_align. */
d4407370 744 8, /* loop_align. */
4fb570c4
KT
745 2, /* int_reassoc_width. */
746 4, /* fp_reassoc_width. */
747 1, /* vec_reassoc_width. */
748 2, /* min_div_recip_mul_sf. */
749 2, /* min_div_recip_mul_df. */
750 0, /* max_case_values. */
4fb570c4 751 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
752 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
753 &generic_prefetch_tune
4fb570c4
KT
754};
755
9d2c6e2e
MK
756
757
5ec1ae3b
EM
758static const struct tune_params exynosm1_tunings =
759{
760 &exynosm1_extra_costs,
761 &exynosm1_addrcost_table,
762 &exynosm1_regmove_cost,
763 &exynosm1_vector_cost,
764 &generic_branch_cost,
9acc9cbe 765 &exynosm1_approx_modes,
5ec1ae3b
EM
766 4, /* memmov_cost */
767 3, /* issue_rate */
25cc2199 768 (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
5ec1ae3b
EM
769 4, /* function_align. */
770 4, /* jump_align. */
771 4, /* loop_align. */
772 2, /* int_reassoc_width. */
773 4, /* fp_reassoc_width. */
774 1, /* vec_reassoc_width. */
775 2, /* min_div_recip_mul_sf. */
776 2, /* min_div_recip_mul_df. */
777 48, /* max_case_values. */
220379df 778 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
779 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
780 &exynosm1_prefetch_tune
5ec1ae3b
EM
781};
782
f1e247d0
AP
783static const struct tune_params thunderxt88_tunings =
784{
785 &thunderx_extra_costs,
786 &generic_addrcost_table,
787 &thunderx_regmove_cost,
788 &thunderx_vector_cost,
789 &generic_branch_cost,
790 &generic_approx_modes,
791 6, /* memmov_cost */
792 2, /* issue_rate */
793 AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
794 8, /* function_align. */
795 8, /* jump_align. */
796 8, /* loop_align. */
797 2, /* int_reassoc_width. */
798 4, /* fp_reassoc_width. */
799 1, /* vec_reassoc_width. */
800 2, /* min_div_recip_mul_sf. */
801 2, /* min_div_recip_mul_df. */
802 0, /* max_case_values. */
803 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
804 (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
805 &thunderxt88_prefetch_tune
806};
807
d1bcc29f
AP
808static const struct tune_params thunderx_tunings =
809{
810 &thunderx_extra_costs,
811 &generic_addrcost_table,
812 &thunderx_regmove_cost,
c3f20327 813 &thunderx_vector_cost,
b9066f5a 814 &generic_branch_cost,
9acc9cbe 815 &generic_approx_modes,
bd95e655
JG
816 6, /* memmov_cost */
817 2, /* issue_rate */
e9a3a175 818 AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
0b82a5a2
WD
819 8, /* function_align. */
820 8, /* jump_align. */
821 8, /* loop_align. */
cee66c68
WD
822 2, /* int_reassoc_width. */
823 4, /* fp_reassoc_width. */
50093a33
WD
824 1, /* vec_reassoc_width. */
825 2, /* min_div_recip_mul_sf. */
dfba575f 826 2, /* min_div_recip_mul_df. */
50487d79 827 0, /* max_case_values. */
2d6bc7fa 828 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
b10f1009
AP
829 (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
830 | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
f1e247d0 831 &thunderx_prefetch_tune
d1bcc29f
AP
832};
833
381e27aa
PT
834static const struct tune_params xgene1_tunings =
835{
836 &xgene1_extra_costs,
837 &xgene1_addrcost_table,
838 &xgene1_regmove_cost,
839 &xgene1_vector_cost,
b9066f5a 840 &generic_branch_cost,
9acc9cbe 841 &xgene1_approx_modes,
bd95e655
JG
842 6, /* memmov_cost */
843 4, /* issue_rate */
e9a3a175 844 AARCH64_FUSE_NOTHING, /* fusible_ops */
381e27aa
PT
845 16, /* function_align. */
846 8, /* jump_align. */
847 16, /* loop_align. */
848 2, /* int_reassoc_width. */
849 4, /* fp_reassoc_width. */
50093a33
WD
850 1, /* vec_reassoc_width. */
851 2, /* min_div_recip_mul_sf. */
dfba575f 852 2, /* min_div_recip_mul_df. */
50487d79 853 0, /* max_case_values. */
2d6bc7fa 854 tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
9d2c6e2e
MK
855 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
856 &generic_prefetch_tune
381e27aa
PT
857};
858
ee446d9f
JW
859static const struct tune_params qdf24xx_tunings =
860{
861 &qdf24xx_extra_costs,
a39d4348 862 &generic_addrcost_table,
ee446d9f
JW
863 &qdf24xx_regmove_cost,
864 &generic_vector_cost,
865 &generic_branch_cost,
866 &generic_approx_modes,
867 4, /* memmov_cost */
868 4, /* issue_rate */
869 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
 870 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
871 16, /* function_align. */
872 8, /* jump_align. */
873 16, /* loop_align. */
874 2, /* int_reassoc_width. */
875 4, /* fp_reassoc_width. */
876 1, /* vec_reassoc_width. */
877 2, /* min_div_recip_mul_sf. */
878 2, /* min_div_recip_mul_df. */
879 0, /* max_case_values. */
4f2a94e6 880 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
881 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
882 &qdf24xx_prefetch_tune
ee446d9f
JW
883};
884
52ee8191
SP
885/* Tuning structure for the Qualcomm Saphira core. Default to falkor values
886 for now. */
887static const struct tune_params saphira_tunings =
888{
889 &generic_extra_costs,
890 &generic_addrcost_table,
891 &generic_regmove_cost,
892 &generic_vector_cost,
893 &generic_branch_cost,
894 &generic_approx_modes,
895 4, /* memmov_cost */
896 4, /* issue_rate */
897 (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
 898 | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */
899 16, /* function_align. */
900 8, /* jump_align. */
901 16, /* loop_align. */
902 2, /* int_reassoc_width. */
903 4, /* fp_reassoc_width. */
904 1, /* vec_reassoc_width. */
905 2, /* min_div_recip_mul_sf. */
906 2, /* min_div_recip_mul_df. */
907 0, /* max_case_values. */
908 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
909 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
910 &generic_prefetch_tune
911};
912
d1261ac6 913static const struct tune_params thunderx2t99_tunings =
ad611a4c 914{
d1261ac6
AP
915 &thunderx2t99_extra_costs,
916 &thunderx2t99_addrcost_table,
917 &thunderx2t99_regmove_cost,
918 &thunderx2t99_vector_cost,
aca97ef8 919 &generic_branch_cost,
ad611a4c
VP
920 &generic_approx_modes,
921 4, /* memmov_cost. */
922 4, /* issue_rate. */
00c7c57f
JB
923 (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
924 | AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */
ad611a4c
VP
925 16, /* function_align. */
926 8, /* jump_align. */
927 16, /* loop_align. */
928 3, /* int_reassoc_width. */
929 2, /* fp_reassoc_width. */
930 2, /* vec_reassoc_width. */
931 2, /* min_div_recip_mul_sf. */
932 2, /* min_div_recip_mul_df. */
933 0, /* max_case_values. */
f1e247d0 934 tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
9d2c6e2e
MK
935 (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
936 &thunderx2t99_prefetch_tune
ad611a4c
VP
937};
938
8dec06f2
JG
939/* Support for fine-grained override of the tuning structures. */
940struct aarch64_tuning_override_function
941{
942 const char* name;
943 void (*parse_override)(const char*, struct tune_params*);
944};
945
946static void aarch64_parse_fuse_string (const char*, struct tune_params*);
947static void aarch64_parse_tune_string (const char*, struct tune_params*);
948
949static const struct aarch64_tuning_override_function
950aarch64_tuning_override_functions[] =
951{
952 { "fuse", aarch64_parse_fuse_string },
953 { "tune", aarch64_parse_tune_string },
954 { NULL, NULL }
955};
956
43e9d192
IB
957/* A processor implementing AArch64. */
958struct processor
959{
960 const char *const name;
46806c44
KT
961 enum aarch64_processor ident;
962 enum aarch64_processor sched_core;
393ae126 963 enum aarch64_arch arch;
0c6caaf8 964 unsigned architecture_version;
43e9d192
IB
965 const unsigned long flags;
966 const struct tune_params *const tune;
967};
968
393ae126
KT
969/* Architectures implementing AArch64. */
970static const struct processor all_architectures[] =
971{
972#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
973 {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
974#include "aarch64-arches.def"
393ae126
KT
975 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
976};
977
43e9d192
IB
978/* Processor cores implementing AArch64. */
979static const struct processor all_cores[] =
980{
e8fcc9fa 981#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
393ae126
KT
982 {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
983 all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
984 FLAGS, &COSTS##_tunings},
43e9d192 985#include "aarch64-cores.def"
393ae126
KT
986 {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
987 AARCH64_FL_FOR_ARCH8, &generic_tunings},
988 {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
43e9d192
IB
989};
990
43e9d192 991
361fb3ee
KT
992/* Target specification. These are populated by the -march, -mtune, -mcpu
993 handling code or by target attributes. */
43e9d192
IB
994static const struct processor *selected_arch;
995static const struct processor *selected_cpu;
996static const struct processor *selected_tune;
997
b175b679
JG
998/* The current tuning set. */
999struct tune_params aarch64_tune_params = generic_tunings;
1000
43e9d192
IB
1001#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
1002
1003/* An ISA extension in the co-processor and main instruction set space. */
1004struct aarch64_option_extension
1005{
1006 const char *const name;
1007 const unsigned long flags_on;
1008 const unsigned long flags_off;
1009};
1010
43e9d192
IB
1011typedef enum aarch64_cond_code
1012{
1013 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
1014 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
1015 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
1016}
1017aarch64_cc;
1018
1019#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
1020
1021/* The condition codes of the processor, and the inverse function. */
1022static const char * const aarch64_condition_codes[] =
1023{
1024 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1025 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1026};
1027
973d2e01
TP
1028/* Generate code to enable conditional branches in functions over 1 MiB. */
1029const char *
1030aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
1031 const char * branch_format)
1032{
1033 rtx_code_label * tmp_label = gen_label_rtx ();
1034 char label_buf[256];
1035 char buffer[128];
1036 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
1037 CODE_LABEL_NUMBER (tmp_label));
1038 const char *label_ptr = targetm.strip_name_encoding (label_buf);
1039 rtx dest_label = operands[pos_label];
1040 operands[pos_label] = tmp_label;
1041
1042 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
1043 output_asm_insn (buffer, operands);
1044
1045 snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
1046 operands[pos_label] = dest_label;
1047 output_asm_insn (buffer, operands);
1048 return "";
1049}
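/* A sketch (assumed, for illustration) of the output for a conditional
   branch whose target is out of range: the callers pass the inverted
   condition in BRANCH_FORMAT, so a far "cbz x0, .Ldest" would be emitted
   roughly as

	cbnz	x0, .Ltmp	// BRANCH_FORMAT plus the generated label
	b	.Ldest
     .Ltmp:

   where .Ltmp stands for the internal label created above.  */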
1050
261fb553
AL
1051void
1052aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
1053{
1054 const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
1055 if (TARGET_GENERAL_REGS_ONLY)
1056 error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
1057 else
1058 error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
1059}
1060
c64f7d37
WD
1061/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
1062 The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have
31e2b5a3
WD
1063 the same cost even if ALL_REGS has a much larger cost. ALL_REGS is also
1064 used if the cost of both FP_REGS and GENERAL_REGS is lower than the memory
1065 cost (in this case the best class is the lowest cost one). Using ALL_REGS
1066 irrespectively of its cost results in bad allocations with many redundant
1067 int<->FP moves which are expensive on various cores.
1068 To avoid this we don't allow ALL_REGS as the allocno class, but force a
1069 decision between FP_REGS and GENERAL_REGS. We use the allocno class if it
1070 isn't ALL_REGS. Similarly, use the best class if it isn't ALL_REGS.
1071 Otherwise set the allocno class depending on the mode.
1072 The result of this is that it is no longer inefficient to have a higher
1073 memory move cost than the register move cost.
1074*/
c64f7d37
WD
1075
1076static reg_class_t
31e2b5a3
WD
1077aarch64_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
1078 reg_class_t best_class)
c64f7d37 1079{
b8506a8a 1080 machine_mode mode;
c64f7d37
WD
1081
1082 if (allocno_class != ALL_REGS)
1083 return allocno_class;
1084
31e2b5a3
WD
1085 if (best_class != ALL_REGS)
1086 return best_class;
1087
c64f7d37
WD
1088 mode = PSEUDO_REGNO_MODE (regno);
1089 return FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode) ? FP_REGS : GENERAL_REGS;
1090}
1091
26e0ff94 1092static unsigned int
b8506a8a 1093aarch64_min_divisions_for_recip_mul (machine_mode mode)
26e0ff94 1094{
50093a33 1095 if (GET_MODE_UNIT_SIZE (mode) == 4)
b175b679
JG
1096 return aarch64_tune_params.min_div_recip_mul_sf;
1097 return aarch64_tune_params.min_div_recip_mul_df;
26e0ff94
WD
1098}
1099
b5b33e11 1100/* Return the reassociation width of treeop OPC with mode MODE. */
cee66c68 1101static int
b5b33e11 1102aarch64_reassociation_width (unsigned opc, machine_mode mode)
cee66c68
WD
1103{
1104 if (VECTOR_MODE_P (mode))
b175b679 1105 return aarch64_tune_params.vec_reassoc_width;
cee66c68 1106 if (INTEGRAL_MODE_P (mode))
b175b679 1107 return aarch64_tune_params.int_reassoc_width;
b5b33e11
WD
1108 /* Avoid reassociating floating point addition so we emit more FMAs. */
1109 if (FLOAT_MODE_P (mode) && opc != PLUS_EXPR)
b175b679 1110 return aarch64_tune_params.fp_reassoc_width;
cee66c68
WD
1111 return 1;
1112}
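/* Illustrative values, assuming the generic tuning above: a V4SImode
   operation gets vec_reassoc_width (1), a DImode one int_reassoc_width (2),
   and a DFmode multiplication fp_reassoc_width (4); a DFmode addition
   deliberately falls through to 1 so FMA formation is not hindered.  */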
1113
43e9d192
IB
1114/* Provide a mapping from gcc register numbers to dwarf register numbers. */
1115unsigned
1116aarch64_dbx_register_number (unsigned regno)
1117{
1118 if (GP_REGNUM_P (regno))
1119 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
1120 else if (regno == SP_REGNUM)
1121 return AARCH64_DWARF_SP;
1122 else if (FP_REGNUM_P (regno))
1123 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
43cacb12
RS
1124 else if (PR_REGNUM_P (regno))
1125 return AARCH64_DWARF_P0 + regno - P0_REGNUM;
1126 else if (regno == VG_REGNUM)
1127 return AARCH64_DWARF_VG;
43e9d192
IB
1128
1129 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
1130 equivalent DWARF register. */
1131 return DWARF_FRAME_REGISTERS;
1132}
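/* For orientation (numbers from the AArch64 DWARF register mapping, stated
   here as an aside): x0-x30 map to 0-30, sp to 31, v0-v31 to 64-95, the SVE
   predicates p0-p15 to 48-63 and VG to 46; anything else yields
   DWARF_FRAME_REGISTERS, i.e. "no DWARF equivalent".  */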
1133
43cacb12
RS
1134/* Return true if MODE is any of the Advanced SIMD structure modes. */
1135static bool
1136aarch64_advsimd_struct_mode_p (machine_mode mode)
1137{
1138 return (TARGET_SIMD
1139 && (mode == OImode || mode == CImode || mode == XImode));
1140}
1141
1142/* Return true if MODE is an SVE predicate mode. */
1143static bool
1144aarch64_sve_pred_mode_p (machine_mode mode)
1145{
1146 return (TARGET_SVE
1147 && (mode == VNx16BImode
1148 || mode == VNx8BImode
1149 || mode == VNx4BImode
1150 || mode == VNx2BImode));
1151}
1152
1153/* Three mutually-exclusive flags describing a vector or predicate type. */
1154const unsigned int VEC_ADVSIMD = 1;
1155const unsigned int VEC_SVE_DATA = 2;
1156const unsigned int VEC_SVE_PRED = 4;
1157/* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate
1158 a structure of 2, 3 or 4 vectors. */
1159const unsigned int VEC_STRUCT = 8;
1160/* Useful combinations of the above. */
1161const unsigned int VEC_ANY_SVE = VEC_SVE_DATA | VEC_SVE_PRED;
1162const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA;
1163
1164/* Return a set of flags describing the vector properties of mode MODE.
1165 Ignore modes that are not supported by the current target. */
1166static unsigned int
1167aarch64_classify_vector_mode (machine_mode mode)
1168{
1169 if (aarch64_advsimd_struct_mode_p (mode))
1170 return VEC_ADVSIMD | VEC_STRUCT;
1171
1172 if (aarch64_sve_pred_mode_p (mode))
1173 return VEC_SVE_PRED;
1174
1175 scalar_mode inner = GET_MODE_INNER (mode);
1176 if (VECTOR_MODE_P (mode)
1177 && (inner == QImode
1178 || inner == HImode
1179 || inner == HFmode
1180 || inner == SImode
1181 || inner == SFmode
1182 || inner == DImode
1183 || inner == DFmode))
1184 {
9f4cbab8
RS
1185 if (TARGET_SVE)
1186 {
1187 if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR))
1188 return VEC_SVE_DATA;
1189 if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2)
1190 || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3)
1191 || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4))
1192 return VEC_SVE_DATA | VEC_STRUCT;
1193 }
43cacb12
RS
1194
1195 /* This includes V1DF but not V1DI (which doesn't exist). */
1196 if (TARGET_SIMD
1197 && (known_eq (GET_MODE_BITSIZE (mode), 64)
1198 || known_eq (GET_MODE_BITSIZE (mode), 128)))
1199 return VEC_ADVSIMD;
1200 }
1201
1202 return 0;
1203}
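/* Assumed examples of the classification above (illustration only):
     V16QImode, V2DFmode          -> VEC_ADVSIMD
     OImode, CImode, XImode       -> VEC_ADVSIMD | VEC_STRUCT
     VNx4SImode (with SVE)        -> VEC_SVE_DATA
     VNx16BImode (with SVE)       -> VEC_SVE_PRED
     DImode, TImode               -> 0 (not vector modes)  */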
1204
1205/* Return true if MODE is any of the data vector modes, including
1206 structure modes. */
43e9d192 1207static bool
43cacb12 1208aarch64_vector_data_mode_p (machine_mode mode)
43e9d192 1209{
43cacb12 1210 return aarch64_classify_vector_mode (mode) & VEC_ANY_DATA;
43e9d192
IB
1211}
1212
43cacb12
RS
1213/* Return true if MODE is an SVE data vector mode; either a single vector
1214 or a structure of vectors. */
43e9d192 1215static bool
43cacb12 1216aarch64_sve_data_mode_p (machine_mode mode)
43e9d192 1217{
43cacb12 1218 return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA;
43e9d192
IB
1219}
1220
9f4cbab8
RS
1221/* Implement target hook TARGET_ARRAY_MODE. */
1222static opt_machine_mode
1223aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
1224{
1225 if (aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
1226 && IN_RANGE (nelems, 2, 4))
1227 return mode_for_vector (GET_MODE_INNER (mode),
1228 GET_MODE_NUNITS (mode) * nelems);
1229
1230 return opt_machine_mode ();
1231}
1232
43e9d192
IB
1233/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
1234static bool
ef4bddc2 1235aarch64_array_mode_supported_p (machine_mode mode,
43e9d192
IB
1236 unsigned HOST_WIDE_INT nelems)
1237{
1238 if (TARGET_SIMD
635e66fe
AL
1239 && (AARCH64_VALID_SIMD_QREG_MODE (mode)
1240 || AARCH64_VALID_SIMD_DREG_MODE (mode))
43e9d192
IB
1241 && (nelems >= 2 && nelems <= 4))
1242 return true;
1243
1244 return false;
1245}
1246
43cacb12
RS
1247/* Return the SVE predicate mode to use for elements that have
1248 ELEM_NBYTES bytes, if such a mode exists. */
1249
1250opt_machine_mode
1251aarch64_sve_pred_mode (unsigned int elem_nbytes)
1252{
1253 if (TARGET_SVE)
1254 {
1255 if (elem_nbytes == 1)
1256 return VNx16BImode;
1257 if (elem_nbytes == 2)
1258 return VNx8BImode;
1259 if (elem_nbytes == 4)
1260 return VNx4BImode;
1261 if (elem_nbytes == 8)
1262 return VNx2BImode;
1263 }
1264 return opt_machine_mode ();
1265}
1266
1267/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
1268
1269static opt_machine_mode
1270aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes)
1271{
1272 if (TARGET_SVE && known_eq (nbytes, BYTES_PER_SVE_VECTOR))
1273 {
1274 unsigned int elem_nbytes = vector_element_size (nbytes, nunits);
1275 machine_mode pred_mode;
1276 if (aarch64_sve_pred_mode (elem_nbytes).exists (&pred_mode))
1277 return pred_mode;
1278 }
1279
1280 return default_get_mask_mode (nunits, nbytes);
1281}
1282
c43f4279 1283/* Implement TARGET_HARD_REGNO_NREGS. */
43e9d192 1284
c43f4279 1285static unsigned int
ef4bddc2 1286aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
43e9d192 1287{
6a70badb
RS
1288 /* ??? Logically we should only need to provide a value when
1289 HARD_REGNO_MODE_OK says that the combination is valid,
1290 but at the moment we need to handle all modes. Just ignore
1291 any runtime parts for registers that can't store them. */
1292 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
43e9d192
IB
1293 switch (aarch64_regno_regclass (regno))
1294 {
1295 case FP_REGS:
1296 case FP_LO_REGS:
43cacb12
RS
1297 if (aarch64_sve_data_mode_p (mode))
1298 return exact_div (GET_MODE_SIZE (mode),
1299 BYTES_PER_SVE_VECTOR).to_constant ();
6a70badb 1300 return CEIL (lowest_size, UNITS_PER_VREG);
43cacb12
RS
1301 case PR_REGS:
1302 case PR_LO_REGS:
1303 case PR_HI_REGS:
1304 return 1;
43e9d192 1305 default:
6a70badb 1306 return CEIL (lowest_size, UNITS_PER_WORD);
43e9d192
IB
1307 }
1308 gcc_unreachable ();
1309}
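/* Worked example (assumed): TImode is 16 bytes, so in FP_REGS it needs
   CEIL (16, UNITS_PER_VREG) = CEIL (16, 16) = 1 register, while in
   GENERAL_REGS it needs CEIL (16, UNITS_PER_WORD) = CEIL (16, 8) = 2.  */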
1310
f939c3e6 1311/* Implement TARGET_HARD_REGNO_MODE_OK. */
43e9d192 1312
f939c3e6 1313static bool
ef4bddc2 1314aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
43e9d192
IB
1315{
1316 if (GET_MODE_CLASS (mode) == MODE_CC)
1317 return regno == CC_REGNUM;
1318
43cacb12
RS
1319 if (regno == VG_REGNUM)
1320 /* This must have the same size as _Unwind_Word. */
1321 return mode == DImode;
1322
1323 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
1324 if (vec_flags & VEC_SVE_PRED)
1325 return PR_REGNUM_P (regno);
1326
1327 if (PR_REGNUM_P (regno))
1328 return 0;
1329
9259db42
YZ
1330 if (regno == SP_REGNUM)
1331 /* The purpose of comparing with ptr_mode is to support the
1332 global register variable associated with the stack pointer
1333 register via the syntax of asm ("wsp") in ILP32. */
1334 return mode == Pmode || mode == ptr_mode;
1335
1336 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
43e9d192
IB
1337 return mode == Pmode;
1338
43cacb12 1339 if (GP_REGNUM_P (regno) && known_le (GET_MODE_SIZE (mode), 16))
f939c3e6 1340 return true;
43e9d192
IB
1341
1342 if (FP_REGNUM_P (regno))
1343 {
43cacb12 1344 if (vec_flags & VEC_STRUCT)
4edd6298 1345 return end_hard_regno (mode, regno) - 1 <= V31_REGNUM;
43e9d192 1346 else
43cacb12 1347 return !VECTOR_MODE_P (mode) || vec_flags != 0;
43e9d192
IB
1348 }
1349
f939c3e6 1350 return false;
43e9d192
IB
1351}
1352
80ec73f4
RS
1353/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves
1354 the lower 64 bits of a 128-bit register. Tell the compiler the callee
1355 clobbers the top 64 bits when restoring the bottom 64 bits. */
1356
1357static bool
1358aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
1359{
6a70badb 1360 return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), 8);
80ec73f4
RS
1361}
1362
43cacb12
RS
1363/* Implement REGMODE_NATURAL_SIZE. */
1364poly_uint64
1365aarch64_regmode_natural_size (machine_mode mode)
1366{
1367 /* The natural size for SVE data modes is one SVE data vector,
1368 and similarly for predicates. We can't independently modify
1369 anything smaller than that. */
1370 /* ??? For now, only do this for variable-width SVE registers.
1371 Doing it for constant-sized registers breaks lower-subreg.c. */
1372 /* ??? And once that's fixed, we should probably have similar
1373 code for Advanced SIMD. */
1374 if (!aarch64_sve_vg.is_constant ())
1375 {
1376 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
1377 if (vec_flags & VEC_SVE_PRED)
1378 return BYTES_PER_SVE_PRED;
1379 if (vec_flags & VEC_SVE_DATA)
1380 return BYTES_PER_SVE_VECTOR;
1381 }
1382 return UNITS_PER_WORD;
1383}
1384
73d9ac6a 1385/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
ef4bddc2 1386machine_mode
43cacb12
RS
1387aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned,
1388 machine_mode mode)
1389{
1390 /* The predicate mode determines which bits are significant and
1391 which are "don't care". Decreasing the number of lanes would
1392 lose data while increasing the number of lanes would make bits
1393 unnecessarily significant. */
1394 if (PR_REGNUM_P (regno))
1395 return mode;
6a70badb
RS
1396 if (known_ge (GET_MODE_SIZE (mode), 4))
1397 return mode;
73d9ac6a 1398 else
6a70badb 1399 return SImode;
73d9ac6a
IB
1400}
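/* For instance (assumed examples): an HFmode or QImode value live across a
   call in an FP register is saved and restored in SImode, whereas DFmode,
   TImode and the SVE predicate modes are kept in their own mode.  */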
1401
58e17cf8
RS
1402/* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
1403 that strcpy from constants will be faster. */
1404
1405static HOST_WIDE_INT
1406aarch64_constant_alignment (const_tree exp, HOST_WIDE_INT align)
1407{
1408 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
1409 return MAX (align, BITS_PER_WORD);
1410 return align;
1411}
1412
43e9d192
IB
1413/* Return true if calls to DECL should be treated as
 1414 long-calls (i.e. called via a register). */
1415static bool
1416aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
1417{
1418 return false;
1419}
1420
1421/* Return true if calls to symbol-ref SYM should be treated as
 1422 long-calls (i.e. called via a register). */
1423bool
1424aarch64_is_long_call_p (rtx sym)
1425{
1426 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
1427}
1428
b60d63cb
JW
1429/* Return true if calls to symbol-ref SYM should not go through
1430 plt stubs. */
1431
1432bool
1433aarch64_is_noplt_call_p (rtx sym)
1434{
1435 const_tree decl = SYMBOL_REF_DECL (sym);
1436
1437 if (flag_pic
1438 && decl
1439 && (!flag_plt
1440 || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
1441 && !targetm.binds_local_p (decl))
1442 return true;
1443
1444 return false;
1445}
1446
43e9d192
IB
1447/* Return true if the offsets to a zero/sign-extract operation
1448 represent an expression that matches an extend operation. The
 1449 operands represent the parameters from
1450
4745e701 1451 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
43e9d192 1452bool
77e994c9 1453aarch64_is_extend_from_extract (scalar_int_mode mode, rtx mult_imm,
43e9d192
IB
1454 rtx extract_imm)
1455{
1456 HOST_WIDE_INT mult_val, extract_val;
1457
1458 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
1459 return false;
1460
1461 mult_val = INTVAL (mult_imm);
1462 extract_val = INTVAL (extract_imm);
1463
1464 if (extract_val > 8
1465 && extract_val < GET_MODE_BITSIZE (mode)
1466 && exact_log2 (extract_val & ~7) > 0
1467 && (extract_val & 7) <= 4
1468 && mult_val == (1 << (extract_val & 7)))
1469 return true;
1470
1471 return false;
1472}
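/* Worked example (assumed, for illustration): in DImode, MULT_IMM = 4 and
   EXTRACT_IMM = 34 pass every test above (34 > 8, 34 < 64, 34 & ~7 = 32 is
   a power of two, 34 & 7 = 2 <= 4, and 4 == 1 << 2), so the extract
   describes a 32-bit value extended and shifted left by 2, i.e. the operand
   form written as "uxtw #2" or "sxtw #2".  */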
1473
1474/* Emit an insn that's a simple single-set. Both the operands must be
1475 known to be valid. */
827ab47a 1476inline static rtx_insn *
43e9d192
IB
1477emit_set_insn (rtx x, rtx y)
1478{
f7df4a84 1479 return emit_insn (gen_rtx_SET (x, y));
43e9d192
IB
1480}
1481
1482/* X and Y are two things to compare using CODE. Emit the compare insn and
1483 return the rtx for register 0 in the proper mode. */
1484rtx
1485aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
1486{
ef4bddc2 1487 machine_mode mode = SELECT_CC_MODE (code, x, y);
43e9d192
IB
1488 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
1489
1490 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
1491 return cc_reg;
1492}
1493
1494/* Build the SYMBOL_REF for __tls_get_addr. */
1495
1496static GTY(()) rtx tls_get_addr_libfunc;
1497
1498rtx
1499aarch64_tls_get_addr (void)
1500{
1501 if (!tls_get_addr_libfunc)
1502 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
1503 return tls_get_addr_libfunc;
1504}
1505
1506/* Return the TLS model to use for ADDR. */
1507
1508static enum tls_model
1509tls_symbolic_operand_type (rtx addr)
1510{
1511 enum tls_model tls_kind = TLS_MODEL_NONE;
43e9d192
IB
1512 if (GET_CODE (addr) == CONST)
1513 {
6a70badb
RS
1514 poly_int64 addend;
1515 rtx sym = strip_offset (addr, &addend);
43e9d192
IB
1516 if (GET_CODE (sym) == SYMBOL_REF)
1517 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
1518 }
1519 else if (GET_CODE (addr) == SYMBOL_REF)
1520 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
1521
1522 return tls_kind;
1523}
1524
 1525/* We'll allow lo_sum's in our legitimate addresses so that
 1526 combine can take care of combining addresses where
 1527 necessary, but for generation purposes, we'll generate the address
 1528 as:
1529 RTL Absolute
1530 tmp = hi (symbol_ref); adrp x1, foo
1531 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
1532 nop
1533
1534 PIC TLS
1535 adrp x1, :got:foo adrp tmp, :tlsgd:foo
1536 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
1537 bl __tls_get_addr
1538 nop
1539
1540 Load TLS symbol, depending on TLS mechanism and TLS access model.
1541
1542 Global Dynamic - Traditional TLS:
1543 adrp tmp, :tlsgd:imm
1544 add dest, tmp, #:tlsgd_lo12:imm
1545 bl __tls_get_addr
1546
1547 Global Dynamic - TLS Descriptors:
1548 adrp dest, :tlsdesc:imm
1549 ldr tmp, [dest, #:tlsdesc_lo12:imm]
1550 add dest, dest, #:tlsdesc_lo12:imm
1551 blr tmp
1552 mrs tp, tpidr_el0
1553 add dest, dest, tp
1554
1555 Initial Exec:
1556 mrs tp, tpidr_el0
1557 adrp tmp, :gottprel:imm
1558 ldr dest, [tmp, #:gottprel_lo12:imm]
1559 add dest, dest, tp
1560
1561 Local Exec:
1562 mrs tp, tpidr_el0
0699caae
RL
1563 add t0, tp, #:tprel_hi12:imm, lsl #12
1564 add t0, t0, #:tprel_lo12_nc:imm
43e9d192
IB
1565*/
1566
1567static void
1568aarch64_load_symref_appropriately (rtx dest, rtx imm,
1569 enum aarch64_symbol_type type)
1570{
1571 switch (type)
1572 {
1573 case SYMBOL_SMALL_ABSOLUTE:
1574 {
28514dda 1575 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 1576 rtx tmp_reg = dest;
ef4bddc2 1577 machine_mode mode = GET_MODE (dest);
28514dda
YZ
1578
1579 gcc_assert (mode == Pmode || mode == ptr_mode);
1580
43e9d192 1581 if (can_create_pseudo_p ())
28514dda 1582 tmp_reg = gen_reg_rtx (mode);
43e9d192 1583
28514dda 1584 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
1585 emit_insn (gen_add_losym (dest, tmp_reg, imm));
1586 return;
1587 }
1588
a5350ddc 1589 case SYMBOL_TINY_ABSOLUTE:
f7df4a84 1590 emit_insn (gen_rtx_SET (dest, imm));
a5350ddc
CSS
1591 return;
1592
1b1e81f8
JW
1593 case SYMBOL_SMALL_GOT_28K:
1594 {
1595 machine_mode mode = GET_MODE (dest);
1596 rtx gp_rtx = pic_offset_table_rtx;
53021678
JW
1597 rtx insn;
1598 rtx mem;
1b1e81f8
JW
1599
 1600 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
 1601 here before rtl expansion. Tree IVOPTS will generate rtl patterns to
 1602 decide rtx costs, in which case pic_offset_table_rtx is not
 1603 initialized. In that case there is no need to generate the first adrp
026c3cfd 1604 instruction as the final cost for global variable access is
1b1e81f8
JW
1605 one instruction. */
1606 if (gp_rtx != NULL)
1607 {
 1608 /* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
 1609 use the page base as the GOT base, the first page may be wasted;
 1610 in the worst case there is only 28K of space for the GOT).
1611
 1612 The generated instruction sequence for accessing a global variable
1613 is:
1614
a3957742 1615 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1b1e81f8
JW
1616
1617 Only one instruction needed. But we must initialize
 1618 pic_offset_table_rtx properly. We generate an initializing insn for
 1619 every global access, and allow CSE to remove all redundant ones.
 1620
 1621 The final instruction sequence will look like the following
 1622 for multiple global variable accesses.
1623
a3957742 1624 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1b1e81f8 1625
a3957742
JW
1626 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
1627 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
1628 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
1629 ... */
1b1e81f8
JW
1630
1631 rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1632 crtl->uses_pic_offset_table = 1;
1633 emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
1634
1635 if (mode != GET_MODE (gp_rtx))
4ba8f0a3
AP
1636 gp_rtx = gen_lowpart (mode, gp_rtx);
1637
1b1e81f8
JW
1638 }
1639
1640 if (mode == ptr_mode)
1641 {
1642 if (mode == DImode)
53021678 1643 insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
1b1e81f8 1644 else
53021678
JW
1645 insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);
1646
1647 mem = XVECEXP (SET_SRC (insn), 0, 0);
1b1e81f8
JW
1648 }
1649 else
1650 {
1651 gcc_assert (mode == Pmode);
53021678
JW
1652
1653 insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
1654 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
1b1e81f8
JW
1655 }
1656
53021678
JW
 1657 /* The operand is expected to be a MEM. Whenever the related insn
 1658 pattern changes, the code above that computes MEM should be
 1659 updated. */
1660 gcc_assert (GET_CODE (mem) == MEM);
1661 MEM_READONLY_P (mem) = 1;
1662 MEM_NOTRAP_P (mem) = 1;
1663 emit_insn (insn);
1b1e81f8
JW
1664 return;
1665 }
1666
6642bdb4 1667 case SYMBOL_SMALL_GOT_4G:
43e9d192 1668 {
28514dda
YZ
1669 /* In ILP32, the mode of dest can be either SImode or DImode,
1670 while the got entry is always of SImode size. The mode of
1671 dest depends on how dest is used: if dest is assigned to a
1672 pointer (e.g. in the memory), it has SImode; it may have
 1673	   DImode if dest is dereferenced to access the memory.
1674 This is why we have to handle three different ldr_got_small
1675 patterns here (two patterns for ILP32). */
53021678
JW
1676
1677 rtx insn;
1678 rtx mem;
43e9d192 1679 rtx tmp_reg = dest;
ef4bddc2 1680 machine_mode mode = GET_MODE (dest);
28514dda 1681
43e9d192 1682 if (can_create_pseudo_p ())
28514dda
YZ
1683 tmp_reg = gen_reg_rtx (mode);
1684
1685 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
1686 if (mode == ptr_mode)
1687 {
1688 if (mode == DImode)
53021678 1689 insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
28514dda 1690 else
53021678
JW
1691 insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
1692
1693 mem = XVECEXP (SET_SRC (insn), 0, 0);
28514dda
YZ
1694 }
1695 else
1696 {
1697 gcc_assert (mode == Pmode);
53021678
JW
1698
1699 insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
1700 mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
28514dda
YZ
1701 }
1702
53021678
JW
1703 gcc_assert (GET_CODE (mem) == MEM);
1704 MEM_READONLY_P (mem) = 1;
1705 MEM_NOTRAP_P (mem) = 1;
1706 emit_insn (insn);
43e9d192
IB
1707 return;
1708 }
1709
1710 case SYMBOL_SMALL_TLSGD:
1711 {
5d8a22a5 1712 rtx_insn *insns;
23b88fda
N
1713 machine_mode mode = GET_MODE (dest);
1714 rtx result = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1715
1716 start_sequence ();
23b88fda
N
1717 if (TARGET_ILP32)
1718 aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
1719 else
1720 aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
43e9d192
IB
1721 insns = get_insns ();
1722 end_sequence ();
1723
1724 RTL_CONST_CALL_P (insns) = 1;
1725 emit_libcall_block (insns, dest, result, imm);
1726 return;
1727 }
1728
1729 case SYMBOL_SMALL_TLSDESC:
1730 {
ef4bddc2 1731 machine_mode mode = GET_MODE (dest);
621ad2de 1732 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
43e9d192
IB
1733 rtx tp;
1734
621ad2de
AP
1735 gcc_assert (mode == Pmode || mode == ptr_mode);
1736
2876a13f
JW
1737 /* In ILP32, the got entry is always of SImode size. Unlike
1738 small GOT, the dest is fixed at reg 0. */
1739 if (TARGET_ILP32)
1740 emit_insn (gen_tlsdesc_small_si (imm));
621ad2de 1741 else
2876a13f 1742 emit_insn (gen_tlsdesc_small_di (imm));
43e9d192 1743 tp = aarch64_load_tp (NULL);
621ad2de
AP
1744
1745 if (mode != Pmode)
1746 tp = gen_lowpart (mode, tp);
1747
2876a13f 1748 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
241dbd9d
QZ
1749 if (REG_P (dest))
1750 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1751 return;
1752 }
1753
79496620 1754 case SYMBOL_SMALL_TLSIE:
43e9d192 1755 {
621ad2de
AP
1756 /* In ILP32, the mode of dest can be either SImode or DImode,
1757 while the got entry is always of SImode size. The mode of
1758 dest depends on how dest is used: if dest is assigned to a
1759 pointer (e.g. in the memory), it has SImode; it may have
 1760	   DImode if dest is dereferenced to access the memory.
1761 This is why we have to handle three different tlsie_small
1762 patterns here (two patterns for ILP32). */
ef4bddc2 1763 machine_mode mode = GET_MODE (dest);
621ad2de 1764 rtx tmp_reg = gen_reg_rtx (mode);
43e9d192 1765 rtx tp = aarch64_load_tp (NULL);
621ad2de
AP
1766
1767 if (mode == ptr_mode)
1768 {
1769 if (mode == DImode)
1770 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
1771 else
1772 {
1773 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
1774 tp = gen_lowpart (mode, tp);
1775 }
1776 }
1777 else
1778 {
1779 gcc_assert (mode == Pmode);
1780 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
1781 }
1782
f7df4a84 1783 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
241dbd9d
QZ
1784 if (REG_P (dest))
1785 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1786 return;
1787 }
1788
cbf5629e 1789 case SYMBOL_TLSLE12:
d18ba284 1790 case SYMBOL_TLSLE24:
cbf5629e
JW
1791 case SYMBOL_TLSLE32:
1792 case SYMBOL_TLSLE48:
43e9d192 1793 {
cbf5629e 1794 machine_mode mode = GET_MODE (dest);
43e9d192 1795 rtx tp = aarch64_load_tp (NULL);
e6f7f0e9 1796
cbf5629e
JW
1797 if (mode != Pmode)
1798 tp = gen_lowpart (mode, tp);
1799
1800 switch (type)
1801 {
1802 case SYMBOL_TLSLE12:
1803 emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
1804 (dest, tp, imm));
1805 break;
1806 case SYMBOL_TLSLE24:
1807 emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
1808 (dest, tp, imm));
1809 break;
1810 case SYMBOL_TLSLE32:
1811 emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
1812 (dest, imm));
1813 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1814 (dest, dest, tp));
1815 break;
1816 case SYMBOL_TLSLE48:
1817 emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
1818 (dest, imm));
1819 emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
1820 (dest, dest, tp));
1821 break;
1822 default:
1823 gcc_unreachable ();
1824 }
e6f7f0e9 1825
241dbd9d
QZ
1826 if (REG_P (dest))
1827 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
43e9d192
IB
1828 return;
1829 }
1830
87dd8ab0
MS
1831 case SYMBOL_TINY_GOT:
1832 emit_insn (gen_ldr_got_tiny (dest, imm));
1833 return;
1834
5ae7caad
JW
1835 case SYMBOL_TINY_TLSIE:
1836 {
1837 machine_mode mode = GET_MODE (dest);
1838 rtx tp = aarch64_load_tp (NULL);
1839
1840 if (mode == ptr_mode)
1841 {
1842 if (mode == DImode)
1843 emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
1844 else
1845 {
1846 tp = gen_lowpart (mode, tp);
1847 emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
1848 }
1849 }
1850 else
1851 {
1852 gcc_assert (mode == Pmode);
1853 emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
1854 }
1855
241dbd9d
QZ
1856 if (REG_P (dest))
1857 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
5ae7caad
JW
1858 return;
1859 }
1860
43e9d192
IB
1861 default:
1862 gcc_unreachable ();
1863 }
1864}
1865
1866/* Emit a move from SRC to DEST. Assume that the move expanders can
1867 handle all moves if !can_create_pseudo_p (). The distinction is
1868 important because, unlike emit_move_insn, the move expanders know
1869 how to force Pmode objects into the constant pool even when the
1870 constant pool address is not itself legitimate. */
1871static rtx
1872aarch64_emit_move (rtx dest, rtx src)
1873{
1874 return (can_create_pseudo_p ()
1875 ? emit_move_insn (dest, src)
1876 : emit_move_insn_1 (dest, src));
1877}
1878
f22d7973
RS
1879/* Apply UNOPTAB to OP and store the result in DEST. */
1880
1881static void
1882aarch64_emit_unop (rtx dest, optab unoptab, rtx op)
1883{
1884 rtx tmp = expand_unop (GET_MODE (dest), unoptab, op, dest, 0);
1885 if (dest != tmp)
1886 emit_move_insn (dest, tmp);
1887}
1888
1889/* Apply BINOPTAB to OP0 and OP1 and store the result in DEST. */
1890
1891static void
1892aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
1893{
1894 rtx tmp = expand_binop (GET_MODE (dest), binoptab, op0, op1, dest, 0,
1895 OPTAB_DIRECT);
1896 if (dest != tmp)
1897 emit_move_insn (dest, tmp);
1898}
1899
030d03b8
RE
1900/* Split a 128-bit move operation into two 64-bit move operations,
1901 taking care to handle partial overlap of register to register
1902 copies. Special cases are needed when moving between GP regs and
1903 FP regs. SRC can be a register, constant or memory; DST a register
1904 or memory. If either operand is memory it must not have any side
1905 effects. */
43e9d192
IB
1906void
1907aarch64_split_128bit_move (rtx dst, rtx src)
1908{
030d03b8
RE
1909 rtx dst_lo, dst_hi;
1910 rtx src_lo, src_hi;
43e9d192 1911
ef4bddc2 1912 machine_mode mode = GET_MODE (dst);
12dc6974 1913
030d03b8
RE
1914 gcc_assert (mode == TImode || mode == TFmode);
1915 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
1916 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
1917
1918 if (REG_P (dst) && REG_P (src))
1919 {
030d03b8
RE
1920 int src_regno = REGNO (src);
1921 int dst_regno = REGNO (dst);
43e9d192 1922
030d03b8 1923 /* Handle FP <-> GP regs. */
43e9d192
IB
1924 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
1925 {
030d03b8
RE
1926 src_lo = gen_lowpart (word_mode, src);
1927 src_hi = gen_highpart (word_mode, src);
1928
1929 if (mode == TImode)
1930 {
1931 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
1932 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
1933 }
1934 else
1935 {
1936 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
1937 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
1938 }
1939 return;
43e9d192
IB
1940 }
1941 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
1942 {
030d03b8
RE
1943 dst_lo = gen_lowpart (word_mode, dst);
1944 dst_hi = gen_highpart (word_mode, dst);
1945
1946 if (mode == TImode)
1947 {
1948 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
1949 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
1950 }
1951 else
1952 {
1953 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
1954 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
1955 }
1956 return;
43e9d192 1957 }
43e9d192
IB
1958 }
1959
030d03b8
RE
1960 dst_lo = gen_lowpart (word_mode, dst);
1961 dst_hi = gen_highpart (word_mode, dst);
1962 src_lo = gen_lowpart (word_mode, src);
1963 src_hi = gen_highpart_mode (word_mode, mode, src);
1964
1965 /* At most one pairing may overlap. */
1966 if (reg_overlap_mentioned_p (dst_lo, src_hi))
1967 {
1968 aarch64_emit_move (dst_hi, src_hi);
1969 aarch64_emit_move (dst_lo, src_lo);
1970 }
1971 else
1972 {
1973 aarch64_emit_move (dst_lo, src_lo);
1974 aarch64_emit_move (dst_hi, src_hi);
1975 }
43e9d192
IB
1976}
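
/* Illustrative note (added commentary, not part of the original source):
   for a TImode copy between general registers on a little-endian target
   where the destination's low half overlaps the source's high half,
   e.g. DST = {x1,x2} and SRC = {x0,x1}, the overlap check above emits
   the high-half move first:

	mov	x2, x1
	mov	x1, x0

   so that x1 is not clobbered before it has been read.  */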
1977
1978bool
1979aarch64_split_128bit_move_p (rtx dst, rtx src)
1980{
1981 return (! REG_P (src)
1982 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
1983}
1984
8b033a8a
SN
1985/* Split a complex SIMD combine. */
1986
1987void
1988aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1989{
ef4bddc2
RS
1990 machine_mode src_mode = GET_MODE (src1);
1991 machine_mode dst_mode = GET_MODE (dst);
8b033a8a
SN
1992
1993 gcc_assert (VECTOR_MODE_P (dst_mode));
a977dc0c
MC
1994 gcc_assert (register_operand (dst, dst_mode)
1995 && register_operand (src1, src_mode)
1996 && register_operand (src2, src_mode));
8b033a8a 1997
a977dc0c 1998 rtx (*gen) (rtx, rtx, rtx);
8b033a8a 1999
a977dc0c
MC
2000 switch (src_mode)
2001 {
4e10a5a7 2002 case E_V8QImode:
a977dc0c
MC
2003 gen = gen_aarch64_simd_combinev8qi;
2004 break;
4e10a5a7 2005 case E_V4HImode:
a977dc0c
MC
2006 gen = gen_aarch64_simd_combinev4hi;
2007 break;
4e10a5a7 2008 case E_V2SImode:
a977dc0c
MC
2009 gen = gen_aarch64_simd_combinev2si;
2010 break;
4e10a5a7 2011 case E_V4HFmode:
a977dc0c
MC
2012 gen = gen_aarch64_simd_combinev4hf;
2013 break;
4e10a5a7 2014 case E_V2SFmode:
a977dc0c
MC
2015 gen = gen_aarch64_simd_combinev2sf;
2016 break;
4e10a5a7 2017 case E_DImode:
a977dc0c
MC
2018 gen = gen_aarch64_simd_combinedi;
2019 break;
4e10a5a7 2020 case E_DFmode:
a977dc0c
MC
2021 gen = gen_aarch64_simd_combinedf;
2022 break;
2023 default:
2024 gcc_unreachable ();
8b033a8a 2025 }
a977dc0c
MC
2026
2027 emit_insn (gen (dst, src1, src2));
2028 return;
8b033a8a
SN
2029}
2030
fd4842cd
SN
2031/* Split a complex SIMD move. */
2032
2033void
2034aarch64_split_simd_move (rtx dst, rtx src)
2035{
ef4bddc2
RS
2036 machine_mode src_mode = GET_MODE (src);
2037 machine_mode dst_mode = GET_MODE (dst);
fd4842cd
SN
2038
2039 gcc_assert (VECTOR_MODE_P (dst_mode));
2040
2041 if (REG_P (dst) && REG_P (src))
2042 {
c59b7e28
SN
2043 rtx (*gen) (rtx, rtx);
2044
fd4842cd
SN
2045 gcc_assert (VECTOR_MODE_P (src_mode));
2046
2047 switch (src_mode)
2048 {
4e10a5a7 2049 case E_V16QImode:
c59b7e28 2050 gen = gen_aarch64_split_simd_movv16qi;
fd4842cd 2051 break;
4e10a5a7 2052 case E_V8HImode:
c59b7e28 2053 gen = gen_aarch64_split_simd_movv8hi;
fd4842cd 2054 break;
4e10a5a7 2055 case E_V4SImode:
c59b7e28 2056 gen = gen_aarch64_split_simd_movv4si;
fd4842cd 2057 break;
4e10a5a7 2058 case E_V2DImode:
c59b7e28 2059 gen = gen_aarch64_split_simd_movv2di;
fd4842cd 2060 break;
4e10a5a7 2061 case E_V8HFmode:
71a11456
AL
2062 gen = gen_aarch64_split_simd_movv8hf;
2063 break;
4e10a5a7 2064 case E_V4SFmode:
c59b7e28 2065 gen = gen_aarch64_split_simd_movv4sf;
fd4842cd 2066 break;
4e10a5a7 2067 case E_V2DFmode:
c59b7e28 2068 gen = gen_aarch64_split_simd_movv2df;
fd4842cd
SN
2069 break;
2070 default:
2071 gcc_unreachable ();
2072 }
c59b7e28
SN
2073
2074 emit_insn (gen (dst, src));
fd4842cd
SN
2075 return;
2076 }
2077}
2078
ef22810a
RH
2079bool
2080aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
2081 machine_mode ymode, rtx y)
2082{
2083 rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
2084 gcc_assert (r != NULL);
2085 return rtx_equal_p (x, r);
2086}
2087
2088
43e9d192 2089static rtx
ef4bddc2 2090aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
43e9d192
IB
2091{
2092 if (can_create_pseudo_p ())
e18b4a81 2093 return force_reg (mode, value);
43e9d192
IB
2094 else
2095 {
f5470a77
RS
2096 gcc_assert (x);
2097 aarch64_emit_move (x, value);
43e9d192
IB
2098 return x;
2099 }
2100}
2101
43cacb12
RS
2102/* Return true if we can move VALUE into a register using a single
2103 CNT[BHWD] instruction. */
2104
2105static bool
2106aarch64_sve_cnt_immediate_p (poly_int64 value)
2107{
2108 HOST_WIDE_INT factor = value.coeffs[0];
2109 /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
2110 return (value.coeffs[1] == factor
2111 && IN_RANGE (factor, 2, 16 * 16)
2112 && (factor & 1) == 0
2113 && factor <= 16 * (factor & -factor));
2114}
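
/* Illustrative example (added commentary, not in the original source):
   the number of 64-bit doublewords in an SVE vector is represented as
   poly_int64 (2, 2); three times that value is (6, 6), which satisfies
   the checks above (the factor 6 is even and 6 <= 16 * 2) and can be
   loaded with a single "cntd x0, all, mul #3".  */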
2115
2116/* Likewise for rtx X. */
2117
2118bool
2119aarch64_sve_cnt_immediate_p (rtx x)
2120{
2121 poly_int64 value;
2122 return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value);
2123}
2124
2125/* Return the asm string for an instruction with a CNT-like vector size
2126 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2127 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2128 first part of the operands template (the part that comes before the
2129 vector size itself). FACTOR is the number of quadwords.
2130 NELTS_PER_VQ, if nonzero, is the number of elements in each quadword.
2131 If it is zero, we can use any element size. */
2132
2133static char *
2134aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2135 unsigned int factor,
2136 unsigned int nelts_per_vq)
2137{
2138 static char buffer[sizeof ("sqincd\t%x0, %w0, all, mul #16")];
2139
2140 if (nelts_per_vq == 0)
2141 /* There is some overlap in the ranges of the four CNT instructions.
2142 Here we always use the smallest possible element size, so that the
 2143	       multiplier is 1 wherever possible.  */
2144 nelts_per_vq = factor & -factor;
2145 int shift = std::min (exact_log2 (nelts_per_vq), 4);
2146 gcc_assert (IN_RANGE (shift, 1, 4));
2147 char suffix = "dwhb"[shift - 1];
2148
2149 factor >>= shift;
2150 unsigned int written;
2151 if (factor == 1)
2152 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s",
2153 prefix, suffix, operands);
2154 else
2155 written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, all, mul #%d",
2156 prefix, suffix, operands, factor);
2157 gcc_assert (written < sizeof (buffer));
2158 return buffer;
2159}
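
/* Worked example (added for illustration, not in the original source):
   with PREFIX "cnt", FACTOR 8 and NELTS_PER_VQ 0, the code above picks
   NELTS_PER_VQ = 8 (the lowest set bit of 8), giving SHIFT 3, suffix
   'h' and a residual multiplier of 1, so the returned template is
   "cnth\t<operands>".  */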
2160
2161/* Return the asm string for an instruction with a CNT-like vector size
2162 operand (a vector pattern followed by a multiplier in the range [1, 16]).
2163 PREFIX is the mnemonic without the size suffix and OPERANDS is the
2164 first part of the operands template (the part that comes before the
2165 vector size itself). X is the value of the vector size operand,
2166 as a polynomial integer rtx. */
2167
2168char *
2169aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands,
2170 rtx x)
2171{
2172 poly_int64 value = rtx_to_poly_int64 (x);
2173 gcc_assert (aarch64_sve_cnt_immediate_p (value));
2174 return aarch64_output_sve_cnt_immediate (prefix, operands,
2175 value.coeffs[1], 0);
2176}
2177
2178/* Return true if we can add VALUE to a register using a single ADDVL
2179 or ADDPL instruction. */
2180
2181static bool
2182aarch64_sve_addvl_addpl_immediate_p (poly_int64 value)
2183{
2184 HOST_WIDE_INT factor = value.coeffs[0];
2185 if (factor == 0 || value.coeffs[1] != factor)
2186 return false;
2187 /* FACTOR counts VG / 2, so a value of 2 is one predicate width
2188 and a value of 16 is one vector width. */
2189 return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16))
2190 || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2)));
2191}
2192
2193/* Likewise for rtx X. */
2194
2195bool
2196aarch64_sve_addvl_addpl_immediate_p (rtx x)
2197{
2198 poly_int64 value;
2199 return (poly_int_rtx_p (x, &value)
2200 && aarch64_sve_addvl_addpl_immediate_p (value));
2201}
2202
2203/* Return the asm string for adding ADDVL or ADDPL immediate X to operand 1
2204 and storing the result in operand 0. */
2205
2206char *
2207aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset)
2208{
2209 static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)];
2210 poly_int64 offset_value = rtx_to_poly_int64 (offset);
2211 gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value));
2212
2213 /* Use INC or DEC if possible. */
2214 if (rtx_equal_p (dest, base) && GP_REGNUM_P (REGNO (dest)))
2215 {
2216 if (aarch64_sve_cnt_immediate_p (offset_value))
2217 return aarch64_output_sve_cnt_immediate ("inc", "%x0",
2218 offset_value.coeffs[1], 0);
2219 if (aarch64_sve_cnt_immediate_p (-offset_value))
2220 return aarch64_output_sve_cnt_immediate ("dec", "%x0",
2221 -offset_value.coeffs[1], 0);
2222 }
2223
2224 int factor = offset_value.coeffs[1];
2225 if ((factor & 15) == 0)
2226 snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16);
2227 else
2228 snprintf (buffer, sizeof (buffer), "addpl\t%%x0, %%x1, #%d", factor / 2);
2229 return buffer;
2230}
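
/* Example outputs (added commentary, assuming DEST and BASE are
   different registers so the INC/DEC shortcut above does not apply):
   a VG-based offset with coefficient 16 (one vector width) gives
   "addvl\t%x0, %x1, #1", while a coefficient of 2 (one predicate
   width) gives "addpl\t%x0, %x1, #1".  */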
2231
2232/* Return true if X is a valid immediate for an SVE vector INC or DEC
2233 instruction. If it is, store the number of elements in each vector
2234 quadword in *NELTS_PER_VQ_OUT (if nonnull) and store the multiplication
2235 factor in *FACTOR_OUT (if nonnull). */
2236
2237bool
2238aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out,
2239 unsigned int *nelts_per_vq_out)
2240{
2241 rtx elt;
2242 poly_int64 value;
2243
2244 if (!const_vec_duplicate_p (x, &elt)
2245 || !poly_int_rtx_p (elt, &value))
2246 return false;
2247
2248 unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (GET_MODE (x));
2249 if (nelts_per_vq != 8 && nelts_per_vq != 4 && nelts_per_vq != 2)
2250 /* There's no vector INCB. */
2251 return false;
2252
2253 HOST_WIDE_INT factor = value.coeffs[0];
2254 if (value.coeffs[1] != factor)
2255 return false;
2256
2257 /* The coefficient must be [1, 16] * NELTS_PER_VQ. */
2258 if ((factor % nelts_per_vq) != 0
2259 || !IN_RANGE (abs (factor), nelts_per_vq, 16 * nelts_per_vq))
2260 return false;
2261
2262 if (factor_out)
2263 *factor_out = factor;
2264 if (nelts_per_vq_out)
2265 *nelts_per_vq_out = nelts_per_vq;
2266 return true;
2267}
2268
2269/* Return true if X is a valid immediate for an SVE vector INC or DEC
2270 instruction. */
2271
2272bool
2273aarch64_sve_inc_dec_immediate_p (rtx x)
2274{
2275 return aarch64_sve_inc_dec_immediate_p (x, NULL, NULL);
2276}
2277
2278/* Return the asm template for an SVE vector INC or DEC instruction.
2279 OPERANDS gives the operands before the vector count and X is the
2280 value of the vector count operand itself. */
2281
2282char *
2283aarch64_output_sve_inc_dec_immediate (const char *operands, rtx x)
2284{
2285 int factor;
2286 unsigned int nelts_per_vq;
2287 if (!aarch64_sve_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
2288 gcc_unreachable ();
2289 if (factor < 0)
2290 return aarch64_output_sve_cnt_immediate ("dec", operands, -factor,
2291 nelts_per_vq);
2292 else
2293 return aarch64_output_sve_cnt_immediate ("inc", operands, factor,
2294 nelts_per_vq);
2295}
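
/* Illustrative example (added commentary, not in the original source):
   for a VNx4SI duplicate of poly_int64 (8, 8), NELTS_PER_VQ is 4 and
   FACTOR is 8, so the helper above produces the template
   "incw\t<operands>, all, mul #2".  */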
43e9d192 2296
82614948
RR
2297static int
2298aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
77e994c9 2299 scalar_int_mode mode)
43e9d192 2300{
43e9d192 2301 int i;
9a4865db
WD
2302 unsigned HOST_WIDE_INT val, val2, mask;
2303 int one_match, zero_match;
2304 int num_insns;
43e9d192 2305
9a4865db
WD
2306 val = INTVAL (imm);
2307
2308 if (aarch64_move_imm (val, mode))
43e9d192 2309 {
82614948 2310 if (generate)
f7df4a84 2311 emit_insn (gen_rtx_SET (dest, imm));
9a4865db 2312 return 1;
43e9d192
IB
2313 }
2314
9de00935
TC
2315 /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
2316 (with XXXX non-zero). In that case check to see if the move can be done in
2317 a smaller mode. */
2318 val2 = val & 0xffffffff;
2319 if (mode == DImode
2320 && aarch64_move_imm (val2, SImode)
2321 && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
2322 {
2323 if (generate)
2324 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2325
2326 /* Check if we have to emit a second instruction by checking to see
2327 if any of the upper 32 bits of the original DI mode value is set. */
2328 if (val == val2)
2329 return 1;
2330
2331 i = (val >> 48) ? 48 : 32;
2332
2333 if (generate)
2334 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2335 GEN_INT ((val >> i) & 0xffff)));
2336
2337 return 2;
2338 }
2339
9a4865db 2340 if ((val >> 32) == 0 || mode == SImode)
43e9d192 2341 {
82614948
RR
2342 if (generate)
2343 {
9a4865db
WD
2344 emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
2345 if (mode == SImode)
2346 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
2347 GEN_INT ((val >> 16) & 0xffff)));
2348 else
2349 emit_insn (gen_insv_immdi (dest, GEN_INT (16),
2350 GEN_INT ((val >> 16) & 0xffff)));
82614948 2351 }
9a4865db 2352 return 2;
43e9d192
IB
2353 }
2354
2355 /* Remaining cases are all for DImode. */
2356
43e9d192 2357 mask = 0xffff;
9a4865db
WD
2358 zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
2359 ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
2360 one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
2361 ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
43e9d192 2362
62c8d76c 2363 if (zero_match != 2 && one_match != 2)
43e9d192 2364 {
62c8d76c
WD
2365 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
2366 For a 64-bit bitmask try whether changing 16 bits to all ones or
2367 zeroes creates a valid bitmask. To check any repeated bitmask,
2368 try using 16 bits from the other 32-bit half of val. */
43e9d192 2369
62c8d76c 2370 for (i = 0; i < 64; i += 16, mask <<= 16)
43e9d192 2371 {
62c8d76c
WD
2372 val2 = val & ~mask;
2373 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2374 break;
2375 val2 = val | mask;
2376 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2377 break;
2378 val2 = val2 & ~mask;
2379 val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
2380 if (val2 != val && aarch64_bitmask_imm (val2, mode))
2381 break;
43e9d192 2382 }
62c8d76c 2383 if (i != 64)
43e9d192 2384 {
62c8d76c 2385 if (generate)
43e9d192 2386 {
62c8d76c
WD
2387 emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
2388 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
9a4865db 2389 GEN_INT ((val >> i) & 0xffff)));
43e9d192 2390 }
1312b1ba 2391 return 2;
43e9d192
IB
2392 }
2393 }
2394
9a4865db
WD
2395 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
2396 are emitted by the initial mov. If one_match > zero_match, skip set bits,
2397 otherwise skip zero bits. */
2c274197 2398
9a4865db 2399 num_insns = 1;
43e9d192 2400 mask = 0xffff;
9a4865db
WD
2401 val2 = one_match > zero_match ? ~val : val;
2402 i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;
2403
2404 if (generate)
2405 emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
2406 ? (val | ~(mask << i))
2407 : (val & (mask << i)))));
2408 for (i += 16; i < 64; i += 16)
43e9d192 2409 {
9a4865db
WD
2410 if ((val2 & (mask << i)) == 0)
2411 continue;
2412 if (generate)
2413 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
2414 GEN_INT ((val >> i) & 0xffff)));
2415 num_insns ++;
82614948
RR
2416 }
2417
2418 return num_insns;
2419}
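
/* Worked example (added for illustration only): the DImode constant
   0x0000123400005678 is neither a single MOV/MOVN nor a bitmask
   immediate, but its low 32 bits form a valid SImode immediate and
   bits [47:32] are the only other nonzero field, so the code above
   emits two instructions (using x0 as an example destination):

	mov	x0, #0x5678
	movk	x0, #0x1234, lsl #32

   and returns 2.  */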
2420
c0bb5bc5
WD
2421/* Return whether imm is a 128-bit immediate which is simple enough to
2422 expand inline. */
2423bool
2424aarch64_mov128_immediate (rtx imm)
2425{
2426 if (GET_CODE (imm) == CONST_INT)
2427 return true;
2428
2429 gcc_assert (CONST_WIDE_INT_NUNITS (imm) == 2);
2430
2431 rtx lo = GEN_INT (CONST_WIDE_INT_ELT (imm, 0));
2432 rtx hi = GEN_INT (CONST_WIDE_INT_ELT (imm, 1));
2433
2434 return aarch64_internal_mov_immediate (NULL_RTX, lo, false, DImode)
2435 + aarch64_internal_mov_immediate (NULL_RTX, hi, false, DImode) <= 4;
2436}
2437
2438
43cacb12
RS
2439/* Return the number of temporary registers that aarch64_add_offset_1
2440 would need to add OFFSET to a register. */
2441
2442static unsigned int
2443aarch64_add_offset_1_temporaries (HOST_WIDE_INT offset)
2444{
2445 return abs_hwi (offset) < 0x1000000 ? 0 : 1;
2446}
2447
f5470a77
RS
2448/* A subroutine of aarch64_add_offset. Set DEST to SRC + OFFSET for
2449 a non-polynomial OFFSET. MODE is the mode of the addition.
2450 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2451 be set and CFA adjustments added to the generated instructions.
2452
2453 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2454 temporary if register allocation is already complete. This temporary
2455 register may overlap DEST but must not overlap SRC. If TEMP1 is known
2456 to hold abs (OFFSET), EMIT_MOVE_IMM can be set to false to avoid emitting
2457 the immediate again.
0100c5f9
RS
2458
2459 Since this function may be used to adjust the stack pointer, we must
2460 ensure that it cannot cause transient stack deallocation (for example
2461 by first incrementing SP and then decrementing when adjusting by a
2462 large immediate). */
2463
2464static void
f5470a77
RS
2465aarch64_add_offset_1 (scalar_int_mode mode, rtx dest,
2466 rtx src, HOST_WIDE_INT offset, rtx temp1,
2467 bool frame_related_p, bool emit_move_imm)
0100c5f9 2468{
f5470a77
RS
2469 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2470 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
2471
2472 HOST_WIDE_INT moffset = abs_hwi (offset);
0100c5f9
RS
2473 rtx_insn *insn;
2474
f5470a77
RS
2475 if (!moffset)
2476 {
2477 if (!rtx_equal_p (dest, src))
2478 {
2479 insn = emit_insn (gen_rtx_SET (dest, src));
2480 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2481 }
2482 return;
2483 }
0100c5f9
RS
2484
2485 /* Single instruction adjustment. */
f5470a77 2486 if (aarch64_uimm12_shift (moffset))
0100c5f9 2487 {
f5470a77 2488 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (offset)));
0100c5f9
RS
2489 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2490 return;
2491 }
2492
f5470a77
RS
2493 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits
2494 and either:
2495
2496 a) the offset cannot be loaded by a 16-bit move or
2497 b) there is no spare register into which we can move it. */
2498 if (moffset < 0x1000000
2499 && ((!temp1 && !can_create_pseudo_p ())
2500 || !aarch64_move_imm (moffset, mode)))
0100c5f9 2501 {
f5470a77 2502 HOST_WIDE_INT low_off = moffset & 0xfff;
0100c5f9 2503
f5470a77
RS
2504 low_off = offset < 0 ? -low_off : low_off;
2505 insn = emit_insn (gen_add3_insn (dest, src, GEN_INT (low_off)));
0100c5f9 2506 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77 2507 insn = emit_insn (gen_add2_insn (dest, GEN_INT (offset - low_off)));
0100c5f9
RS
2508 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2509 return;
2510 }
2511
2512 /* Emit a move immediate if required and an addition/subtraction. */
0100c5f9 2513 if (emit_move_imm)
f5470a77
RS
2514 {
2515 gcc_assert (temp1 != NULL_RTX || can_create_pseudo_p ());
2516 temp1 = aarch64_force_temporary (mode, temp1, GEN_INT (moffset));
2517 }
2518 insn = emit_insn (offset < 0
2519 ? gen_sub3_insn (dest, src, temp1)
2520 : gen_add3_insn (dest, src, temp1));
0100c5f9
RS
2521 if (frame_related_p)
2522 {
2523 RTX_FRAME_RELATED_P (insn) = frame_related_p;
f5470a77
RS
2524 rtx adj = plus_constant (mode, src, offset);
2525 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (dest, adj));
0100c5f9
RS
2526 }
2527}
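
/* Worked example (added commentary, not part of the original source):
   adding the offset 0x123456 takes the "two additions" path above,
   since it is below 2^24 but cannot be loaded with a single MOV:

	add	dest, src, #0x456
	add	dest, dest, #0x123000

   Both immediates are valid 12-bit (optionally shifted) ADD immediates,
   and both steps move in the same direction, so a stack-pointer
   adjustment never transiently overshoots its final value.  */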
2528
43cacb12
RS
2529/* Return the number of temporary registers that aarch64_add_offset
2530 would need to move OFFSET into a register or add OFFSET to a register;
2531 ADD_P is true if we want the latter rather than the former. */
2532
2533static unsigned int
2534aarch64_offset_temporaries (bool add_p, poly_int64 offset)
2535{
2536 /* This follows the same structure as aarch64_add_offset. */
2537 if (add_p && aarch64_sve_addvl_addpl_immediate_p (offset))
2538 return 0;
2539
2540 unsigned int count = 0;
2541 HOST_WIDE_INT factor = offset.coeffs[1];
2542 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2543 poly_int64 poly_offset (factor, factor);
2544 if (add_p && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2545 /* Need one register for the ADDVL/ADDPL result. */
2546 count += 1;
2547 else if (factor != 0)
2548 {
2549 factor = abs (factor);
2550 if (factor > 16 * (factor & -factor))
2551 /* Need one register for the CNT result and one for the multiplication
2552 factor. If necessary, the second temporary can be reused for the
2553 constant part of the offset. */
2554 return 2;
2555 /* Need one register for the CNT result (which might then
2556 be shifted). */
2557 count += 1;
2558 }
2559 return count + aarch64_add_offset_1_temporaries (constant);
2560}
2561
2562/* If X can be represented as a poly_int64, return the number
2563 of temporaries that are required to add it to a register.
2564 Return -1 otherwise. */
2565
2566int
2567aarch64_add_offset_temporaries (rtx x)
2568{
2569 poly_int64 offset;
2570 if (!poly_int_rtx_p (x, &offset))
2571 return -1;
2572 return aarch64_offset_temporaries (true, offset);
2573}
2574
f5470a77
RS
2575/* Set DEST to SRC + OFFSET. MODE is the mode of the addition.
2576 FRAME_RELATED_P is true if the RTX_FRAME_RELATED flag should
2577 be set and CFA adjustments added to the generated instructions.
2578
2579 TEMP1, if nonnull, is a register of mode MODE that can be used as a
2580 temporary if register allocation is already complete. This temporary
43cacb12
RS
2581 register may overlap DEST if !FRAME_RELATED_P but must not overlap SRC.
2582 If TEMP1 is known to hold abs (OFFSET), EMIT_MOVE_IMM can be set to
2583 false to avoid emitting the immediate again.
2584
2585 TEMP2, if nonnull, is a second temporary register that doesn't
2586 overlap either DEST or REG.
f5470a77
RS
2587
2588 Since this function may be used to adjust the stack pointer, we must
2589 ensure that it cannot cause transient stack deallocation (for example
2590 by first incrementing SP and then decrementing when adjusting by a
2591 large immediate). */
2592
2593static void
2594aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
43cacb12
RS
2595 poly_int64 offset, rtx temp1, rtx temp2,
2596 bool frame_related_p, bool emit_move_imm = true)
0100c5f9 2597{
f5470a77
RS
2598 gcc_assert (emit_move_imm || temp1 != NULL_RTX);
2599 gcc_assert (temp1 == NULL_RTX || !reg_overlap_mentioned_p (temp1, src));
43cacb12
RS
2600 gcc_assert (temp1 == NULL_RTX
2601 || !frame_related_p
2602 || !reg_overlap_mentioned_p (temp1, dest));
2603 gcc_assert (temp2 == NULL_RTX || !reg_overlap_mentioned_p (dest, temp2));
2604
2605 /* Try using ADDVL or ADDPL to add the whole value. */
2606 if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset))
2607 {
2608 rtx offset_rtx = gen_int_mode (offset, mode);
2609 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2610 RTX_FRAME_RELATED_P (insn) = frame_related_p;
2611 return;
2612 }
2613
2614 /* Coefficient 1 is multiplied by the number of 128-bit blocks in an
2615 SVE vector register, over and above the minimum size of 128 bits.
2616 This is equivalent to half the value returned by CNTD with a
2617 vector shape of ALL. */
2618 HOST_WIDE_INT factor = offset.coeffs[1];
2619 HOST_WIDE_INT constant = offset.coeffs[0] - factor;
2620
2621 /* Try using ADDVL or ADDPL to add the VG-based part. */
2622 poly_int64 poly_offset (factor, factor);
2623 if (src != const0_rtx
2624 && aarch64_sve_addvl_addpl_immediate_p (poly_offset))
2625 {
2626 rtx offset_rtx = gen_int_mode (poly_offset, mode);
2627 if (frame_related_p)
2628 {
2629 rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx));
2630 RTX_FRAME_RELATED_P (insn) = true;
2631 src = dest;
2632 }
2633 else
2634 {
2635 rtx addr = gen_rtx_PLUS (mode, src, offset_rtx);
2636 src = aarch64_force_temporary (mode, temp1, addr);
2637 temp1 = temp2;
2638 temp2 = NULL_RTX;
2639 }
2640 }
2641 /* Otherwise use a CNT-based sequence. */
2642 else if (factor != 0)
2643 {
2644 /* Use a subtraction if we have a negative factor. */
2645 rtx_code code = PLUS;
2646 if (factor < 0)
2647 {
2648 factor = -factor;
2649 code = MINUS;
2650 }
2651
2652 /* Calculate CNTD * FACTOR / 2. First try to fold the division
2653 into the multiplication. */
2654 rtx val;
2655 int shift = 0;
2656 if (factor & 1)
2657 /* Use a right shift by 1. */
2658 shift = -1;
2659 else
2660 factor /= 2;
2661 HOST_WIDE_INT low_bit = factor & -factor;
2662 if (factor <= 16 * low_bit)
2663 {
2664 if (factor > 16 * 8)
2665 {
2666 /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate
2667 the value with the minimum multiplier and shift it into
2668 position. */
2669 int extra_shift = exact_log2 (low_bit);
2670 shift += extra_shift;
2671 factor >>= extra_shift;
2672 }
2673 val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode);
2674 }
2675 else
2676 {
2677 /* Use CNTD, then multiply it by FACTOR. */
2678 val = gen_int_mode (poly_int64 (2, 2), mode);
2679 val = aarch64_force_temporary (mode, temp1, val);
2680
2681 /* Go back to using a negative multiplication factor if we have
2682 no register from which to subtract. */
2683 if (code == MINUS && src == const0_rtx)
2684 {
2685 factor = -factor;
2686 code = PLUS;
2687 }
2688 rtx coeff1 = gen_int_mode (factor, mode);
2689 coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
2690 val = gen_rtx_MULT (mode, val, coeff1);
2691 }
2692
2693 if (shift > 0)
2694 {
2695 /* Multiply by 1 << SHIFT. */
2696 val = aarch64_force_temporary (mode, temp1, val);
2697 val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift));
2698 }
2699 else if (shift == -1)
2700 {
2701 /* Divide by 2. */
2702 val = aarch64_force_temporary (mode, temp1, val);
2703 val = gen_rtx_ASHIFTRT (mode, val, const1_rtx);
2704 }
2705
2706 /* Calculate SRC +/- CNTD * FACTOR / 2. */
2707 if (src != const0_rtx)
2708 {
2709 val = aarch64_force_temporary (mode, temp1, val);
2710 val = gen_rtx_fmt_ee (code, mode, src, val);
2711 }
2712 else if (code == MINUS)
2713 {
2714 val = aarch64_force_temporary (mode, temp1, val);
2715 val = gen_rtx_NEG (mode, val);
2716 }
2717
2718 if (constant == 0 || frame_related_p)
2719 {
2720 rtx_insn *insn = emit_insn (gen_rtx_SET (dest, val));
2721 if (frame_related_p)
2722 {
2723 RTX_FRAME_RELATED_P (insn) = true;
2724 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2725 gen_rtx_SET (dest, plus_constant (Pmode, src,
2726 poly_offset)));
2727 }
2728 src = dest;
2729 if (constant == 0)
2730 return;
2731 }
2732 else
2733 {
2734 src = aarch64_force_temporary (mode, temp1, val);
2735 temp1 = temp2;
2736 temp2 = NULL_RTX;
2737 }
2738
2739 emit_move_imm = true;
2740 }
f5470a77 2741
f5470a77
RS
2742 aarch64_add_offset_1 (mode, dest, src, constant, temp1,
2743 frame_related_p, emit_move_imm);
0100c5f9
RS
2744}
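
/* Illustrative examples (added commentary, not in the original source):
   with SRC a GP register or SP, an offset of poly_int64 (16, 16)
   (one vector length in bytes) is handled entirely by the ADDVL test
   at the top of the function:

	addvl	dest, src, #1

   A mixed offset such as (48, 32) is split into the VG-based part
   (32, 32), handled by ADDVL or a CNT-based sequence, and the
   constant part 16, handled by aarch64_add_offset_1.  */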
2745
43cacb12
RS
2746/* Like aarch64_add_offset, but the offset is given as an rtx rather
2747 than a poly_int64. */
2748
2749void
2750aarch64_split_add_offset (scalar_int_mode mode, rtx dest, rtx src,
2751 rtx offset_rtx, rtx temp1, rtx temp2)
2752{
2753 aarch64_add_offset (mode, dest, src, rtx_to_poly_int64 (offset_rtx),
2754 temp1, temp2, false);
2755}
2756
f5470a77
RS
2757/* Add DELTA to the stack pointer, marking the instructions frame-related.
2758 TEMP1 is available as a temporary if nonnull. EMIT_MOVE_IMM is false
2759 if TEMP1 already contains abs (DELTA). */
2760
0100c5f9 2761static inline void
43cacb12 2762aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, bool emit_move_imm)
0100c5f9 2763{
f5470a77 2764 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, delta,
43cacb12 2765 temp1, temp2, true, emit_move_imm);
0100c5f9
RS
2766}
2767
f5470a77
RS
2768/* Subtract DELTA from the stack pointer, marking the instructions
2769 frame-related if FRAME_RELATED_P. TEMP1 is available as a temporary
2770 if nonnull. */
2771
0100c5f9 2772static inline void
43cacb12 2773aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p)
0100c5f9 2774{
f5470a77 2775 aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta,
43cacb12 2776 temp1, temp2, frame_related_p);
0100c5f9 2777}
82614948 2778
43cacb12
RS
2779/* Set DEST to (vec_series BASE STEP). */
2780
2781static void
2782aarch64_expand_vec_series (rtx dest, rtx base, rtx step)
82614948
RR
2783{
2784 machine_mode mode = GET_MODE (dest);
43cacb12
RS
2785 scalar_mode inner = GET_MODE_INNER (mode);
2786
2787 /* Each operand can be a register or an immediate in the range [-16, 15]. */
2788 if (!aarch64_sve_index_immediate_p (base))
2789 base = force_reg (inner, base);
2790 if (!aarch64_sve_index_immediate_p (step))
2791 step = force_reg (inner, step);
2792
2793 emit_set_insn (dest, gen_rtx_VEC_SERIES (mode, base, step));
2794}
82614948 2795
43cacb12
RS
2796/* Try to duplicate SRC into SVE register DEST, given that SRC is an
2797 integer of mode INT_MODE. Return true on success. */
2798
2799static bool
2800aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode,
2801 rtx src)
2802{
2803 /* If the constant is smaller than 128 bits, we can do the move
2804 using a vector of SRC_MODEs. */
2805 if (src_mode != TImode)
2806 {
2807 poly_uint64 count = exact_div (GET_MODE_SIZE (GET_MODE (dest)),
2808 GET_MODE_SIZE (src_mode));
2809 machine_mode dup_mode = mode_for_vector (src_mode, count).require ();
2810 emit_move_insn (gen_lowpart (dup_mode, dest),
2811 gen_const_vec_duplicate (dup_mode, src));
2812 return true;
2813 }
2814
947b1372 2815 /* Use LD1RQ[BHWD] to load the 128 bits from memory. */
43cacb12
RS
2816 src = force_const_mem (src_mode, src);
2817 if (!src)
2818 return false;
2819
2820 /* Make sure that the address is legitimate. */
2821 if (!aarch64_sve_ld1r_operand_p (src))
2822 {
2823 rtx addr = force_reg (Pmode, XEXP (src, 0));
2824 src = replace_equiv_address (src, addr);
2825 }
2826
947b1372
RS
2827 machine_mode mode = GET_MODE (dest);
2828 unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode);
2829 machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require ();
2830 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
2831 src = gen_rtx_UNSPEC (mode, gen_rtvec (2, ptrue, src), UNSPEC_LD1RQ);
2832 emit_insn (gen_rtx_SET (dest, src));
43cacb12
RS
2833 return true;
2834}
2835
2836/* Expand a move of general CONST_VECTOR SRC into DEST, given that it
2837 isn't a simple duplicate or series. */
2838
2839static void
2840aarch64_expand_sve_const_vector (rtx dest, rtx src)
2841{
2842 machine_mode mode = GET_MODE (src);
2843 unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
2844 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
2845 gcc_assert (npatterns > 1);
2846
2847 if (nelts_per_pattern == 1)
2848 {
 2849       /* The constant is a repeating sequence of at least two elements,
2850 where the repeating elements occupy no more than 128 bits.
2851 Get an integer representation of the replicated value. */
8179efe0
RS
2852 scalar_int_mode int_mode;
2853 if (BYTES_BIG_ENDIAN)
2854 /* For now, always use LD1RQ to load the value on big-endian
2855 targets, since the handling of smaller integers includes a
2856 subreg that is semantically an element reverse. */
2857 int_mode = TImode;
2858 else
2859 {
2860 unsigned int int_bits = GET_MODE_UNIT_BITSIZE (mode) * npatterns;
2861 gcc_assert (int_bits <= 128);
2862 int_mode = int_mode_for_size (int_bits, 0).require ();
2863 }
43cacb12
RS
2864 rtx int_value = simplify_gen_subreg (int_mode, src, mode, 0);
2865 if (int_value
2866 && aarch64_expand_sve_widened_duplicate (dest, int_mode, int_value))
2867 return;
2868 }
2869
2870 /* Expand each pattern individually. */
2871 rtx_vector_builder builder;
2872 auto_vec<rtx, 16> vectors (npatterns);
2873 for (unsigned int i = 0; i < npatterns; ++i)
2874 {
2875 builder.new_vector (mode, 1, nelts_per_pattern);
2876 for (unsigned int j = 0; j < nelts_per_pattern; ++j)
2877 builder.quick_push (CONST_VECTOR_ELT (src, i + j * npatterns));
2878 vectors.quick_push (force_reg (mode, builder.build ()));
2879 }
2880
2881 /* Use permutes to interleave the separate vectors. */
2882 while (npatterns > 1)
2883 {
2884 npatterns /= 2;
2885 for (unsigned int i = 0; i < npatterns; ++i)
2886 {
2887 rtx tmp = (npatterns == 1 ? dest : gen_reg_rtx (mode));
2888 rtvec v = gen_rtvec (2, vectors[i], vectors[i + npatterns]);
2889 emit_set_insn (tmp, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
2890 vectors[i] = tmp;
2891 }
2892 }
2893 gcc_assert (vectors[0] == dest);
2894}
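
/* Illustrative sketch (added commentary, not part of the original
   source): for a constant with four single-element patterns A, B, C, D
   that misses the widened-duplicate path, the loop above builds four
   duplicated vectors and interleaves them with ZIP1:

	t0   = zip1 (vA, vC)	A C A C ...
	t1   = zip1 (vB, vD)	B D B D ...
	dest = zip1 (t0, t1)	A B C D A B C D ...

   recreating the original element order.  */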
2895
2896/* Set DEST to immediate IMM. For SVE vector modes, GEN_VEC_DUPLICATE
2897 is a pattern that can be used to set DEST to a replicated scalar
2898 element. */
2899
2900void
2901aarch64_expand_mov_immediate (rtx dest, rtx imm,
2902 rtx (*gen_vec_duplicate) (rtx, rtx))
2903{
2904 machine_mode mode = GET_MODE (dest);
82614948
RR
2905
2906 /* Check on what type of symbol it is. */
77e994c9
RS
2907 scalar_int_mode int_mode;
2908 if ((GET_CODE (imm) == SYMBOL_REF
2909 || GET_CODE (imm) == LABEL_REF
43cacb12
RS
2910 || GET_CODE (imm) == CONST
2911 || GET_CODE (imm) == CONST_POLY_INT)
77e994c9 2912 && is_a <scalar_int_mode> (mode, &int_mode))
82614948 2913 {
43cacb12
RS
2914 rtx mem;
2915 poly_int64 offset;
2916 HOST_WIDE_INT const_offset;
82614948
RR
2917 enum aarch64_symbol_type sty;
2918
2919 /* If we have (const (plus symbol offset)), separate out the offset
2920 before we start classifying the symbol. */
43cacb12 2921 rtx base = strip_offset (imm, &offset);
82614948 2922
43cacb12
RS
2923 /* We must always add an offset involving VL separately, rather than
2924 folding it into the relocation. */
2925 if (!offset.is_constant (&const_offset))
2926 {
2927 if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
2928 emit_insn (gen_rtx_SET (dest, imm));
2929 else
2930 {
2931 /* Do arithmetic on 32-bit values if the result is smaller
2932 than that. */
2933 if (partial_subreg_p (int_mode, SImode))
2934 {
2935 /* It is invalid to do symbol calculations in modes
2936 narrower than SImode. */
2937 gcc_assert (base == const0_rtx);
2938 dest = gen_lowpart (SImode, dest);
2939 int_mode = SImode;
2940 }
2941 if (base != const0_rtx)
2942 {
2943 base = aarch64_force_temporary (int_mode, dest, base);
2944 aarch64_add_offset (int_mode, dest, base, offset,
2945 NULL_RTX, NULL_RTX, false);
2946 }
2947 else
2948 aarch64_add_offset (int_mode, dest, base, offset,
2949 dest, NULL_RTX, false);
2950 }
2951 return;
2952 }
2953
2954 sty = aarch64_classify_symbol (base, const_offset);
82614948
RR
2955 switch (sty)
2956 {
2957 case SYMBOL_FORCE_TO_MEM:
43cacb12 2958 if (const_offset != 0
77e994c9 2959 && targetm.cannot_force_const_mem (int_mode, imm))
82614948
RR
2960 {
2961 gcc_assert (can_create_pseudo_p ());
77e994c9 2962 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
2963 aarch64_add_offset (int_mode, dest, base, const_offset,
2964 NULL_RTX, NULL_RTX, false);
82614948
RR
2965 return;
2966 }
b4f50fd4 2967
82614948
RR
2968 mem = force_const_mem (ptr_mode, imm);
2969 gcc_assert (mem);
b4f50fd4
RR
2970
2971 /* If we aren't generating PC relative literals, then
2972 we need to expand the literal pool access carefully.
2973 This is something that needs to be done in a number
2974 of places, so could well live as a separate function. */
9ee6540a 2975 if (!aarch64_pcrelative_literal_loads)
b4f50fd4
RR
2976 {
2977 gcc_assert (can_create_pseudo_p ());
2978 base = gen_reg_rtx (ptr_mode);
2979 aarch64_expand_mov_immediate (base, XEXP (mem, 0));
00eee3fa
WD
2980 if (ptr_mode != Pmode)
2981 base = convert_memory_address (Pmode, base);
b4f50fd4
RR
2982 mem = gen_rtx_MEM (ptr_mode, base);
2983 }
2984
77e994c9
RS
2985 if (int_mode != ptr_mode)
2986 mem = gen_rtx_ZERO_EXTEND (int_mode, mem);
b4f50fd4 2987
f7df4a84 2988 emit_insn (gen_rtx_SET (dest, mem));
b4f50fd4 2989
82614948
RR
2990 return;
2991
2992 case SYMBOL_SMALL_TLSGD:
2993 case SYMBOL_SMALL_TLSDESC:
79496620 2994 case SYMBOL_SMALL_TLSIE:
1b1e81f8 2995 case SYMBOL_SMALL_GOT_28K:
6642bdb4 2996 case SYMBOL_SMALL_GOT_4G:
82614948 2997 case SYMBOL_TINY_GOT:
5ae7caad 2998 case SYMBOL_TINY_TLSIE:
43cacb12 2999 if (const_offset != 0)
82614948
RR
3000 {
 3001	      gcc_assert (can_create_pseudo_p ());
77e994c9 3002 base = aarch64_force_temporary (int_mode, dest, base);
43cacb12
RS
3003 aarch64_add_offset (int_mode, dest, base, const_offset,
3004 NULL_RTX, NULL_RTX, false);
82614948
RR
3005 return;
3006 }
3007 /* FALLTHRU */
3008
82614948
RR
3009 case SYMBOL_SMALL_ABSOLUTE:
3010 case SYMBOL_TINY_ABSOLUTE:
cbf5629e 3011 case SYMBOL_TLSLE12:
d18ba284 3012 case SYMBOL_TLSLE24:
cbf5629e
JW
3013 case SYMBOL_TLSLE32:
3014 case SYMBOL_TLSLE48:
82614948
RR
3015 aarch64_load_symref_appropriately (dest, imm, sty);
3016 return;
3017
3018 default:
3019 gcc_unreachable ();
3020 }
3021 }
3022
3023 if (!CONST_INT_P (imm))
3024 {
43cacb12
RS
3025 rtx base, step, value;
3026 if (GET_CODE (imm) == HIGH
3027 || aarch64_simd_valid_immediate (imm, NULL))
f7df4a84 3028 emit_insn (gen_rtx_SET (dest, imm));
43cacb12
RS
3029 else if (const_vec_series_p (imm, &base, &step))
3030 aarch64_expand_vec_series (dest, base, step);
3031 else if (const_vec_duplicate_p (imm, &value))
3032 {
3033 /* If the constant is out of range of an SVE vector move,
3034 load it from memory if we can, otherwise move it into
3035 a register and use a DUP. */
3036 scalar_mode inner_mode = GET_MODE_INNER (mode);
3037 rtx op = force_const_mem (inner_mode, value);
3038 if (!op)
3039 op = force_reg (inner_mode, value);
3040 else if (!aarch64_sve_ld1r_operand_p (op))
3041 {
3042 rtx addr = force_reg (Pmode, XEXP (op, 0));
3043 op = replace_equiv_address (op, addr);
3044 }
3045 emit_insn (gen_vec_duplicate (dest, op));
3046 }
3047 else if (GET_CODE (imm) == CONST_VECTOR
3048 && !GET_MODE_NUNITS (GET_MODE (imm)).is_constant ())
3049 aarch64_expand_sve_const_vector (dest, imm);
82614948 3050 else
43cacb12 3051 {
82614948
RR
3052 rtx mem = force_const_mem (mode, imm);
3053 gcc_assert (mem);
43cacb12 3054 emit_move_insn (dest, mem);
43e9d192 3055 }
82614948
RR
3056
3057 return;
43e9d192 3058 }
82614948 3059
77e994c9
RS
3060 aarch64_internal_mov_immediate (dest, imm, true,
3061 as_a <scalar_int_mode> (mode));
43e9d192
IB
3062}
3063
43cacb12
RS
3064/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
3065 that is known to contain PTRUE. */
3066
3067void
3068aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
3069{
3070 emit_insn (gen_rtx_SET (dest, gen_rtx_UNSPEC (GET_MODE (dest),
3071 gen_rtvec (2, pred, src),
3072 UNSPEC_MERGE_PTRUE)));
3073}
3074
3075/* Expand a pre-RA SVE data move from SRC to DEST in which at least one
3076 operand is in memory. In this case we need to use the predicated LD1
3077 and ST1 instead of LDR and STR, both for correctness on big-endian
3078 targets and because LD1 and ST1 support a wider range of addressing modes.
3079 PRED_MODE is the mode of the predicate.
3080
3081 See the comment at the head of aarch64-sve.md for details about the
3082 big-endian handling. */
3083
3084void
3085aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode)
3086{
3087 machine_mode mode = GET_MODE (dest);
3088 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
3089 if (!register_operand (src, mode)
3090 && !register_operand (dest, mode))
3091 {
3092 rtx tmp = gen_reg_rtx (mode);
3093 if (MEM_P (src))
3094 aarch64_emit_sve_pred_move (tmp, ptrue, src);
3095 else
3096 emit_move_insn (tmp, src);
3097 src = tmp;
3098 }
3099 aarch64_emit_sve_pred_move (dest, ptrue, src);
3100}
3101
002092be
RS
3102/* Called only on big-endian targets. See whether an SVE vector move
3103 from SRC to DEST is effectively a REV[BHW] instruction, because at
3104 least one operand is a subreg of an SVE vector that has wider or
3105 narrower elements. Return true and emit the instruction if so.
3106
3107 For example:
3108
3109 (set (reg:VNx8HI R1) (subreg:VNx8HI (reg:VNx16QI R2) 0))
3110
3111 represents a VIEW_CONVERT between the following vectors, viewed
3112 in memory order:
3113
3114 R2: { [0].high, [0].low, [1].high, [1].low, ... }
3115 R1: { [0], [1], [2], [3], ... }
3116
3117 The high part of lane X in R2 should therefore correspond to lane X*2
3118 of R1, but the register representations are:
3119
3120 msb lsb
3121 R2: ...... [1].high [1].low [0].high [0].low
3122 R1: ...... [3] [2] [1] [0]
3123
3124 where the low part of lane X in R2 corresponds to lane X*2 in R1.
3125 We therefore need a reverse operation to swap the high and low values
3126 around.
3127
3128 This is purely an optimization. Without it we would spill the
3129 subreg operand to the stack in one mode and reload it in the
3130 other mode, which has the same effect as the REV. */
3131
3132bool
3133aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src)
3134{
3135 gcc_assert (BYTES_BIG_ENDIAN);
3136 if (GET_CODE (dest) == SUBREG)
3137 dest = SUBREG_REG (dest);
3138 if (GET_CODE (src) == SUBREG)
3139 src = SUBREG_REG (src);
3140
3141 /* The optimization handles two single SVE REGs with different element
3142 sizes. */
3143 if (!REG_P (dest)
3144 || !REG_P (src)
3145 || aarch64_classify_vector_mode (GET_MODE (dest)) != VEC_SVE_DATA
3146 || aarch64_classify_vector_mode (GET_MODE (src)) != VEC_SVE_DATA
3147 || (GET_MODE_UNIT_SIZE (GET_MODE (dest))
3148 == GET_MODE_UNIT_SIZE (GET_MODE (src))))
3149 return false;
3150
3151 /* Generate *aarch64_sve_mov<mode>_subreg_be. */
3152 rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
3153 rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src),
3154 UNSPEC_REV_SUBREG);
3155 emit_insn (gen_rtx_SET (dest, unspec));
3156 return true;
3157}
3158
3159/* Return a copy of X with mode MODE, without changing its other
3160 attributes. Unlike gen_lowpart, this doesn't care whether the
3161 mode change is valid. */
3162
3163static rtx
3164aarch64_replace_reg_mode (rtx x, machine_mode mode)
3165{
3166 if (GET_MODE (x) == mode)
3167 return x;
3168
3169 x = shallow_copy_rtx (x);
3170 set_mode_and_regno (x, mode, REGNO (x));
3171 return x;
3172}
3173
3174/* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given
3175 operands. */
3176
3177void
3178aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
3179{
3180 /* Decide which REV operation we need. The mode with narrower elements
3181 determines the mode of the operands and the mode with the wider
3182 elements determines the reverse width. */
3183 machine_mode mode_with_wider_elts = GET_MODE (dest);
3184 machine_mode mode_with_narrower_elts = GET_MODE (src);
3185 if (GET_MODE_UNIT_SIZE (mode_with_wider_elts)
3186 < GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
3187 std::swap (mode_with_wider_elts, mode_with_narrower_elts);
3188
3189 unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts);
3190 unsigned int unspec;
3191 if (wider_bytes == 8)
3192 unspec = UNSPEC_REV64;
3193 else if (wider_bytes == 4)
3194 unspec = UNSPEC_REV32;
3195 else if (wider_bytes == 2)
3196 unspec = UNSPEC_REV16;
3197 else
3198 gcc_unreachable ();
3199 machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require ();
3200
3201 /* Emit:
3202
3203 (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)]
3204 UNSPEC_MERGE_PTRUE))
3205
3206 with the appropriate modes. */
3207 ptrue = gen_lowpart (pred_mode, ptrue);
3208 dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts);
3209 src = aarch64_replace_reg_mode (src, mode_with_narrower_elts);
3210 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec);
3211 src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src),
3212 UNSPEC_MERGE_PTRUE);
3213 emit_insn (gen_rtx_SET (dest, src));
3214}
3215
43e9d192 3216static bool
fee9ba42
JW
3217aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
3218 tree exp ATTRIBUTE_UNUSED)
43e9d192 3219{
fee9ba42 3220 /* Currently, always true. */
43e9d192
IB
3221 return true;
3222}
3223
3224/* Implement TARGET_PASS_BY_REFERENCE. */
3225
3226static bool
3227aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
ef4bddc2 3228 machine_mode mode,
43e9d192
IB
3229 const_tree type,
3230 bool named ATTRIBUTE_UNUSED)
3231{
3232 HOST_WIDE_INT size;
ef4bddc2 3233 machine_mode dummymode;
43e9d192
IB
3234 int nregs;
3235
3236 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
6a70badb
RS
3237 if (mode == BLKmode && type)
3238 size = int_size_in_bytes (type);
3239 else
3240 /* No frontends can create types with variable-sized modes, so we
3241 shouldn't be asked to pass or return them. */
3242 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192 3243
aadc1c43
MHD
3244 /* Aggregates are passed by reference based on their size. */
3245 if (type && AGGREGATE_TYPE_P (type))
43e9d192 3246 {
aadc1c43 3247 size = int_size_in_bytes (type);
43e9d192
IB
3248 }
3249
3250 /* Variable sized arguments are always returned by reference. */
3251 if (size < 0)
3252 return true;
3253
3254 /* Can this be a candidate to be passed in fp/simd register(s)? */
3255 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
3256 &dummymode, &nregs,
3257 NULL))
3258 return false;
3259
3260 /* Arguments which are variable sized or larger than 2 registers are
 3261      passed by reference unless they are a homogeneous floating point
3262 aggregate. */
3263 return size > 2 * UNITS_PER_WORD;
3264}
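
/* Illustrative examples (added commentary, not in the original source):
   a plain 32-byte structure is larger than 2 * UNITS_PER_WORD and is
   therefore passed by reference, whereas a homogeneous floating-point
   aggregate of four doubles (also 32 bytes) is caught by the fp/simd
   candidate check above and so is not forced to be passed by
   reference.  */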
3265
3266/* Return TRUE if VALTYPE is padded to its least significant bits. */
3267static bool
3268aarch64_return_in_msb (const_tree valtype)
3269{
ef4bddc2 3270 machine_mode dummy_mode;
43e9d192
IB
3271 int dummy_int;
3272
3273 /* Never happens in little-endian mode. */
3274 if (!BYTES_BIG_ENDIAN)
3275 return false;
3276
3277 /* Only composite types smaller than or equal to 16 bytes can
3278 be potentially returned in registers. */
3279 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
3280 || int_size_in_bytes (valtype) <= 0
3281 || int_size_in_bytes (valtype) > 16)
3282 return false;
3283
3284 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
3285 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
3286 is always passed/returned in the least significant bits of fp/simd
3287 register(s). */
3288 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
3289 &dummy_mode, &dummy_int, NULL))
3290 return false;
3291
3292 return true;
3293}
3294
3295/* Implement TARGET_FUNCTION_VALUE.
3296 Define how to find the value returned by a function. */
3297
3298static rtx
3299aarch64_function_value (const_tree type, const_tree func,
3300 bool outgoing ATTRIBUTE_UNUSED)
3301{
ef4bddc2 3302 machine_mode mode;
43e9d192
IB
3303 int unsignedp;
3304 int count;
ef4bddc2 3305 machine_mode ag_mode;
43e9d192
IB
3306
3307 mode = TYPE_MODE (type);
3308 if (INTEGRAL_TYPE_P (type))
3309 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
3310
3311 if (aarch64_return_in_msb (type))
3312 {
3313 HOST_WIDE_INT size = int_size_in_bytes (type);
3314
3315 if (size % UNITS_PER_WORD != 0)
3316 {
3317 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
f4b31647 3318 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
43e9d192
IB
3319 }
3320 }
3321
3322 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
3323 &ag_mode, &count, NULL))
3324 {
3325 if (!aarch64_composite_type_p (type, mode))
3326 {
3327 gcc_assert (count == 1 && mode == ag_mode);
3328 return gen_rtx_REG (mode, V0_REGNUM);
3329 }
3330 else
3331 {
3332 int i;
3333 rtx par;
3334
3335 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3336 for (i = 0; i < count; i++)
3337 {
3338 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
6a70badb
RS
3339 rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode);
3340 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
3341 XVECEXP (par, 0, i) = tmp;
3342 }
3343 return par;
3344 }
3345 }
3346 else
3347 return gen_rtx_REG (mode, R0_REGNUM);
3348}
3349
3350/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
3351 Return true if REGNO is the number of a hard register in which the values
3352 of a called function may come back. */
3353
3354static bool
3355aarch64_function_value_regno_p (const unsigned int regno)
3356{
3357 /* A maximum of 16 bytes can be returned in the general registers. Examples
3358 of 16-byte return values are: 128-bit integers and 16-byte small
3359 structures (excluding homogeneous floating-point aggregates). */
3360 if (regno == R0_REGNUM || regno == R1_REGNUM)
3361 return true;
3362
3363 /* Up to four fp/simd registers can return a function value, e.g. a
3364 homogeneous floating-point aggregate having four members. */
3365 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
d5726973 3366 return TARGET_FLOAT;
43e9d192
IB
3367
3368 return false;
3369}
3370
3371/* Implement TARGET_RETURN_IN_MEMORY.
3372
3373 If the type T of the result of a function is such that
3374 void func (T arg)
3375 would require that arg be passed as a value in a register (or set of
3376 registers) according to the parameter passing rules, then the result
3377 is returned in the same registers as would be used for such an
3378 argument. */
3379
3380static bool
3381aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
3382{
3383 HOST_WIDE_INT size;
ef4bddc2 3384 machine_mode ag_mode;
43e9d192
IB
3385 int count;
3386
3387 if (!AGGREGATE_TYPE_P (type)
3388 && TREE_CODE (type) != COMPLEX_TYPE
3389 && TREE_CODE (type) != VECTOR_TYPE)
3390 /* Simple scalar types are always returned in registers. */
3391 return false;
3392
3393 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
3394 type,
3395 &ag_mode,
3396 &count,
3397 NULL))
3398 return false;
3399
3400 /* Types larger than 2 registers are returned in memory. */
3401 size = int_size_in_bytes (type);
3402 return (size < 0 || size > 2 * UNITS_PER_WORD);
3403}
3404
3405static bool
ef4bddc2 3406aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3407 const_tree type, int *nregs)
3408{
3409 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3410 return aarch64_vfp_is_call_or_return_candidate (mode,
3411 type,
3412 &pcum->aapcs_vfp_rmode,
3413 nregs,
3414 NULL);
3415}
3416
985b8393 3417/* Given MODE and TYPE of a function argument, return the alignment in
43e9d192
IB
3418 bits. The idea is to suppress any stronger alignment requested by
3419 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
3420 This is a helper function for local use only. */
3421
985b8393 3422static unsigned int
ef4bddc2 3423aarch64_function_arg_alignment (machine_mode mode, const_tree type)
43e9d192 3424{
75d6cc81 3425 if (!type)
985b8393 3426 return GET_MODE_ALIGNMENT (mode);
2ec07fa6 3427
75d6cc81 3428 if (integer_zerop (TYPE_SIZE (type)))
985b8393 3429 return 0;
43e9d192 3430
75d6cc81
AL
3431 gcc_assert (TYPE_MODE (type) == mode);
3432
3433 if (!AGGREGATE_TYPE_P (type))
985b8393 3434 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
75d6cc81
AL
3435
3436 if (TREE_CODE (type) == ARRAY_TYPE)
985b8393 3437 return TYPE_ALIGN (TREE_TYPE (type));
75d6cc81 3438
985b8393 3439 unsigned int alignment = 0;
75d6cc81 3440 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
985b8393
JJ
3441 if (TREE_CODE (field) == FIELD_DECL)
3442 alignment = std::max (alignment, DECL_ALIGN (field));
43e9d192 3443
985b8393 3444 return alignment;
43e9d192
IB
3445}
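/* A minimal sketch, outside GCC, of the rule implemented above: the
   alignment used for argument passing is the largest member alignment
   (the natural alignment), not an over-alignment requested by the user.
   The struct and helper below are hypothetical examples.  */
struct example_record { char c; double d; };	/* natural alignment: 8 bytes */

static unsigned int
example_record_alignment_bits (void)
{
  /* The most strictly aligned field decides: 8 bytes, i.e. 64 bits.  */
  return (unsigned int) _Alignof (double) * 8;
}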
3446
3447/* Layout a function argument according to the AAPCS64 rules. The rule
3448 numbers refer to the rule numbers in the AAPCS64. */
3449
3450static void
ef4bddc2 3451aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3452 const_tree type,
3453 bool named ATTRIBUTE_UNUSED)
3454{
3455 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3456 int ncrn, nvrn, nregs;
3457 bool allocate_ncrn, allocate_nvrn;
3abf17cf 3458 HOST_WIDE_INT size;
43e9d192
IB
3459
3460 /* We need to do this once per argument. */
3461 if (pcum->aapcs_arg_processed)
3462 return;
3463
3464 pcum->aapcs_arg_processed = true;
3465
3abf17cf 3466 /* Size in bytes, rounded up to the nearest multiple of 8 bytes. */
6a70badb
RS
3467 if (type)
3468 size = int_size_in_bytes (type);
3469 else
3470 /* No frontends can create types with variable-sized modes, so we
3471 shouldn't be asked to pass or return them. */
3472 size = GET_MODE_SIZE (mode).to_constant ();
3473 size = ROUND_UP (size, UNITS_PER_WORD);
3abf17cf 3474
43e9d192
IB
3475 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
3476 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
3477 mode,
3478 type,
3479 &nregs);
3480
3481 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
3482 The following code thus handles passing by SIMD/FP registers first. */
3483
3484 nvrn = pcum->aapcs_nvrn;
3485
3486 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
3487 and homogeneous short-vector aggregates (HVA). */
3488 if (allocate_nvrn)
3489 {
261fb553
AL
3490 if (!TARGET_FLOAT)
3491 aarch64_err_no_fpadvsimd (mode, "argument");
3492
43e9d192
IB
3493 if (nvrn + nregs <= NUM_FP_ARG_REGS)
3494 {
3495 pcum->aapcs_nextnvrn = nvrn + nregs;
3496 if (!aarch64_composite_type_p (type, mode))
3497 {
3498 gcc_assert (nregs == 1);
3499 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
3500 }
3501 else
3502 {
3503 rtx par;
3504 int i;
3505 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3506 for (i = 0; i < nregs; i++)
3507 {
3508 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
3509 V0_REGNUM + nvrn + i);
6a70badb
RS
3510 rtx offset = gen_int_mode
3511 (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode);
3512 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset);
43e9d192
IB
3513 XVECEXP (par, 0, i) = tmp;
3514 }
3515 pcum->aapcs_reg = par;
3516 }
3517 return;
3518 }
3519 else
3520 {
3521 /* C.3 NSRN is set to 8. */
3522 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
3523 goto on_stack;
3524 }
3525 }
3526
3527 ncrn = pcum->aapcs_ncrn;
3abf17cf 3528 nregs = size / UNITS_PER_WORD;
43e9d192
IB
3529
3530 /* C6 - C9, though the sign and zero extension semantics are
3531 handled elsewhere. This is the case where the argument fits
3532 entirely in general registers. */
3533 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
3534 {
43e9d192
IB
3535
3536 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
3537
3538 /* C.8: if the argument has an alignment of 16 bytes then the NGRN is
3539 rounded up to the next even number. */
985b8393
JJ
3540 if (nregs == 2
3541 && ncrn % 2
2ec07fa6 3542 /* The == 16 * BITS_PER_UNIT comparison (rather than >=) is used
985b8393 3543 because for alignment greater than 16 * BITS_PER_UNIT
2ec07fa6
RR
3544 nregs would be greater than 2, and the argument would therefore
3545 be passed by reference rather than by value. */
985b8393
JJ
3546 && aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
3547 {
3548 ++ncrn;
3549 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
43e9d192 3550 }
2ec07fa6 3551
43e9d192
IB
3552 /* NREGS can be 0 when e.g. an empty structure is to be passed.
3553 A reg is still generated for it, but the caller should be smart
3554 enough not to use it. */
3555 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
2ec07fa6 3556 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
43e9d192
IB
3557 else
3558 {
3559 rtx par;
3560 int i;
3561
3562 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
3563 for (i = 0; i < nregs; i++)
3564 {
3565 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
3566 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3567 GEN_INT (i * UNITS_PER_WORD));
3568 XVECEXP (par, 0, i) = tmp;
3569 }
3570 pcum->aapcs_reg = par;
3571 }
3572
3573 pcum->aapcs_nextncrn = ncrn + nregs;
3574 return;
3575 }
3576
3577 /* C.11 */
3578 pcum->aapcs_nextncrn = NUM_ARG_REGS;
3579
3580 /* The argument is passed on the stack; record the needed number of words for
3abf17cf 3581 this argument and align the total size if necessary. */
43e9d192 3582on_stack:
3abf17cf 3583 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
2ec07fa6 3584
985b8393 3585 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
4f59f9f2
UB
3586 pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
3587 16 / UNITS_PER_WORD);
43e9d192
IB
3588 return;
3589}
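/* Standalone sketch, not GCC code, of two of the AAPCS64 steps above: the
   argument size is rounded up to whole 8-byte words, and (rule C.8) a
   two-register argument with 16-byte alignment starts on an even-numbered
   general register.  All names below are hypothetical.  */
static long
example_size_in_words (long size_in_bytes)
{
  return (size_in_bytes + 7) / 8;	/* ROUND_UP (size, UNITS_PER_WORD) / 8 */
}

static int
example_apply_c8_rule (int ngrn, int nregs, unsigned int align_bits)
{
  if (nregs == 2 && (ngrn & 1) != 0 && align_bits == 128)
    ngrn++;				/* round up to the next even NGRN */
  return ngrn;
}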
3590
3591/* Implement TARGET_FUNCTION_ARG. */
3592
3593static rtx
ef4bddc2 3594aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
43e9d192
IB
3595 const_tree type, bool named)
3596{
3597 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3598 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
3599
3600 if (mode == VOIDmode)
3601 return NULL_RTX;
3602
3603 aarch64_layout_arg (pcum_v, mode, type, named);
3604 return pcum->aapcs_reg;
3605}
3606
3607void
3608aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
3609 const_tree fntype ATTRIBUTE_UNUSED,
3610 rtx libname ATTRIBUTE_UNUSED,
3611 const_tree fndecl ATTRIBUTE_UNUSED,
3612 unsigned n_named ATTRIBUTE_UNUSED)
3613{
3614 pcum->aapcs_ncrn = 0;
3615 pcum->aapcs_nvrn = 0;
3616 pcum->aapcs_nextncrn = 0;
3617 pcum->aapcs_nextnvrn = 0;
3618 pcum->pcs_variant = ARM_PCS_AAPCS64;
3619 pcum->aapcs_reg = NULL_RTX;
3620 pcum->aapcs_arg_processed = false;
3621 pcum->aapcs_stack_words = 0;
3622 pcum->aapcs_stack_size = 0;
3623
261fb553
AL
3624 if (!TARGET_FLOAT
3625 && fndecl && TREE_PUBLIC (fndecl)
3626 && fntype && fntype != error_mark_node)
3627 {
3628 const_tree type = TREE_TYPE (fntype);
3629 machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument. */
3630 int nregs ATTRIBUTE_UNUSED; /* Likewise. */
3631 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
3632 &mode, &nregs, NULL))
3633 aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
3634 }
43e9d192
IB
3635 return;
3636}
3637
3638static void
3639aarch64_function_arg_advance (cumulative_args_t pcum_v,
ef4bddc2 3640 machine_mode mode,
43e9d192
IB
3641 const_tree type,
3642 bool named)
3643{
3644 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
3645 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
3646 {
3647 aarch64_layout_arg (pcum_v, mode, type, named);
3648 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
3649 != (pcum->aapcs_stack_words != 0));
3650 pcum->aapcs_arg_processed = false;
3651 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
3652 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
3653 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
3654 pcum->aapcs_stack_words = 0;
3655 pcum->aapcs_reg = NULL_RTX;
3656 }
3657}
3658
3659bool
3660aarch64_function_arg_regno_p (unsigned regno)
3661{
3662 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
3663 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
3664}
3665
3666/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
3667 PARM_BOUNDARY bits of alignment, but will be given anything up
3668 to STACK_BOUNDARY bits if the type requires it. This makes sure
3669 that both before and after the layout of each argument, the Next
3670 Stacked Argument Address (NSAA) will have a minimum alignment of
3671 8 bytes. */
3672
3673static unsigned int
ef4bddc2 3674aarch64_function_arg_boundary (machine_mode mode, const_tree type)
43e9d192 3675{
985b8393
JJ
3676 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
3677 return MIN (MAX (alignment, PARM_BOUNDARY), STACK_BOUNDARY);
43e9d192
IB
3678}
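/* A small sketch (not GCC code) of the clamp performed above, using the
   AArch64 values PARM_BOUNDARY = 64 and STACK_BOUNDARY = 128 bits.  */
static unsigned int
example_arg_boundary_bits (unsigned int natural_align_bits)
{
  unsigned int boundary = natural_align_bits;
  if (boundary < 64)
    boundary = 64;	/* every argument gets at least one word of alignment */
  if (boundary > 128)
    boundary = 128;	/* but never more than the stack alignment */
  return boundary;
}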
3679
43cacb12
RS
3680/* Implement TARGET_GET_RAW_RESULT_MODE and TARGET_GET_RAW_ARG_MODE. */
3681
3682static fixed_size_mode
3683aarch64_get_reg_raw_mode (int regno)
3684{
3685 if (TARGET_SVE && FP_REGNUM_P (regno))
3686 /* Don't use the SVE part of the register for __builtin_apply and
3687 __builtin_return. The SVE registers aren't used by the normal PCS,
3688 so using them there would be a waste of time. The PCS extensions
3689 for SVE types are fundamentally incompatible with the
3690 __builtin_return/__builtin_apply interface. */
3691 return as_a <fixed_size_mode> (V16QImode);
3692 return default_get_reg_raw_mode (regno);
3693}
3694
76b0cbf8 3695/* Implement TARGET_FUNCTION_ARG_PADDING.
43e9d192
IB
3696
3697 Small aggregate types are placed at the lowest memory address.
3698
3699 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
3700
76b0cbf8
RS
3701static pad_direction
3702aarch64_function_arg_padding (machine_mode mode, const_tree type)
43e9d192
IB
3703{
3704 /* On little-endian targets, the least significant byte of every stack
3705 argument is passed at the lowest byte address of the stack slot. */
3706 if (!BYTES_BIG_ENDIAN)
76b0cbf8 3707 return PAD_UPWARD;
43e9d192 3708
00edcfbe 3709 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
3710 the least significant byte of a stack argument is passed at the highest
3711 byte address of the stack slot. */
3712 if (type
00edcfbe
YZ
3713 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
3714 || POINTER_TYPE_P (type))
43e9d192 3715 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
76b0cbf8 3716 return PAD_DOWNWARD;
43e9d192
IB
3717
3718 /* Everything else is padded upward, i.e. the data goes in the first byte of the stack slot. */
76b0cbf8 3719 return PAD_UPWARD;
43e9d192
IB
3720}
3721
3722/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
3723
3724 It specifies padding for the last (possibly the only)
3725 element of a block move between registers and memory.
3726 Assuming the block is in memory, padding upward means that
3727 the last element is padded after its most significant byte,
3728 while with downward padding, the last element is padded on
3729 its least significant byte side.
3730
3731 Small aggregates and small complex types are always padded
3732 upwards.
3733
3734 We don't need to worry about homogeneous floating-point or
3735 short-vector aggregates; their move is not affected by the
3736 padding direction determined here. Regardless of endianness,
3737 each element of such an aggregate is put in the least
3738 significant bits of a fp/simd register.
3739
3740 Return !BYTES_BIG_ENDIAN if the least significant byte of the
3741 register has useful data, and return the opposite if the most
3742 significant byte does. */
3743
3744bool
ef4bddc2 3745aarch64_pad_reg_upward (machine_mode mode, const_tree type,
43e9d192
IB
3746 bool first ATTRIBUTE_UNUSED)
3747{
3748
3749 /* Small composite types are always padded upward. */
3750 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
3751 {
6a70badb
RS
3752 HOST_WIDE_INT size;
3753 if (type)
3754 size = int_size_in_bytes (type);
3755 else
3756 /* No frontends can create types with variable-sized modes, so we
3757 shouldn't be asked to pass or return them. */
3758 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
3759 if (size < 2 * UNITS_PER_WORD)
3760 return true;
3761 }
3762
3763 /* Otherwise, use the default padding. */
3764 return !BYTES_BIG_ENDIAN;
3765}
3766
095a2d76 3767static scalar_int_mode
43e9d192
IB
3768aarch64_libgcc_cmp_return_mode (void)
3769{
3770 return SImode;
3771}
3772
a3eb8a52
EB
3773#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3774
3775/* We use the 12-bit shifted immediate arithmetic instructions so values
3776 must be a multiple of (1 << 12), i.e. 4096. */
3777#define ARITH_FACTOR 4096
3778
3779#if (PROBE_INTERVAL % ARITH_FACTOR) != 0
3780#error Cannot use simple address calculation for stack probing
3781#endif
3782
3783/* The pair of scratch registers used for stack probing. */
3784#define PROBE_STACK_FIRST_REG 9
3785#define PROBE_STACK_SECOND_REG 10
3786
6a70badb 3787/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE,
a3eb8a52
EB
3788 inclusive. These are offsets from the current stack pointer. */
3789
3790static void
6a70badb 3791aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size)
a3eb8a52 3792{
6a70badb
RS
3793 HOST_WIDE_INT size;
3794 if (!poly_size.is_constant (&size))
3795 {
3796 sorry ("stack probes for SVE frames");
3797 return;
3798 }
3799
5f5c5e0f 3800 rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG);
a3eb8a52
EB
3801
3802 /* See the same assertion on PROBE_INTERVAL above. */
3803 gcc_assert ((first % ARITH_FACTOR) == 0);
3804
3805 /* See if we have a constant small number of probes to generate. If so,
3806 that's the easy case. */
3807 if (size <= PROBE_INTERVAL)
3808 {
3809 const HOST_WIDE_INT base = ROUND_UP (size, ARITH_FACTOR);
3810
3811 emit_set_insn (reg1,
5f5c5e0f 3812 plus_constant (Pmode,
a3eb8a52 3813 stack_pointer_rtx, -(first + base)));
5f5c5e0f 3814 emit_stack_probe (plus_constant (Pmode, reg1, base - size));
a3eb8a52
EB
3815 }
3816
3817 /* The run-time loop is made up of 8 insns in the generic case while the
3818 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
3819 else if (size <= 4 * PROBE_INTERVAL)
3820 {
3821 HOST_WIDE_INT i, rem;
3822
3823 emit_set_insn (reg1,
5f5c5e0f 3824 plus_constant (Pmode,
a3eb8a52
EB
3825 stack_pointer_rtx,
3826 -(first + PROBE_INTERVAL)));
3827 emit_stack_probe (reg1);
3828
3829 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3830 it exceeds SIZE. If only two probes are needed, this will not
3831 generate any code. Then probe at FIRST + SIZE. */
3832 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3833 {
3834 emit_set_insn (reg1,
5f5c5e0f 3835 plus_constant (Pmode, reg1, -PROBE_INTERVAL));
a3eb8a52
EB
3836 emit_stack_probe (reg1);
3837 }
3838
3839 rem = size - (i - PROBE_INTERVAL);
3840 if (rem > 256)
3841 {
3842 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
3843
5f5c5e0f
EB
3844 emit_set_insn (reg1, plus_constant (Pmode, reg1, -base));
3845 emit_stack_probe (plus_constant (Pmode, reg1, base - rem));
a3eb8a52
EB
3846 }
3847 else
5f5c5e0f 3848 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
a3eb8a52
EB
3849 }
3850
3851 /* Otherwise, do the same as above, but in a loop. Note that we must be
3852 extra careful with variables wrapping around because we might be at
3853 the very top (or the very bottom) of the address space and we have
3854 to be able to handle this case properly; in particular, we use an
3855 equality test for the loop condition. */
3856 else
3857 {
5f5c5e0f 3858 rtx reg2 = gen_rtx_REG (Pmode, PROBE_STACK_SECOND_REG);
a3eb8a52
EB
3859
3860 /* Step 1: round SIZE to the previous multiple of the interval. */
3861
3862 HOST_WIDE_INT rounded_size = size & -PROBE_INTERVAL;
3863
3864
3865 /* Step 2: compute initial and final value of the loop counter. */
3866
3867 /* TEST_ADDR = SP + FIRST. */
3868 emit_set_insn (reg1,
5f5c5e0f 3869 plus_constant (Pmode, stack_pointer_rtx, -first));
a3eb8a52
EB
3870
3871 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
13f752b2
JL
3872 HOST_WIDE_INT adjustment = - (first + rounded_size);
3873 if (! aarch64_uimm12_shift (adjustment))
3874 {
3875 aarch64_internal_mov_immediate (reg2, GEN_INT (adjustment),
3876 true, Pmode);
3877 emit_set_insn (reg2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, reg2));
3878 }
3879 else
8dd64cdf
EB
3880 emit_set_insn (reg2,
3881 plus_constant (Pmode, stack_pointer_rtx, adjustment));
3882
a3eb8a52
EB
3883 /* Step 3: the loop
3884
3885 do
3886 {
3887 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3888 probe at TEST_ADDR
3889 }
3890 while (TEST_ADDR != LAST_ADDR)
3891
3892 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3893 until it is equal to ROUNDED_SIZE. */
3894
5f5c5e0f 3895 emit_insn (gen_probe_stack_range (reg1, reg1, reg2));
a3eb8a52
EB
3896
3897
3898 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3899 that SIZE is equal to ROUNDED_SIZE. */
3900
3901 if (size != rounded_size)
3902 {
3903 HOST_WIDE_INT rem = size - rounded_size;
3904
3905 if (rem > 256)
3906 {
3907 const HOST_WIDE_INT base = ROUND_UP (rem, ARITH_FACTOR);
3908
5f5c5e0f
EB
3909 emit_set_insn (reg2, plus_constant (Pmode, reg2, -base));
3910 emit_stack_probe (plus_constant (Pmode, reg2, base - rem));
a3eb8a52
EB
3911 }
3912 else
5f5c5e0f 3913 emit_stack_probe (plus_constant (Pmode, reg2, -rem));
a3eb8a52
EB
3914 }
3915 }
3916
3917 /* Make sure nothing is scheduled before we are done. */
3918 emit_insn (gen_blockage ());
3919}
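/* Illustrative sketch, not GCC code, of the arithmetic used by the looping
   case above: the size is rounded down to a multiple of PROBE_INTERVAL, the
   loop probes once per interval, and any remainder gets one final probe.  */
static long
example_loop_probe_count (long size)
{
  const long probe_interval = 4096;		/* PROBE_INTERVAL, 1 << 12 */
  long rounded_size = size & -probe_interval;	/* round down to the interval */
  long loop_probes = rounded_size / probe_interval;
  long remainder = size - rounded_size;		/* probed after the loop */
  return loop_probes + (remainder != 0 ? 1 : 0);
}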
3920
3921/* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
3922 absolute addresses. */
3923
3924const char *
3925aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
3926{
3927 static int labelno = 0;
3928 char loop_lab[32];
3929 rtx xops[2];
3930
3931 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
3932
3933 /* Loop. */
3934 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3935
3936 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
3937 xops[0] = reg1;
3938 xops[1] = GEN_INT (PROBE_INTERVAL);
3939 output_asm_insn ("sub\t%0, %0, %1", xops);
3940
3941 /* Probe at TEST_ADDR. */
3942 output_asm_insn ("str\txzr, [%0]", xops);
3943
3944 /* Test if TEST_ADDR == LAST_ADDR. */
3945 xops[1] = reg2;
3946 output_asm_insn ("cmp\t%0, %1", xops);
3947
3948 /* Branch. */
3949 fputs ("\tb.ne\t", asm_out_file);
3950 assemble_name_raw (asm_out_file, loop_lab);
3951 fputc ('\n', asm_out_file);
3952
3953 return "";
3954}
3955
d6cb6d6a
WD
3956/* Determine whether a frame chain needs to be generated. */
3957static bool
3958aarch64_needs_frame_chain (void)
3959{
3960 /* Force a frame chain for EH returns so the return address is at FP+8. */
3961 if (frame_pointer_needed || crtl->calls_eh_return)
3962 return true;
3963
3964 /* A leaf function cannot have calls or write LR. */
3965 bool is_leaf = crtl->is_leaf && !df_regs_ever_live_p (LR_REGNUM);
3966
3967 /* Don't use a frame chain in leaf functions if leaf frame pointers
3968 are disabled. */
3969 if (flag_omit_leaf_frame_pointer && is_leaf)
3970 return false;
3971
3972 return aarch64_use_frame_pointer;
3973}
3974
43e9d192
IB
3975/* Mark the registers that need to be saved by the callee and calculate
3976 the size of the callee-saved registers area and frame record (both FP
33a2e348 3977 and LR may be omitted). */
43e9d192
IB
3978static void
3979aarch64_layout_frame (void)
3980{
3981 HOST_WIDE_INT offset = 0;
4b0685d9 3982 int regno, last_fp_reg = INVALID_REGNUM;
43e9d192
IB
3983
3984 if (reload_completed && cfun->machine->frame.laid_out)
3985 return;
3986
d6cb6d6a 3987 cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
7040939b 3988
97826595
MS
3989#define SLOT_NOT_REQUIRED (-2)
3990#define SLOT_REQUIRED (-1)
3991
71bfb77a
WD
3992 cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
3993 cfun->machine->frame.wb_candidate2 = INVALID_REGNUM;
363ffa50 3994
43e9d192
IB
3995 /* First mark all the registers that really need to be saved... */
3996 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 3997 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
3998
3999 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 4000 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
4001
4002 /* ... that includes the eh data registers (if needed)... */
4003 if (crtl->calls_eh_return)
4004 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
4005 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
4006 = SLOT_REQUIRED;
43e9d192
IB
4007
4008 /* ... and any callee saved register that dataflow says is live. */
4009 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
4010 if (df_regs_ever_live_p (regno)
1c923b60
JW
4011 && (regno == R30_REGNUM
4012 || !call_used_regs[regno]))
97826595 4013 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
4014
4015 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
4016 if (df_regs_ever_live_p (regno)
4017 && !call_used_regs[regno])
4b0685d9
WD
4018 {
4019 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
4020 last_fp_reg = regno;
4021 }
43e9d192 4022
204d2c03 4023 if (cfun->machine->frame.emit_frame_chain)
43e9d192 4024 {
2e1cdae5 4025 /* FP and LR are placed in the linkage record. */
43e9d192 4026 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 4027 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 4028 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 4029 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
1f7bffd0
WD
4030 offset = 2 * UNITS_PER_WORD;
4031 }
43e9d192
IB
4032
4033 /* Now assign stack slots for them. */
2e1cdae5 4034 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 4035 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
4036 {
4037 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 4038 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 4039 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 4040 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM)
363ffa50 4041 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
4042 offset += UNITS_PER_WORD;
4043 }
4044
4b0685d9
WD
4045 HOST_WIDE_INT max_int_offset = offset;
4046 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
4047 bool has_align_gap = offset != max_int_offset;
4048
43e9d192 4049 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 4050 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192 4051 {
4b0685d9
WD
4052 /* If there is an alignment gap between integer and fp callee-saves,
4053 allocate the last fp register to it if possible. */
4054 if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0)
4055 {
4056 cfun->machine->frame.reg_offset[regno] = max_int_offset;
4057 break;
4058 }
4059
43e9d192 4060 cfun->machine->frame.reg_offset[regno] = offset;
71bfb77a 4061 if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM)
363ffa50 4062 cfun->machine->frame.wb_candidate1 = regno;
71bfb77a 4063 else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM
363ffa50
JW
4064 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
4065 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
4066 offset += UNITS_PER_WORD;
4067 }
4068
4f59f9f2 4069 offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
4070
4071 cfun->machine->frame.saved_regs_size = offset;
1c960e02 4072
71bfb77a
WD
4073 HOST_WIDE_INT varargs_and_saved_regs_size
4074 = offset + cfun->machine->frame.saved_varargs_size;
4075
1c960e02 4076 cfun->machine->frame.hard_fp_offset
6a70badb
RS
4077 = aligned_upper_bound (varargs_and_saved_regs_size
4078 + get_frame_size (),
4079 STACK_BOUNDARY / BITS_PER_UNIT);
1c960e02 4080
6a70badb
RS
4081 /* Both these values are already aligned. */
4082 gcc_assert (multiple_p (crtl->outgoing_args_size,
4083 STACK_BOUNDARY / BITS_PER_UNIT));
1c960e02 4084 cfun->machine->frame.frame_size
6a70badb
RS
4085 = (cfun->machine->frame.hard_fp_offset
4086 + crtl->outgoing_args_size);
1c960e02 4087
71bfb77a
WD
4088 cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size;
4089
4090 cfun->machine->frame.initial_adjust = 0;
4091 cfun->machine->frame.final_adjust = 0;
4092 cfun->machine->frame.callee_adjust = 0;
4093 cfun->machine->frame.callee_offset = 0;
4094
4095 HOST_WIDE_INT max_push_offset = 0;
4096 if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM)
4097 max_push_offset = 512;
4098 else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM)
4099 max_push_offset = 256;
4100
6a70badb
RS
4101 HOST_WIDE_INT const_size, const_fp_offset;
4102 if (cfun->machine->frame.frame_size.is_constant (&const_size)
4103 && const_size < max_push_offset
4104 && known_eq (crtl->outgoing_args_size, 0))
71bfb77a
WD
4105 {
4106 /* Simple, small frame with no outgoing arguments:
4107 stp reg1, reg2, [sp, -frame_size]!
4108 stp reg3, reg4, [sp, 16] */
6a70badb 4109 cfun->machine->frame.callee_adjust = const_size;
71bfb77a 4110 }
6a70badb
RS
4111 else if (known_lt (crtl->outgoing_args_size
4112 + cfun->machine->frame.saved_regs_size, 512)
71bfb77a 4113 && !(cfun->calls_alloca
6a70badb
RS
4114 && known_lt (cfun->machine->frame.hard_fp_offset,
4115 max_push_offset)))
71bfb77a
WD
4116 {
4117 /* Frame with small outgoing arguments:
4118 sub sp, sp, frame_size
4119 stp reg1, reg2, [sp, outgoing_args_size]
4120 stp reg3, reg4, [sp, outgoing_args_size + 16] */
4121 cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size;
4122 cfun->machine->frame.callee_offset
4123 = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset;
4124 }
6a70badb
RS
4125 else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset)
4126 && const_fp_offset < max_push_offset)
71bfb77a
WD
4127 {
4128 /* Frame with large outgoing arguments but a small local area:
4129 stp reg1, reg2, [sp, -hard_fp_offset]!
4130 stp reg3, reg4, [sp, 16]
4131 sub sp, sp, outgoing_args_size */
6a70badb 4132 cfun->machine->frame.callee_adjust = const_fp_offset;
71bfb77a
WD
4133 cfun->machine->frame.final_adjust
4134 = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust;
4135 }
71bfb77a
WD
4136 else
4137 {
4138 /* Frame with large local area and outgoing arguments using frame pointer:
4139 sub sp, sp, hard_fp_offset
4140 stp x29, x30, [sp, 0]
4141 add x29, sp, 0
4142 stp reg3, reg4, [sp, 16]
4143 sub sp, sp, outgoing_args_size */
4144 cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset;
4145 cfun->machine->frame.final_adjust
4146 = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust;
4147 }
4148
43e9d192
IB
4149 cfun->machine->frame.laid_out = true;
4150}
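/* A simplified standalone sketch (not GCC code) of the choice just made
   above, valid only for constant-sized frames and ignoring the alloca
   special case: decide how the prologue splits the total allocation.
   The structure and field names below are hypothetical mirrors of the
   frame fields used above.  */
struct example_frame
{
  long frame_size, hard_fp_offset, saved_regs_size, outgoing_args_size;
  long initial_adjust, callee_adjust, callee_offset, final_adjust;
};

static void
example_choose_frame_layout (struct example_frame *f, long max_push_offset)
{
  if (f->frame_size < max_push_offset && f->outgoing_args_size == 0)
    /* Small frame: allocate everything with the writeback of the first
       register push (stp ..., [sp, -frame_size]!).  */
    f->callee_adjust = f->frame_size;
  else if (f->outgoing_args_size + f->saved_regs_size < 512)
    {
      /* One subtraction up front; saves addressed above the outgoing args.  */
      f->initial_adjust = f->frame_size;
      f->callee_offset = f->frame_size - f->hard_fp_offset;
    }
  else if (f->hard_fp_offset < max_push_offset)
    {
      /* Push the saves first, then allocate the outgoing argument area.  */
      f->callee_adjust = f->hard_fp_offset;
      f->final_adjust = f->frame_size - f->callee_adjust;
    }
  else
    {
      /* Large frame: allocate the local area, save, then allocate the rest.  */
      f->initial_adjust = f->hard_fp_offset;
      f->final_adjust = f->frame_size - f->initial_adjust;
    }
}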
4151
04ddfe06
KT
4152/* Return true if the register REGNO is saved on entry to
4153 the current function. */
4154
43e9d192
IB
4155static bool
4156aarch64_register_saved_on_entry (int regno)
4157{
97826595 4158 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
4159}
4160
04ddfe06
KT
4161/* Return the next register, from REGNO up to LIMIT, that the callee
4162 needs to save. */
4163
64dedd72
JW
4164static unsigned
4165aarch64_next_callee_save (unsigned regno, unsigned limit)
4166{
4167 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
4168 regno ++;
4169 return regno;
4170}
43e9d192 4171
04ddfe06
KT
4172/* Push the register number REGNO of mode MODE to the stack with write-back
4173 adjusting the stack by ADJUSTMENT. */
4174
c5e1f66e 4175static void
ef4bddc2 4176aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
c5e1f66e
JW
4177 HOST_WIDE_INT adjustment)
4178 {
4179 rtx base_rtx = stack_pointer_rtx;
4180 rtx insn, reg, mem;
4181
4182 reg = gen_rtx_REG (mode, regno);
4183 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
4184 plus_constant (Pmode, base_rtx, -adjustment));
30079dde 4185 mem = gen_frame_mem (mode, mem);
c5e1f66e
JW
4186
4187 insn = emit_move_insn (mem, reg);
4188 RTX_FRAME_RELATED_P (insn) = 1;
4189}
4190
04ddfe06
KT
4191/* Generate and return an instruction to store the pair of registers
4192 REG and REG2 of mode MODE to location BASE with write-back adjusting
4193 the stack location BASE by ADJUSTMENT. */
4194
80c11907 4195static rtx
ef4bddc2 4196aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
80c11907
JW
4197 HOST_WIDE_INT adjustment)
4198{
4199 switch (mode)
4200 {
4e10a5a7 4201 case E_DImode:
80c11907
JW
4202 return gen_storewb_pairdi_di (base, base, reg, reg2,
4203 GEN_INT (-adjustment),
4204 GEN_INT (UNITS_PER_WORD - adjustment));
4e10a5a7 4205 case E_DFmode:
80c11907
JW
4206 return gen_storewb_pairdf_di (base, base, reg, reg2,
4207 GEN_INT (-adjustment),
4208 GEN_INT (UNITS_PER_WORD - adjustment));
4209 default:
4210 gcc_unreachable ();
4211 }
4212}
4213
04ddfe06
KT
4214/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
4215 stack pointer by ADJUSTMENT. */
4216
80c11907 4217static void
89ac681e 4218aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
80c11907 4219{
5d8a22a5 4220 rtx_insn *insn;
0d4a1197 4221 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
89ac681e 4222
71bfb77a 4223 if (regno2 == INVALID_REGNUM)
89ac681e
WD
4224 return aarch64_pushwb_single_reg (mode, regno1, adjustment);
4225
80c11907
JW
4226 rtx reg1 = gen_rtx_REG (mode, regno1);
4227 rtx reg2 = gen_rtx_REG (mode, regno2);
4228
4229 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
4230 reg2, adjustment));
4231 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
4232 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
4233 RTX_FRAME_RELATED_P (insn) = 1;
4234}
4235
04ddfe06
KT
4236/* Load the pair of registers REG, REG2 of mode MODE from stack location BASE,
4237 adjusting it by ADJUSTMENT afterwards. */
4238
159313d9 4239static rtx
ef4bddc2 4240aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
159313d9
JW
4241 HOST_WIDE_INT adjustment)
4242{
4243 switch (mode)
4244 {
4e10a5a7 4245 case E_DImode:
159313d9 4246 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 4247 GEN_INT (UNITS_PER_WORD));
4e10a5a7 4248 case E_DFmode:
159313d9 4249 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 4250 GEN_INT (UNITS_PER_WORD));
159313d9
JW
4251 default:
4252 gcc_unreachable ();
4253 }
4254}
4255
04ddfe06
KT
4256/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
4257 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
4258 into CFI_OPS. */
4259
89ac681e
WD
4260static void
4261aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
4262 rtx *cfi_ops)
4263{
0d4a1197 4264 machine_mode mode = (regno1 <= R30_REGNUM) ? E_DImode : E_DFmode;
89ac681e
WD
4265 rtx reg1 = gen_rtx_REG (mode, regno1);
4266
4267 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
4268
71bfb77a 4269 if (regno2 == INVALID_REGNUM)
89ac681e
WD
4270 {
4271 rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
4272 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
30079dde 4273 emit_move_insn (reg1, gen_frame_mem (mode, mem));
89ac681e
WD
4274 }
4275 else
4276 {
4277 rtx reg2 = gen_rtx_REG (mode, regno2);
4278 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
4279 emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
4280 reg2, adjustment));
4281 }
4282}
4283
04ddfe06
KT
4284/* Generate and return a store pair instruction of mode MODE to store
4285 register REG1 to MEM1 and register REG2 to MEM2. */
4286
72df5c1f 4287static rtx
ef4bddc2 4288aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
72df5c1f
JW
4289 rtx reg2)
4290{
4291 switch (mode)
4292 {
4e10a5a7 4293 case E_DImode:
dfe1da23 4294 return gen_store_pair_dw_didi (mem1, reg1, mem2, reg2);
72df5c1f 4295
4e10a5a7 4296 case E_DFmode:
dfe1da23 4297 return gen_store_pair_dw_dfdf (mem1, reg1, mem2, reg2);
72df5c1f
JW
4298
4299 default:
4300 gcc_unreachable ();
4301 }
4302}
4303
04ddfe06
KT
4304/* Generate and return a load pair instruction of mode MODE to load register
4305 REG1 from MEM1 and register REG2 from MEM2. */
4306
72df5c1f 4307static rtx
ef4bddc2 4308aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
72df5c1f
JW
4309 rtx mem2)
4310{
4311 switch (mode)
4312 {
4e10a5a7 4313 case E_DImode:
dfe1da23 4314 return gen_load_pair_dw_didi (reg1, mem1, reg2, mem2);
72df5c1f 4315
4e10a5a7 4316 case E_DFmode:
dfe1da23 4317 return gen_load_pair_dw_dfdf (reg1, mem1, reg2, mem2);
72df5c1f
JW
4318
4319 default:
4320 gcc_unreachable ();
4321 }
4322}
4323
db58fd89
JW
4324/* Return TRUE if return address signing should be enabled for the current
4325 function, otherwise return FALSE. */
4326
4327bool
4328aarch64_return_address_signing_enabled (void)
4329{
4330 /* This function should only be called after frame laid out. */
4331 gcc_assert (cfun->machine->frame.laid_out);
4332
4333 /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
4334 if its LR is pushed onto the stack. */
4335 return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL
4336 || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF
4337 && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0));
4338}
4339
04ddfe06
KT
4340/* Emit code to save the callee-saved registers from register number START
4341 to LIMIT to the stack at the location starting at offset START_OFFSET,
4342 skipping any write-back candidates if SKIP_WB is true. */
43e9d192 4343
43e9d192 4344static void
6a70badb 4345aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset,
ae13fce3 4346 unsigned start, unsigned limit, bool skip_wb)
43e9d192 4347{
5d8a22a5 4348 rtx_insn *insn;
43e9d192
IB
4349 unsigned regno;
4350 unsigned regno2;
4351
0ec74a1e 4352 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
4353 regno <= limit;
4354 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 4355 {
ae13fce3 4356 rtx reg, mem;
6a70badb 4357 poly_int64 offset;
64dedd72 4358
ae13fce3
JW
4359 if (skip_wb
4360 && (regno == cfun->machine->frame.wb_candidate1
4361 || regno == cfun->machine->frame.wb_candidate2))
4362 continue;
4363
827ab47a
KT
4364 if (cfun->machine->reg_is_wrapped_separately[regno])
4365 continue;
4366
ae13fce3
JW
4367 reg = gen_rtx_REG (mode, regno);
4368 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde
WD
4369 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
4370 offset));
64dedd72
JW
4371
4372 regno2 = aarch64_next_callee_save (regno + 1, limit);
4373
4374 if (regno2 <= limit
827ab47a 4375 && !cfun->machine->reg_is_wrapped_separately[regno2]
64dedd72
JW
4376 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
4377 == cfun->machine->frame.reg_offset[regno2]))
4378
43e9d192 4379 {
0ec74a1e 4380 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
4381 rtx mem2;
4382
4383 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde
WD
4384 mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
4385 offset));
8ed2fc62
JW
4386 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
4387 reg2));
0b4a9743 4388
64dedd72
JW
4389 /* The first part of a frame-related parallel insn is
4390 always assumed to be relevant to the frame
4391 calculations; subsequent parts are only
4392 frame-related if explicitly marked. */
4393 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
4394 regno = regno2;
4395 }
4396 else
8ed2fc62
JW
4397 insn = emit_move_insn (mem, reg);
4398
4399 RTX_FRAME_RELATED_P (insn) = 1;
4400 }
4401}
4402
04ddfe06
KT
4403/* Emit code to restore the callee registers of mode MODE from register
4404 number START up to and including LIMIT. Restore from the stack offset
4405 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
4406 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
4407
8ed2fc62 4408static void
ef4bddc2 4409aarch64_restore_callee_saves (machine_mode mode,
6a70badb 4410 poly_int64 start_offset, unsigned start,
dd991abb 4411 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 4412{
8ed2fc62 4413 rtx base_rtx = stack_pointer_rtx;
8ed2fc62
JW
4414 unsigned regno;
4415 unsigned regno2;
6a70badb 4416 poly_int64 offset;
8ed2fc62
JW
4417
4418 for (regno = aarch64_next_callee_save (start, limit);
4419 regno <= limit;
4420 regno = aarch64_next_callee_save (regno + 1, limit))
4421 {
827ab47a
KT
4422 if (cfun->machine->reg_is_wrapped_separately[regno])
4423 continue;
4424
ae13fce3 4425 rtx reg, mem;
8ed2fc62 4426
ae13fce3
JW
4427 if (skip_wb
4428 && (regno == cfun->machine->frame.wb_candidate1
4429 || regno == cfun->machine->frame.wb_candidate2))
4430 continue;
4431
4432 reg = gen_rtx_REG (mode, regno);
8ed2fc62 4433 offset = start_offset + cfun->machine->frame.reg_offset[regno];
30079dde 4434 mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
8ed2fc62
JW
4435
4436 regno2 = aarch64_next_callee_save (regno + 1, limit);
4437
4438 if (regno2 <= limit
827ab47a 4439 && !cfun->machine->reg_is_wrapped_separately[regno2]
8ed2fc62
JW
4440 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
4441 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 4442 {
8ed2fc62
JW
4443 rtx reg2 = gen_rtx_REG (mode, regno2);
4444 rtx mem2;
4445
4446 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
30079dde 4447 mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 4448 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 4449
dd991abb 4450 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 4451 regno = regno2;
43e9d192 4452 }
8ed2fc62 4453 else
dd991abb
RH
4454 emit_move_insn (reg, mem);
4455 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 4456 }
43e9d192
IB
4457}
4458
43cacb12
RS
4459/* Return true if OFFSET is a signed 4-bit value multiplied by the size
4460 of MODE. */
4461
4462static inline bool
4463offset_4bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
4464{
4465 HOST_WIDE_INT multiple;
4466 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4467 && IN_RANGE (multiple, -8, 7));
4468}
4469
4470/* Return true if OFFSET is an unsigned 6-bit value multiplied by the size
4471 of MODE. */
4472
4473static inline bool
4474offset_6bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
4475{
4476 HOST_WIDE_INT multiple;
4477 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4478 && IN_RANGE (multiple, 0, 63));
4479}
4480
4481/* Return true if OFFSET is a signed 7-bit value multiplied by the size
4482 of MODE. */
4483
4484bool
4485aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
4486{
4487 HOST_WIDE_INT multiple;
4488 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
4489 && IN_RANGE (multiple, -64, 63));
4490}
4491
4492/* Return true if OFFSET is a signed 9-bit value. */
4493
827ab47a
KT
4494static inline bool
4495offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
6a70badb 4496 poly_int64 offset)
827ab47a 4497{
6a70badb
RS
4498 HOST_WIDE_INT const_offset;
4499 return (offset.is_constant (&const_offset)
4500 && IN_RANGE (const_offset, -256, 255));
827ab47a
KT
4501}
4502
43cacb12
RS
4503/* Return true if OFFSET is a signed 9-bit value multiplied by the size
4504 of MODE. */
4505
827ab47a 4506static inline bool
43cacb12 4507offset_9bit_signed_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 4508{
6a70badb
RS
4509 HOST_WIDE_INT multiple;
4510 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 4511 && IN_RANGE (multiple, -256, 255));
827ab47a
KT
4512}
4513
43cacb12
RS
4514/* Return true if OFFSET is an unsigned 12-bit value multiplied by the size
4515 of MODE. */
4516
4517static inline bool
4518offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
827ab47a 4519{
6a70badb
RS
4520 HOST_WIDE_INT multiple;
4521 return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple)
43cacb12 4522 && IN_RANGE (multiple, 0, 4095));
827ab47a
KT
4523}
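/* A standalone sketch (not GCC code) of the scaled-offset predicates above:
   an offset qualifies when it is an exact multiple of the access size and
   that multiple fits the instruction's immediate field, e.g. 0..4095 for
   the unsigned 12-bit scaled form.  Names below are hypothetical.  */
static int
example_offset_scaled_in_range (long offset, long mode_size,
                                long low, long high)
{
  long multiple;
  if (mode_size == 0 || offset % mode_size != 0)
    return 0;
  multiple = offset / mode_size;
  return multiple >= low && multiple <= high;
}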
4524
4525/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
4526
4527static sbitmap
4528aarch64_get_separate_components (void)
4529{
4530 aarch64_layout_frame ();
4531
4532 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
4533 bitmap_clear (components);
4534
4535 /* The registers we need saved to the frame. */
4536 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
4537 if (aarch64_register_saved_on_entry (regno))
4538 {
6a70badb 4539 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
4540 if (!frame_pointer_needed)
4541 offset += cfun->machine->frame.frame_size
4542 - cfun->machine->frame.hard_fp_offset;
4543 /* Check that we can access the stack slot of the register with one
4544 direct load with no adjustments needed. */
4545 if (offset_12bit_unsigned_scaled_p (DImode, offset))
4546 bitmap_set_bit (components, regno);
4547 }
4548
4549 /* Don't mess with the hard frame pointer. */
4550 if (frame_pointer_needed)
4551 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
4552
4553 unsigned reg1 = cfun->machine->frame.wb_candidate1;
4554 unsigned reg2 = cfun->machine->frame.wb_candidate2;
4555 /* If aarch64_layout_frame has chosen registers to store/restore with
4556 writeback, don't interfere with them to avoid having to output explicit
4557 stack adjustment instructions. */
4558 if (reg2 != INVALID_REGNUM)
4559 bitmap_clear_bit (components, reg2);
4560 if (reg1 != INVALID_REGNUM)
4561 bitmap_clear_bit (components, reg1);
4562
4563 bitmap_clear_bit (components, LR_REGNUM);
4564 bitmap_clear_bit (components, SP_REGNUM);
4565
4566 return components;
4567}
4568
4569/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
4570
4571static sbitmap
4572aarch64_components_for_bb (basic_block bb)
4573{
4574 bitmap in = DF_LIVE_IN (bb);
4575 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
4576 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
4577
4578 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
4579 bitmap_clear (components);
4580
4581 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
4582 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
4583 if ((!call_used_regs[regno])
4584 && (bitmap_bit_p (in, regno)
4585 || bitmap_bit_p (gen, regno)
4586 || bitmap_bit_p (kill, regno)))
3f26f054
WD
4587 {
4588 unsigned regno2, offset, offset2;
4589 bitmap_set_bit (components, regno);
4590
4591 /* If there is a callee-save at an adjacent offset, add it as well
4592 to increase the use of LDP/STP. */
4593 offset = cfun->machine->frame.reg_offset[regno];
4594 regno2 = ((offset & 8) == 0) ? regno + 1 : regno - 1;
4595
4596 if (regno2 <= LAST_SAVED_REGNUM)
4597 {
4598 offset2 = cfun->machine->frame.reg_offset[regno2];
4599 if ((offset & ~8) == (offset2 & ~8))
4600 bitmap_set_bit (components, regno2);
4601 }
4602 }
827ab47a
KT
4603
4604 return components;
4605}
4606
4607/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
4608 Nothing to do for aarch64. */
4609
4610static void
4611aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
4612{
4613}
4614
4615/* Return the next set bit in BMP from START onwards. Return the total number
4616 of bits in BMP if no set bit is found at or after START. */
4617
4618static unsigned int
4619aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
4620{
4621 unsigned int nbits = SBITMAP_SIZE (bmp);
4622 if (start == nbits)
4623 return start;
4624
4625 gcc_assert (start < nbits);
4626 for (unsigned int i = start; i < nbits; i++)
4627 if (bitmap_bit_p (bmp, i))
4628 return i;
4629
4630 return nbits;
4631}
4632
4633/* Do the work for aarch64_emit_prologue_components and
4634 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
4635 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
4636 for these components or the epilogue sequence. That is, it determines
4637 whether we should emit stores or loads and what kind of CFA notes to attach
4638 to the insns. Otherwise the logic for the two sequences is very
4639 similar. */
4640
4641static void
4642aarch64_process_components (sbitmap components, bool prologue_p)
4643{
4644 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
4645 ? HARD_FRAME_POINTER_REGNUM
4646 : STACK_POINTER_REGNUM);
4647
4648 unsigned last_regno = SBITMAP_SIZE (components);
4649 unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
4650 rtx_insn *insn = NULL;
4651
4652 while (regno != last_regno)
4653 {
4654 /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved
4655 so DFmode for the vector registers is enough. */
0d4a1197 4656 machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode;
827ab47a 4657 rtx reg = gen_rtx_REG (mode, regno);
6a70badb 4658 poly_int64 offset = cfun->machine->frame.reg_offset[regno];
827ab47a
KT
4659 if (!frame_pointer_needed)
4660 offset += cfun->machine->frame.frame_size
4661 - cfun->machine->frame.hard_fp_offset;
4662 rtx addr = plus_constant (Pmode, ptr_reg, offset);
4663 rtx mem = gen_frame_mem (mode, addr);
4664
4665 rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
4666 unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
4667 /* No more registers to handle after REGNO.
4668 Emit a single save/restore and exit. */
4669 if (regno2 == last_regno)
4670 {
4671 insn = emit_insn (set);
4672 RTX_FRAME_RELATED_P (insn) = 1;
4673 if (prologue_p)
4674 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
4675 else
4676 add_reg_note (insn, REG_CFA_RESTORE, reg);
4677 break;
4678 }
4679
6a70badb 4680 poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
827ab47a
KT
4681 /* The next register is not of the same class or its offset is not
4682 mergeable with the current one into a pair. */
4683 if (!satisfies_constraint_Ump (mem)
4684 || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
6a70badb
RS
4685 || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
4686 GET_MODE_SIZE (mode)))
827ab47a
KT
4687 {
4688 insn = emit_insn (set);
4689 RTX_FRAME_RELATED_P (insn) = 1;
4690 if (prologue_p)
4691 add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
4692 else
4693 add_reg_note (insn, REG_CFA_RESTORE, reg);
4694
4695 regno = regno2;
4696 continue;
4697 }
4698
4699 /* REGNO2 can be saved/restored in a pair with REGNO. */
4700 rtx reg2 = gen_rtx_REG (mode, regno2);
4701 if (!frame_pointer_needed)
4702 offset2 += cfun->machine->frame.frame_size
4703 - cfun->machine->frame.hard_fp_offset;
4704 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
4705 rtx mem2 = gen_frame_mem (mode, addr2);
4706 rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
4707 : gen_rtx_SET (reg2, mem2);
4708
4709 if (prologue_p)
4710 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
4711 else
4712 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
4713
4714 RTX_FRAME_RELATED_P (insn) = 1;
4715 if (prologue_p)
4716 {
4717 add_reg_note (insn, REG_CFA_OFFSET, set);
4718 add_reg_note (insn, REG_CFA_OFFSET, set2);
4719 }
4720 else
4721 {
4722 add_reg_note (insn, REG_CFA_RESTORE, reg);
4723 add_reg_note (insn, REG_CFA_RESTORE, reg2);
4724 }
4725
4726 regno = aarch64_get_next_set_bit (components, regno2 + 1);
4727 }
4728}
4729
4730/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
4731
4732static void
4733aarch64_emit_prologue_components (sbitmap components)
4734{
4735 aarch64_process_components (components, true);
4736}
4737
4738/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
4739
4740static void
4741aarch64_emit_epilogue_components (sbitmap components)
4742{
4743 aarch64_process_components (components, false);
4744}
4745
4746/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
4747
4748static void
4749aarch64_set_handled_components (sbitmap components)
4750{
4751 for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
4752 if (bitmap_bit_p (components, regno))
4753 cfun->machine->reg_is_wrapped_separately[regno] = true;
4754}
4755
43cacb12
RS
4756/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG
4757 is saved at BASE + OFFSET. */
4758
4759static void
4760aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg,
4761 rtx base, poly_int64 offset)
4762{
4763 rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset));
4764 add_reg_note (insn, REG_CFA_EXPRESSION,
4765 gen_rtx_SET (mem, regno_reg_rtx[reg]));
4766}
4767
43e9d192
IB
4768/* AArch64 stack frames generated by this compiler look like:
4769
4770 +-------------------------------+
4771 | |
4772 | incoming stack arguments |
4773 | |
34834420
MS
4774 +-------------------------------+
4775 | | <-- incoming stack pointer (aligned)
43e9d192
IB
4776 | callee-allocated save area |
4777 | for register varargs |
4778 | |
34834420
MS
4779 +-------------------------------+
4780 | local variables | <-- frame_pointer_rtx
43e9d192
IB
4781 | |
4782 +-------------------------------+
454fdba9
RL
4783 | padding0 | \
4784 +-------------------------------+ |
454fdba9 4785 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
4786 +-------------------------------+ |
4787 | LR' | |
4788 +-------------------------------+ |
34834420
MS
4789 | FP' | / <- hard_frame_pointer_rtx (aligned)
4790 +-------------------------------+
43e9d192
IB
4791 | dynamic allocation |
4792 +-------------------------------+
34834420
MS
4793 | padding |
4794 +-------------------------------+
4795 | outgoing stack arguments | <-- arg_pointer
4796 | |
4797 +-------------------------------+
4798 | | <-- stack_pointer_rtx (aligned)
43e9d192 4799
34834420
MS
4800 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
4801 but leave frame_pointer_rtx and hard_frame_pointer_rtx
4802 unchanged. */
43e9d192
IB
4803
4804/* Generate the prologue instructions for entry into a function.
4805 Establish the stack frame by decreasing the stack pointer with a
4806 properly calculated size and, if necessary, create a frame record
4807 filled with the values of LR and the previous frame pointer. The
6991c977 4808 current FP is also set up if it is in use. */
43e9d192
IB
4809
4810void
4811aarch64_expand_prologue (void)
4812{
43e9d192 4813 aarch64_layout_frame ();
43e9d192 4814
6a70badb
RS
4815 poly_int64 frame_size = cfun->machine->frame.frame_size;
4816 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 4817 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
4818 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
4819 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
4820 unsigned reg1 = cfun->machine->frame.wb_candidate1;
4821 unsigned reg2 = cfun->machine->frame.wb_candidate2;
204d2c03 4822 bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
71bfb77a 4823 rtx_insn *insn;
43e9d192 4824
db58fd89
JW
4825 /* Sign return address for functions. */
4826 if (aarch64_return_address_signing_enabled ())
27169e45
JW
4827 {
4828 insn = emit_insn (gen_pacisp ());
4829 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
4830 RTX_FRAME_RELATED_P (insn) = 1;
4831 }
db58fd89 4832
dd991abb 4833 if (flag_stack_usage_info)
6a70badb 4834 current_function_static_stack_size = constant_lower_bound (frame_size);
43e9d192 4835
a3eb8a52
EB
4836 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4837 {
4838 if (crtl->is_leaf && !cfun->calls_alloca)
4839 {
6a70badb
RS
4840 if (maybe_gt (frame_size, PROBE_INTERVAL)
4841 && maybe_gt (frame_size, get_stack_check_protect ()))
8c1dd970
JL
4842 aarch64_emit_probe_stack_range (get_stack_check_protect (),
4843 (frame_size
4844 - get_stack_check_protect ()));
a3eb8a52 4845 }
6a70badb 4846 else if (maybe_gt (frame_size, 0))
8c1dd970 4847 aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
a3eb8a52
EB
4848 }
4849
f5470a77
RS
4850 rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
4851 rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
4852
43cacb12 4853 aarch64_sub_sp (ip0_rtx, ip1_rtx, initial_adjust, true);
43e9d192 4854
71bfb77a
WD
4855 if (callee_adjust != 0)
4856 aarch64_push_regs (reg1, reg2, callee_adjust);
43e9d192 4857
204d2c03 4858 if (emit_frame_chain)
43e9d192 4859 {
43cacb12 4860 poly_int64 reg_offset = callee_adjust;
71bfb77a 4861 if (callee_adjust == 0)
43cacb12
RS
4862 {
4863 reg1 = R29_REGNUM;
4864 reg2 = R30_REGNUM;
4865 reg_offset = callee_offset;
4866 aarch64_save_callee_saves (DImode, reg_offset, reg1, reg2, false);
4867 }
f5470a77 4868 aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
43cacb12
RS
4869 stack_pointer_rtx, callee_offset,
4870 ip1_rtx, ip0_rtx, frame_pointer_needed);
4871 if (frame_pointer_needed && !frame_size.is_constant ())
4872 {
4873 /* Variable-sized frames need to describe the save slot
4874 address using DW_CFA_expression rather than DW_CFA_offset.
4875 This means that, without taking further action, the
4876 locations of the registers that we've already saved would
4877 remain based on the stack pointer even after we redefine
4878 the CFA based on the frame pointer. We therefore need new
4879 DW_CFA_expressions to re-express the save slots with addresses
4880 based on the frame pointer. */
4881 rtx_insn *insn = get_last_insn ();
4882 gcc_assert (RTX_FRAME_RELATED_P (insn));
4883
4884 /* Add an explicit CFA definition if this was previously
4885 implicit. */
4886 if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
4887 {
4888 rtx src = plus_constant (Pmode, stack_pointer_rtx,
4889 callee_offset);
4890 add_reg_note (insn, REG_CFA_ADJUST_CFA,
4891 gen_rtx_SET (hard_frame_pointer_rtx, src));
4892 }
4893
4894 /* Change the save slot expressions for the registers that
4895 we've already saved. */
4896 reg_offset -= callee_offset;
4897 aarch64_add_cfa_expression (insn, reg2, hard_frame_pointer_rtx,
4898 reg_offset + UNITS_PER_WORD);
4899 aarch64_add_cfa_expression (insn, reg1, hard_frame_pointer_rtx,
4900 reg_offset);
4901 }
71bfb77a 4902 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192 4903 }
71bfb77a
WD
4904
4905 aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
204d2c03 4906 callee_adjust != 0 || emit_frame_chain);
71bfb77a 4907 aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
204d2c03 4908 callee_adjust != 0 || emit_frame_chain);
43cacb12 4909 aarch64_sub_sp (ip1_rtx, ip0_rtx, final_adjust, !frame_pointer_needed);
43e9d192
IB
4910}
4911
4f942779
RL
4912/* Return TRUE if we can use a simple_return insn.
4913
4914 This function checks whether the callee saved stack is empty, which
4915 means no restore actions are needed. The pro_and_epilogue pass will use
4916 this to check whether the shrink-wrapping optimization is feasible. */
4917
4918bool
4919aarch64_use_return_insn_p (void)
4920{
4921 if (!reload_completed)
4922 return false;
4923
4924 if (crtl->profile)
4925 return false;
4926
4927 aarch64_layout_frame ();
4928
6a70badb 4929 return known_eq (cfun->machine->frame.frame_size, 0);
4f942779
RL
4930}
4931
71bfb77a
WD
4932/* Generate the epilogue instructions for returning from a function.
4933 This is almost exactly the reverse of the prolog sequence, except
4934 that we need to insert barriers to avoid scheduling loads that read
4935 from a deallocated stack, and we optimize the unwind records by
4936 emitting them all together if possible. */
43e9d192
IB
4937void
4938aarch64_expand_epilogue (bool for_sibcall)
4939{
43e9d192 4940 aarch64_layout_frame ();
43e9d192 4941
6a70badb 4942 poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
71bfb77a 4943 HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
6a70badb
RS
4944 poly_int64 final_adjust = cfun->machine->frame.final_adjust;
4945 poly_int64 callee_offset = cfun->machine->frame.callee_offset;
71bfb77a
WD
4946 unsigned reg1 = cfun->machine->frame.wb_candidate1;
4947 unsigned reg2 = cfun->machine->frame.wb_candidate2;
4948 rtx cfi_ops = NULL;
4949 rtx_insn *insn;
43cacb12
RS
4950 /* A stack clash protection prologue may not have left IP0_REGNUM or
4951 IP1_REGNUM in a usable state. The same is true for allocations
4952 with an SVE component, since we then need both temporary registers
4953 for each allocation. */
4954 bool can_inherit_p = (initial_adjust.is_constant ()
4955 && final_adjust.is_constant ()
4956 && !flag_stack_clash_protection);
44c0e7b9 4957
71bfb77a 4958 /* We need a memory barrier to prevent reads from the deallocated stack. */
6a70badb
RS
4959 bool need_barrier_p
4960 = maybe_ne (get_frame_size ()
4961 + cfun->machine->frame.saved_varargs_size, 0);
43e9d192 4962
71bfb77a 4963 /* Emit a barrier to prevent loads from a deallocated stack. */
6a70badb
RS
4964 if (maybe_gt (final_adjust, crtl->outgoing_args_size)
4965 || cfun->calls_alloca
8144a493 4966 || crtl->calls_eh_return)
43e9d192 4967 {
71bfb77a
WD
4968 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
4969 need_barrier_p = false;
4970 }
7e8c2bd5 4971
71bfb77a
WD
4972 /* Restore the stack pointer from the frame pointer if it may not
4973 be the same as the stack pointer. */
f5470a77
RS
4974 rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM);
4975 rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM);
6a70badb
RS
4976 if (frame_pointer_needed
4977 && (maybe_ne (final_adjust, 0) || cfun->calls_alloca))
f5470a77
RS
4978 /* If writeback is used when restoring callee-saves, the CFA
4979 is restored on the instruction doing the writeback. */
4980 aarch64_add_offset (Pmode, stack_pointer_rtx,
4981 hard_frame_pointer_rtx, -callee_offset,
43cacb12 4982 ip1_rtx, ip0_rtx, callee_adjust == 0);
71bfb77a 4983 else
43cacb12
RS
4984 aarch64_add_sp (ip1_rtx, ip0_rtx, final_adjust,
4985 !can_inherit_p || df_regs_ever_live_p (IP1_REGNUM));
43e9d192 4986
71bfb77a
WD
4987 aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
4988 callee_adjust != 0, &cfi_ops);
4989 aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
4990 callee_adjust != 0, &cfi_ops);
43e9d192 4991
71bfb77a
WD
4992 if (need_barrier_p)
4993 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
4994
4995 if (callee_adjust != 0)
4996 aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
4997
6a70badb 4998 if (callee_adjust != 0 || maybe_gt (initial_adjust, 65536))
71bfb77a
WD
4999 {
5000 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
89ac681e 5001 insn = get_last_insn ();
71bfb77a
WD
5002 rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust);
5003 REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
43e9d192 5004 RTX_FRAME_RELATED_P (insn) = 1;
71bfb77a 5005 cfi_ops = NULL;
43e9d192
IB
5006 }
5007
43cacb12
RS
5008 aarch64_add_sp (ip0_rtx, ip1_rtx, initial_adjust,
5009 !can_inherit_p || df_regs_ever_live_p (IP0_REGNUM));
7e8c2bd5 5010
71bfb77a
WD
5011 if (cfi_ops)
5012 {
5013 /* Emit delayed restores and reset the CFA to be SP. */
5014 insn = get_last_insn ();
5015 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops);
5016 REG_NOTES (insn) = cfi_ops;
5017 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb
RH
5018 }
5019
db58fd89
JW
5020 /* We prefer to emit the combined return/authenticate instruction RETAA;
5021 however, there are three cases in which we must instead emit an explicit
5022 authentication instruction.
5023
5024 1) Sibcalls don't return in a normal way, so if we're about to call one
5025 we must authenticate.
5026
5027 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
5028 generating code for !TARGET_ARMV8_3 we can't use it and must
5029 explicitly authenticate.
5030
5031 3) On an eh_return path we make extra stack adjustments to update the
5032 canonical frame address to be the exception handler's CFA. We want
5033 to authenticate using the CFA of the function which calls eh_return.
5034 */
5035 if (aarch64_return_address_signing_enabled ()
5036 && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return))
27169e45
JW
5037 {
5038 insn = emit_insn (gen_autisp ());
5039 add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
5040 RTX_FRAME_RELATED_P (insn) = 1;
5041 }
db58fd89 5042
dd991abb
RH
5043 /* Stack adjustment for exception handler. */
5044 if (crtl->calls_eh_return)
5045 {
5046 /* We need to unwind the stack by the offset computed by
5047 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
5048 to be SP; letting the CFA move during this adjustment
5049 is just as correct as retaining the CFA from the body
5050 of the function. Therefore, do nothing special. */
5051 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
5052 }
5053
5054 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
5055 if (!for_sibcall)
5056 emit_jump_insn (ret_rtx);
5057}
5058
8144a493
WD
5059/* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
5060 normally or return to a previous frame after unwinding.
1c960e02 5061
8144a493
WD
5062 An EH return uses a single shared return sequence. The epilogue is
5063 exactly like a normal epilogue except that it has an extra input
5064 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
5065 that must be applied after the frame has been destroyed. An extra label
5066 is inserted before the epilogue which initializes this register to zero,
5067 and this is the entry point for a normal return.
43e9d192 5068
8144a493
WD
5069 An actual EH return updates the return address, initializes the stack
5070 adjustment and jumps directly into the epilogue (bypassing the zeroing
5071 of the adjustment). Since the return address is typically saved on the
5072 stack when a function makes a call, the saved LR must be updated outside
5073 the epilogue.
43e9d192 5074
8144a493
WD
5075 This poses problems as the store is generated well before the epilogue,
5076 so the offset of LR is not known yet. Also optimizations will remove the
5077 store as it appears dead, even after the epilogue is generated (as the
5078 base or offset for loading LR is different in many cases).
43e9d192 5079
8144a493
WD
5080 To avoid these problems this implementation forces the frame pointer
5081 in eh_return functions so that the location of LR is fixed and known early.
5082 It also marks the store volatile, so no optimization is permitted to
5083 remove the store. */
5084rtx
5085aarch64_eh_return_handler_rtx (void)
5086{
5087 rtx tmp = gen_frame_mem (Pmode,
5088 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
43e9d192 5089
8144a493
WD
5090 /* Mark the store volatile, so no optimization is permitted to remove it. */
5091 MEM_VOLATILE_P (tmp) = true;
5092 return tmp;
43e9d192
IB
5093}
5094
43e9d192
IB
5095/* Output code to add DELTA to the first argument, and then jump
5096 to FUNCTION. Used for C++ multiple inheritance. */
5097static void
5098aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
5099 HOST_WIDE_INT delta,
5100 HOST_WIDE_INT vcall_offset,
5101 tree function)
5102{
5103 /* The this pointer is always in x0. Note that this differs from
5104 Arm where the this pointer maybe bumped to r1 if r0 is required
5105 to return a pointer to an aggregate. On AArch64 a result value
5106 pointer will be in x8. */
5107 int this_regno = R0_REGNUM;
5d8a22a5
DM
5108 rtx this_rtx, temp0, temp1, addr, funexp;
5109 rtx_insn *insn;
43e9d192 5110
75f1d6fc
SN
5111 reload_completed = 1;
5112 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192 5113
f5470a77
RS
5114 this_rtx = gen_rtx_REG (Pmode, this_regno);
5115 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
5116 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
5117
43e9d192 5118 if (vcall_offset == 0)
43cacb12 5119 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false);
43e9d192
IB
5120 else
5121 {
28514dda 5122 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 5123
75f1d6fc
SN
5124 addr = this_rtx;
5125 if (delta != 0)
5126 {
5127 if (delta >= -256 && delta < 256)
5128 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
5129 plus_constant (Pmode, this_rtx, delta));
5130 else
43cacb12
RS
5131 aarch64_add_offset (Pmode, this_rtx, this_rtx, delta,
5132 temp1, temp0, false);
43e9d192
IB
5133 }
5134
28514dda
YZ
5135 if (Pmode == ptr_mode)
5136 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
5137 else
5138 aarch64_emit_move (temp0,
5139 gen_rtx_ZERO_EXTEND (Pmode,
5140 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 5141
28514dda 5142 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 5143 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
5144 else
5145 {
f43657b4
JW
5146 aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true,
5147 Pmode);
75f1d6fc 5148 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
5149 }
5150
28514dda
YZ
5151 if (Pmode == ptr_mode)
5152 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
5153 else
5154 aarch64_emit_move (temp1,
5155 gen_rtx_SIGN_EXTEND (Pmode,
5156 gen_rtx_MEM (ptr_mode, addr)));
5157
75f1d6fc 5158 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
5159 }
5160
75f1d6fc
SN
5161 /* Generate a tail call to the target function. */
5162 if (!TREE_USED (function))
5163 {
5164 assemble_external (function);
5165 TREE_USED (function) = 1;
5166 }
5167 funexp = XEXP (DECL_RTL (function), 0);
5168 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
5169 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
5170 SIBLING_CALL_P (insn) = 1;
5171
5172 insn = get_insns ();
5173 shorten_branches (insn);
5174 final_start_function (insn, file, 1);
5175 final (insn, file, 1);
43e9d192 5176 final_end_function ();
75f1d6fc
SN
5177
5178 /* Stop pretending to be a post-reload pass. */
5179 reload_completed = 0;
43e9d192
IB
5180}
5181
43e9d192
IB
5182static bool
5183aarch64_tls_referenced_p (rtx x)
5184{
5185 if (!TARGET_HAVE_TLS)
5186 return false;
e7de8563
RS
5187 subrtx_iterator::array_type array;
5188 FOR_EACH_SUBRTX (iter, array, x, ALL)
5189 {
5190 const_rtx x = *iter;
5191 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
5192 return true;
5193 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
5194 TLS offsets, not real symbol references. */
5195 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
5196 iter.skip_subrtxes ();
5197 }
5198 return false;
43e9d192
IB
5199}
5200
5201
43e9d192
IB
5202/* Return true if val can be encoded as a 12-bit unsigned immediate with
5203 a left shift of 0 or 12 bits. */
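/* For example, 0xabc and 0xabc000 are encodable, but 0xabc001 is not,
   since its set bits span both 12-bit fields.  */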
5204bool
5205aarch64_uimm12_shift (HOST_WIDE_INT val)
5206{
5207 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
5208 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
5209 );
5210}
5211
5212
5213/* Return true if val is an immediate that can be loaded into a
5214 register by a MOVZ instruction. */
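/* MOVZ places a 16-bit immediate at bit position 0, 16, 32 or 48
   (0 or 16 for 32-bit values), so e.g. 0xbeef0000 needs only one
   instruction while 0x10001 does not qualify.  */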
5215static bool
77e994c9 5216aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
43e9d192
IB
5217{
5218 if (GET_MODE_SIZE (mode) > 4)
5219 {
5220 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
5221 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
5222 return 1;
5223 }
5224 else
5225 {
43cacb12
RS
5226 /* Ignore sign extension. */
5227 val &= (HOST_WIDE_INT) 0xffffffff;
5228 }
5229 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
5230 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
5231}
5232
5233/* VAL is a value with the inner mode of MODE. Replicate it to fill a
5234 64-bit (DImode) integer. */
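/* For example, replicating the HImode value 0x00f0 yields
   0x00f000f000f000f0.  */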
5235
5236static unsigned HOST_WIDE_INT
5237aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
5238{
5239 unsigned int size = GET_MODE_UNIT_PRECISION (mode);
5240 while (size < 64)
5241 {
5242 val &= (HOST_WIDE_INT_1U << size) - 1;
5243 val |= val << size;
5244 size *= 2;
43e9d192 5245 }
43cacb12 5246 return val;
43e9d192
IB
5247}
5248
a64c73a2
WD
5249/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
5250
5251static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
5252 {
5253 0x0000000100000001ull,
5254 0x0001000100010001ull,
5255 0x0101010101010101ull,
5256 0x1111111111111111ull,
5257 0x5555555555555555ull,
5258 };
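/* Multiplying a repeating pattern that fits in the low N bits by the
   corresponding entry replicates it across all 64 bits, e.g.
   0x3c * 0x0101010101010101 == 0x3c3c3c3c3c3c3c3c.  */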
5259
43e9d192
IB
5260
5261/* Return true if val is a valid bitmask immediate. */
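/* For example, 0x0000ffff0000ffff (a run of sixteen ones repeated every
   32 bits) is a valid bitmask immediate, whereas 0x1234 is not.  */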
a64c73a2 5262
43e9d192 5263bool
a64c73a2 5264aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
43e9d192 5265{
a64c73a2
WD
5266 unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
5267 int bits;
5268
5269 /* Check for a single sequence of one bits and return quickly if so.
5270 The special cases of all ones and all zeroes returns false. */
43cacb12 5271 val = aarch64_replicate_bitmask_imm (val_in, mode);
a64c73a2
WD
5272 tmp = val + (val & -val);
5273
5274 if (tmp == (tmp & -tmp))
5275 return (val + 1) > 1;
5276
5277 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
5278 if (mode == SImode)
5279 val = (val << 32) | (val & 0xffffffff);
5280
5281 /* Invert if the immediate doesn't start with a zero bit - this means we
5282 only need to search for sequences of one bits. */
5283 if (val & 1)
5284 val = ~val;
5285
5286 /* Find the first set bit and set tmp to val with the first sequence of one
5287 bits removed. Return success if there is a single sequence of ones. */
5288 first_one = val & -val;
5289 tmp = val & (val + first_one);
5290
5291 if (tmp == 0)
5292 return true;
5293
5294 /* Find the next set bit and compute the difference in bit position. */
5295 next_one = tmp & -tmp;
5296 bits = clz_hwi (first_one) - clz_hwi (next_one);
5297 mask = val ^ tmp;
5298
5299 /* Check the bit position difference is a power of 2, and that the first
5300 sequence of one bits fits within 'bits' bits. */
5301 if ((mask >> bits) != 0 || bits != (bits & -bits))
5302 return false;
5303
5304 /* Check the sequence of one bits is repeated 64/bits times. */
5305 return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
43e9d192
IB
5306}
5307
43fd192f
MC
5308/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
5309 Assumed precondition: VAL_IN is not zero. */
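/* For example, VAL_IN == 0x0ff00ff0 yields 0x0ffffff0.  */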
5310
5311unsigned HOST_WIDE_INT
5312aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
5313{
5314 int lowest_bit_set = ctz_hwi (val_in);
5315 int highest_bit_set = floor_log2 (val_in);
5316 gcc_assert (val_in != 0);
5317
5318 return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
5319 (HOST_WIDE_INT_1U << lowest_bit_set));
5320}
5321
5322/* Create constant where bits outside of lowest bit set to highest bit set
5323 are set to 1. */
5324
5325unsigned HOST_WIDE_INT
5326aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
5327{
5328 return val_in | ~aarch64_and_split_imm1 (val_in);
5329}
5330
5331/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
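/* Because imm1 & imm2 == VAL_IN, x & VAL_IN can be rewritten as
   (x & imm1) & imm2.  This predicate returns true when VAL_IN is
   neither a bitmask nor a MOV immediate itself but imm2 is a valid
   bitmask immediate, so such a split is possible.  */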
5332
5333bool
5334aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
5335{
77e994c9
RS
5336 scalar_int_mode int_mode;
5337 if (!is_a <scalar_int_mode> (mode, &int_mode))
5338 return false;
5339
5340 if (aarch64_bitmask_imm (val_in, int_mode))
43fd192f
MC
5341 return false;
5342
77e994c9 5343 if (aarch64_move_imm (val_in, int_mode))
43fd192f
MC
5344 return false;
5345
5346 unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
5347
77e994c9 5348 return aarch64_bitmask_imm (imm2, int_mode);
43fd192f 5349}
43e9d192
IB
5350
5351/* Return true if val is an immediate that can be loaded into a
5352 register in a single instruction. */
5353bool
ef4bddc2 5354aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
43e9d192 5355{
77e994c9
RS
5356 scalar_int_mode int_mode;
5357 if (!is_a <scalar_int_mode> (mode, &int_mode))
5358 return false;
5359
5360 if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
43e9d192 5361 return 1;
77e994c9 5362 return aarch64_bitmask_imm (val, int_mode);
43e9d192
IB
5363}
5364
5365static bool
ef4bddc2 5366aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
43e9d192
IB
5367{
5368 rtx base, offset;
7eda14e1 5369
43e9d192
IB
5370 if (GET_CODE (x) == HIGH)
5371 return true;
5372
43cacb12
RS
5373 /* There's no way to calculate VL-based values using relocations. */
5374 subrtx_iterator::array_type array;
5375 FOR_EACH_SUBRTX (iter, array, x, ALL)
5376 if (GET_CODE (*iter) == CONST_POLY_INT)
5377 return true;
5378
43e9d192
IB
5379 split_const (x, &base, &offset);
5380 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda 5381 {
43cacb12 5382 if (aarch64_classify_symbol (base, INTVAL (offset))
28514dda
YZ
5383 != SYMBOL_FORCE_TO_MEM)
5384 return true;
5385 else
5386 /* Avoid generating a 64-bit relocation in ILP32; leave
5387 to aarch64_expand_mov_immediate to handle it properly. */
5388 return mode != ptr_mode;
5389 }
43e9d192
IB
5390
5391 return aarch64_tls_referenced_p (x);
5392}
5393
e79136e4
WD
5394/* Implement TARGET_CASE_VALUES_THRESHOLD.
5395 The expansion for a table switch is quite expensive due to the number
5396 of instructions, the table lookup and the hard-to-predict indirect jump.
5397 When optimizing for speed, and -O3 enabled, use the per-core tuning if
5398 set, otherwise use tables for > 16 cases as a tradeoff between size and
5399 performance. When optimizing for size, use the default setting. */
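/* For example, when not optimizing for size and no per-core -O3 tuning
   applies, a switch needs at least 17 case values before a jump table
   is emitted.  */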
50487d79
EM
5400
5401static unsigned int
5402aarch64_case_values_threshold (void)
5403{
5404 /* Use the specified limit for the number of cases before using jump
5405 tables at higher optimization levels. */
5406 if (optimize > 2
5407 && selected_cpu->tune->max_case_values != 0)
5408 return selected_cpu->tune->max_case_values;
5409 else
e79136e4 5410 return optimize_size ? default_case_values_threshold () : 17;
50487d79
EM
5411}
5412
43e9d192
IB
5413/* Return true if register REGNO is a valid index register.
5414 STRICT_P is true if REG_OK_STRICT is in effect. */
5415
5416bool
5417aarch64_regno_ok_for_index_p (int regno, bool strict_p)
5418{
5419 if (!HARD_REGISTER_NUM_P (regno))
5420 {
5421 if (!strict_p)
5422 return true;
5423
5424 if (!reg_renumber)
5425 return false;
5426
5427 regno = reg_renumber[regno];
5428 }
5429 return GP_REGNUM_P (regno);
5430}
5431
5432/* Return true if register REGNO is a valid base register for mode MODE.
5433 STRICT_P is true if REG_OK_STRICT is in effect. */
5434
5435bool
5436aarch64_regno_ok_for_base_p (int regno, bool strict_p)
5437{
5438 if (!HARD_REGISTER_NUM_P (regno))
5439 {
5440 if (!strict_p)
5441 return true;
5442
5443 if (!reg_renumber)
5444 return false;
5445
5446 regno = reg_renumber[regno];
5447 }
5448
5449 /* The fake registers will be eliminated to either the stack or
5450 hard frame pointer, both of which are usually valid base registers.
5451 Reload deals with the cases where the eliminated form isn't valid. */
5452 return (GP_REGNUM_P (regno)
5453 || regno == SP_REGNUM
5454 || regno == FRAME_POINTER_REGNUM
5455 || regno == ARG_POINTER_REGNUM);
5456}
5457
5458/* Return true if X is a valid base register for mode MODE.
5459 STRICT_P is true if REG_OK_STRICT is in effect. */
5460
5461static bool
5462aarch64_base_register_rtx_p (rtx x, bool strict_p)
5463{
76160199
RS
5464 if (!strict_p
5465 && GET_CODE (x) == SUBREG
5466 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (x))])
43e9d192
IB
5467 x = SUBREG_REG (x);
5468
5469 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
5470}
5471
5472/* Return true if address offset is a valid index. If it is, fill in INFO
5473 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
5474
5475static bool
5476aarch64_classify_index (struct aarch64_address_info *info, rtx x,
ef4bddc2 5477 machine_mode mode, bool strict_p)
43e9d192
IB
5478{
5479 enum aarch64_address_type type;
5480 rtx index;
5481 int shift;
5482
5483 /* (reg:P) */
5484 if ((REG_P (x) || GET_CODE (x) == SUBREG)
5485 && GET_MODE (x) == Pmode)
5486 {
5487 type = ADDRESS_REG_REG;
5488 index = x;
5489 shift = 0;
5490 }
5491 /* (sign_extend:DI (reg:SI)) */
5492 else if ((GET_CODE (x) == SIGN_EXTEND
5493 || GET_CODE (x) == ZERO_EXTEND)
5494 && GET_MODE (x) == DImode
5495 && GET_MODE (XEXP (x, 0)) == SImode)
5496 {
5497 type = (GET_CODE (x) == SIGN_EXTEND)
5498 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5499 index = XEXP (x, 0);
5500 shift = 0;
5501 }
5502 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
5503 else if (GET_CODE (x) == MULT
5504 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
5505 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
5506 && GET_MODE (XEXP (x, 0)) == DImode
5507 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
5508 && CONST_INT_P (XEXP (x, 1)))
5509 {
5510 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
5511 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5512 index = XEXP (XEXP (x, 0), 0);
5513 shift = exact_log2 (INTVAL (XEXP (x, 1)));
5514 }
5515 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
5516 else if (GET_CODE (x) == ASHIFT
5517 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
5518 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
5519 && GET_MODE (XEXP (x, 0)) == DImode
5520 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
5521 && CONST_INT_P (XEXP (x, 1)))
5522 {
5523 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
5524 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5525 index = XEXP (XEXP (x, 0), 0);
5526 shift = INTVAL (XEXP (x, 1));
5527 }
5528 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
5529 else if ((GET_CODE (x) == SIGN_EXTRACT
5530 || GET_CODE (x) == ZERO_EXTRACT)
5531 && GET_MODE (x) == DImode
5532 && GET_CODE (XEXP (x, 0)) == MULT
5533 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
5534 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
5535 {
5536 type = (GET_CODE (x) == SIGN_EXTRACT)
5537 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5538 index = XEXP (XEXP (x, 0), 0);
5539 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
5540 if (INTVAL (XEXP (x, 1)) != 32 + shift
5541 || INTVAL (XEXP (x, 2)) != 0)
5542 shift = -1;
5543 }
5544 /* (and:DI (mult:DI (reg:DI) (const_int scale))
5545 (const_int 0xffffffff<<shift)) */
5546 else if (GET_CODE (x) == AND
5547 && GET_MODE (x) == DImode
5548 && GET_CODE (XEXP (x, 0)) == MULT
5549 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
5550 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5551 && CONST_INT_P (XEXP (x, 1)))
5552 {
5553 type = ADDRESS_REG_UXTW;
5554 index = XEXP (XEXP (x, 0), 0);
5555 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
5556 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
5557 shift = -1;
5558 }
5559 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
5560 else if ((GET_CODE (x) == SIGN_EXTRACT
5561 || GET_CODE (x) == ZERO_EXTRACT)
5562 && GET_MODE (x) == DImode
5563 && GET_CODE (XEXP (x, 0)) == ASHIFT
5564 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
5565 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
5566 {
5567 type = (GET_CODE (x) == SIGN_EXTRACT)
5568 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
5569 index = XEXP (XEXP (x, 0), 0);
5570 shift = INTVAL (XEXP (XEXP (x, 0), 1));
5571 if (INTVAL (XEXP (x, 1)) != 32 + shift
5572 || INTVAL (XEXP (x, 2)) != 0)
5573 shift = -1;
5574 }
5575 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
5576 (const_int 0xffffffff<<shift)) */
5577 else if (GET_CODE (x) == AND
5578 && GET_MODE (x) == DImode
5579 && GET_CODE (XEXP (x, 0)) == ASHIFT
5580 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
5581 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5582 && CONST_INT_P (XEXP (x, 1)))
5583 {
5584 type = ADDRESS_REG_UXTW;
5585 index = XEXP (XEXP (x, 0), 0);
5586 shift = INTVAL (XEXP (XEXP (x, 0), 1));
5587 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
5588 shift = -1;
5589 }
5590 /* (mult:P (reg:P) (const_int scale)) */
5591 else if (GET_CODE (x) == MULT
5592 && GET_MODE (x) == Pmode
5593 && GET_MODE (XEXP (x, 0)) == Pmode
5594 && CONST_INT_P (XEXP (x, 1)))
5595 {
5596 type = ADDRESS_REG_REG;
5597 index = XEXP (x, 0);
5598 shift = exact_log2 (INTVAL (XEXP (x, 1)));
5599 }
5600 /* (ashift:P (reg:P) (const_int shift)) */
5601 else if (GET_CODE (x) == ASHIFT
5602 && GET_MODE (x) == Pmode
5603 && GET_MODE (XEXP (x, 0)) == Pmode
5604 && CONST_INT_P (XEXP (x, 1)))
5605 {
5606 type = ADDRESS_REG_REG;
5607 index = XEXP (x, 0);
5608 shift = INTVAL (XEXP (x, 1));
5609 }
5610 else
5611 return false;
5612
76160199
RS
5613 if (!strict_p
5614 && GET_CODE (index) == SUBREG
5615 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
43e9d192
IB
5616 index = SUBREG_REG (index);
5617
43cacb12
RS
5618 if (aarch64_sve_data_mode_p (mode))
5619 {
5620 if (type != ADDRESS_REG_REG
5621 || (1 << shift) != GET_MODE_UNIT_SIZE (mode))
5622 return false;
5623 }
5624 else
5625 {
5626 if (shift != 0
5627 && !(IN_RANGE (shift, 1, 3)
5628 && known_eq (1 << shift, GET_MODE_SIZE (mode))))
5629 return false;
5630 }
5631
5632 if (REG_P (index)
43e9d192
IB
5633 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
5634 {
5635 info->type = type;
5636 info->offset = index;
5637 info->shift = shift;
5638 return true;
5639 }
5640
5641 return false;
5642}
5643
abc52318
KT
5644/* Return true if MODE is one of the modes for which we
5645 support LDP/STP operations. */
5646
5647static bool
5648aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
5649{
5650 return mode == SImode || mode == DImode
5651 || mode == SFmode || mode == DFmode
5652 || (aarch64_vector_mode_supported_p (mode)
6a70badb 5653 && known_eq (GET_MODE_SIZE (mode), 8));
abc52318
KT
5654}
5655
9e0218fc
RH
5656/* Return true if REGNO is a virtual pointer register, or an eliminable
5657 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
5658 include stack_pointer or hard_frame_pointer. */
5659static bool
5660virt_or_elim_regno_p (unsigned regno)
5661{
5662 return ((regno >= FIRST_VIRTUAL_REGISTER
5663 && regno <= LAST_VIRTUAL_POINTER_REGISTER)
5664 || regno == FRAME_POINTER_REGNUM
5665 || regno == ARG_POINTER_REGNUM);
5666}
5667
a97d8b98
RS
5668/* Return true if X is a valid address of type TYPE for machine mode MODE.
5669 If it is, fill in INFO appropriately. STRICT_P is true if
5670 REG_OK_STRICT is in effect. */
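/* For example, for DImode an address such as [x1, #16] classifies as
   ADDRESS_REG_IMM, while [x1, x2, lsl #3] classifies as ADDRESS_REG_REG.  */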
43e9d192
IB
5671
5672static bool
5673aarch64_classify_address (struct aarch64_address_info *info,
a97d8b98
RS
5674 rtx x, machine_mode mode, bool strict_p,
5675 aarch64_addr_query_type type = ADDR_QUERY_M)
43e9d192
IB
5676{
5677 enum rtx_code code = GET_CODE (x);
5678 rtx op0, op1;
dc640181
RS
5679 poly_int64 offset;
5680
6a70badb 5681 HOST_WIDE_INT const_size;
2d8c6dc1 5682
80d43579
WD
5683 /* On BE, we use load/store pair for all large int mode load/stores.
5684 TI/TFmode may also use a load/store pair. */
43cacb12
RS
5685 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
5686 bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
a97d8b98 5687 bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
80d43579
WD
5688 || mode == TImode
5689 || mode == TFmode
43cacb12 5690 || (BYTES_BIG_ENDIAN && advsimd_struct_p));
2d8c6dc1 5691
6a70badb 5692 bool allow_reg_index_p = (!load_store_pair_p
43cacb12
RS
5693 && (known_lt (GET_MODE_SIZE (mode), 16)
5694 || vec_flags == VEC_ADVSIMD
5695 || vec_flags == VEC_SVE_DATA));
5696
5697 /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
5698 [Rn, #offset, MUL VL]. */
5699 if ((vec_flags & (VEC_SVE_DATA | VEC_SVE_PRED)) != 0
5700 && (code != REG && code != PLUS))
5701 return false;
2d8c6dc1
AH
5702
5703 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
5704 REG addressing. */
43cacb12
RS
5705 if (advsimd_struct_p
5706 && !BYTES_BIG_ENDIAN
43e9d192
IB
5707 && (code != POST_INC && code != REG))
5708 return false;
5709
43cacb12
RS
5710 gcc_checking_assert (GET_MODE (x) == VOIDmode
5711 || SCALAR_INT_MODE_P (GET_MODE (x)));
5712
43e9d192
IB
5713 switch (code)
5714 {
5715 case REG:
5716 case SUBREG:
5717 info->type = ADDRESS_REG_IMM;
5718 info->base = x;
5719 info->offset = const0_rtx;
dc640181 5720 info->const_offset = 0;
43e9d192
IB
5721 return aarch64_base_register_rtx_p (x, strict_p);
5722
5723 case PLUS:
5724 op0 = XEXP (x, 0);
5725 op1 = XEXP (x, 1);
15c0c5c9
JW
5726
5727 if (! strict_p
4aa81c2e 5728 && REG_P (op0)
9e0218fc 5729 && virt_or_elim_regno_p (REGNO (op0))
dc640181 5730 && poly_int_rtx_p (op1, &offset))
15c0c5c9
JW
5731 {
5732 info->type = ADDRESS_REG_IMM;
5733 info->base = op0;
5734 info->offset = op1;
dc640181 5735 info->const_offset = offset;
15c0c5c9
JW
5736
5737 return true;
5738 }
5739
6a70badb 5740 if (maybe_ne (GET_MODE_SIZE (mode), 0)
dc640181
RS
5741 && aarch64_base_register_rtx_p (op0, strict_p)
5742 && poly_int_rtx_p (op1, &offset))
43e9d192 5743 {
43e9d192
IB
5744 info->type = ADDRESS_REG_IMM;
5745 info->base = op0;
5746 info->offset = op1;
dc640181 5747 info->const_offset = offset;
43e9d192
IB
5748
5749 /* TImode and TFmode values are allowed in both pairs of X
5750 registers and individual Q registers. The available
5751 address modes are:
5752 X,X: 7-bit signed scaled offset
5753 Q: 9-bit signed offset
5754 We conservatively require an offset representable in either mode.
8ed49fab
KT
5755 When performing the check for pairs of X registers i.e. LDP/STP
5756 pass down DImode since that is the natural size of the LDP/STP
5757 instruction memory accesses. */
43e9d192 5758 if (mode == TImode || mode == TFmode)
8ed49fab 5759 return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
8734dfac
WD
5760 && (offset_9bit_signed_unscaled_p (mode, offset)
5761 || offset_12bit_unsigned_scaled_p (mode, offset)));
43e9d192 5762
2d8c6dc1
AH
5763 /* A 7bit offset check because OImode will emit a ldp/stp
5764 instruction (only big endian will get here).
5765 For ldp/stp instructions, the offset is scaled for the size of a
5766 single element of the pair. */
5767 if (mode == OImode)
5768 return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
5769
5770 /* Three 9/12 bit offset checks because CImode will emit three
5771 ldr/str instructions (only big endian will get here). */
5772 if (mode == CImode)
5773 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
5774 && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
5775 || offset_12bit_unsigned_scaled_p (V16QImode,
5776 offset + 32)));
5777
5778 /* Two 7bit offset checks because XImode will emit two ldp/stp
5779 instructions (only big endian will get here). */
5780 if (mode == XImode)
5781 return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
5782 && aarch64_offset_7bit_signed_scaled_p (TImode,
5783 offset + 32));
5784
43cacb12
RS
5785 /* Make "m" use the LD1 offset range for SVE data modes, so
5786 that pre-RTL optimizers like ivopts will work to that
5787 instead of the wider LDR/STR range. */
5788 if (vec_flags == VEC_SVE_DATA)
5789 return (type == ADDR_QUERY_M
5790 ? offset_4bit_signed_scaled_p (mode, offset)
5791 : offset_9bit_signed_scaled_p (mode, offset));
5792
9f4cbab8
RS
5793 if (vec_flags == (VEC_SVE_DATA | VEC_STRUCT))
5794 {
5795 poly_int64 end_offset = (offset
5796 + GET_MODE_SIZE (mode)
5797 - BYTES_PER_SVE_VECTOR);
5798 return (type == ADDR_QUERY_M
5799 ? offset_4bit_signed_scaled_p (mode, offset)
5800 : (offset_9bit_signed_scaled_p (SVE_BYTE_MODE, offset)
5801 && offset_9bit_signed_scaled_p (SVE_BYTE_MODE,
5802 end_offset)));
5803 }
5804
43cacb12
RS
5805 if (vec_flags == VEC_SVE_PRED)
5806 return offset_9bit_signed_scaled_p (mode, offset);
5807
2d8c6dc1 5808 if (load_store_pair_p)
6a70badb
RS
5809 return ((known_eq (GET_MODE_SIZE (mode), 4)
5810 || known_eq (GET_MODE_SIZE (mode), 8))
44707478 5811 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
5812 else
5813 return (offset_9bit_signed_unscaled_p (mode, offset)
5814 || offset_12bit_unsigned_scaled_p (mode, offset));
5815 }
5816
5817 if (allow_reg_index_p)
5818 {
5819 /* Look for base + (scaled/extended) index register. */
5820 if (aarch64_base_register_rtx_p (op0, strict_p)
5821 && aarch64_classify_index (info, op1, mode, strict_p))
5822 {
5823 info->base = op0;
5824 return true;
5825 }
5826 if (aarch64_base_register_rtx_p (op1, strict_p)
5827 && aarch64_classify_index (info, op0, mode, strict_p))
5828 {
5829 info->base = op1;
5830 return true;
5831 }
5832 }
5833
5834 return false;
5835
5836 case POST_INC:
5837 case POST_DEC:
5838 case PRE_INC:
5839 case PRE_DEC:
5840 info->type = ADDRESS_REG_WB;
5841 info->base = XEXP (x, 0);
5842 info->offset = NULL_RTX;
5843 return aarch64_base_register_rtx_p (info->base, strict_p);
5844
5845 case POST_MODIFY:
5846 case PRE_MODIFY:
5847 info->type = ADDRESS_REG_WB;
5848 info->base = XEXP (x, 0);
5849 if (GET_CODE (XEXP (x, 1)) == PLUS
dc640181 5850 && poly_int_rtx_p (XEXP (XEXP (x, 1), 1), &offset)
43e9d192
IB
5851 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
5852 && aarch64_base_register_rtx_p (info->base, strict_p))
5853 {
43e9d192 5854 info->offset = XEXP (XEXP (x, 1), 1);
dc640181 5855 info->const_offset = offset;
43e9d192
IB
5856
5857 /* TImode and TFmode values are allowed in both pairs of X
5858 registers and individual Q registers. The available
5859 address modes are:
5860 X,X: 7-bit signed scaled offset
5861 Q: 9-bit signed offset
5862 We conservatively require an offset representable in either mode.
5863 */
5864 if (mode == TImode || mode == TFmode)
44707478 5865 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
5866 && offset_9bit_signed_unscaled_p (mode, offset));
5867
2d8c6dc1 5868 if (load_store_pair_p)
6a70badb
RS
5869 return ((known_eq (GET_MODE_SIZE (mode), 4)
5870 || known_eq (GET_MODE_SIZE (mode), 8))
44707478 5871 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
5872 else
5873 return offset_9bit_signed_unscaled_p (mode, offset);
5874 }
5875 return false;
5876
5877 case CONST:
5878 case SYMBOL_REF:
5879 case LABEL_REF:
79517551
SN
5880 /* load literal: pc-relative constant pool entry. Only supported
5881 for SI mode or larger. */
43e9d192 5882 info->type = ADDRESS_SYMBOLIC;
2d8c6dc1 5883
6a70badb
RS
5884 if (!load_store_pair_p
5885 && GET_MODE_SIZE (mode).is_constant (&const_size)
5886 && const_size >= 4)
43e9d192
IB
5887 {
5888 rtx sym, addend;
5889
5890 split_const (x, &sym, &addend);
b4f50fd4
RR
5891 return ((GET_CODE (sym) == LABEL_REF
5892 || (GET_CODE (sym) == SYMBOL_REF
5893 && CONSTANT_POOL_ADDRESS_P (sym)
9ee6540a 5894 && aarch64_pcrelative_literal_loads)));
43e9d192
IB
5895 }
5896 return false;
5897
5898 case LO_SUM:
5899 info->type = ADDRESS_LO_SUM;
5900 info->base = XEXP (x, 0);
5901 info->offset = XEXP (x, 1);
5902 if (allow_reg_index_p
5903 && aarch64_base_register_rtx_p (info->base, strict_p))
5904 {
5905 rtx sym, offs;
5906 split_const (info->offset, &sym, &offs);
5907 if (GET_CODE (sym) == SYMBOL_REF
43cacb12
RS
5908 && (aarch64_classify_symbol (sym, INTVAL (offs))
5909 == SYMBOL_SMALL_ABSOLUTE))
43e9d192
IB
5910 {
5911 /* The symbol and offset must be aligned to the access size. */
5912 unsigned int align;
43e9d192
IB
5913
5914 if (CONSTANT_POOL_ADDRESS_P (sym))
5915 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
5916 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
5917 {
5918 tree exp = SYMBOL_REF_DECL (sym);
5919 align = TYPE_ALIGN (TREE_TYPE (exp));
58e17cf8 5920 align = aarch64_constant_alignment (exp, align);
43e9d192
IB
5921 }
5922 else if (SYMBOL_REF_DECL (sym))
5923 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
5924 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
5925 && SYMBOL_REF_BLOCK (sym) != NULL)
5926 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
5927 else
5928 align = BITS_PER_UNIT;
5929
6a70badb
RS
5930 poly_int64 ref_size = GET_MODE_SIZE (mode);
5931 if (known_eq (ref_size, 0))
43e9d192
IB
5932 ref_size = GET_MODE_SIZE (DImode);
5933
6a70badb
RS
5934 return (multiple_p (INTVAL (offs), ref_size)
5935 && multiple_p (align / BITS_PER_UNIT, ref_size));
43e9d192
IB
5936 }
5937 }
5938 return false;
5939
5940 default:
5941 return false;
5942 }
5943}
5944
9bf2f779
KT
5945/* Return true if the address X is valid for a PRFM instruction.
5946 STRICT_P is true if we should do strict checking with
5947 aarch64_classify_address. */
5948
5949bool
5950aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
5951{
5952 struct aarch64_address_info addr;
5953
5954 /* PRFM accepts the same addresses as DImode... */
a97d8b98 5955 bool res = aarch64_classify_address (&addr, x, DImode, strict_p);
9bf2f779
KT
5956 if (!res)
5957 return false;
5958
5959 /* ... except writeback forms. */
5960 return addr.type != ADDRESS_REG_WB;
5961}
5962
43e9d192
IB
5963bool
5964aarch64_symbolic_address_p (rtx x)
5965{
5966 rtx offset;
5967
5968 split_const (x, &x, &offset);
5969 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
5970}
5971
a6e0bfa7 5972/* Classify the base of symbolic expression X. */
da4f13a4
MS
5973
5974enum aarch64_symbol_type
a6e0bfa7 5975aarch64_classify_symbolic_expression (rtx x)
43e9d192
IB
5976{
5977 rtx offset;
da4f13a4 5978
43e9d192 5979 split_const (x, &x, &offset);
43cacb12 5980 return aarch64_classify_symbol (x, INTVAL (offset));
43e9d192
IB
5981}
5982
5983
5984/* Return TRUE if X is a legitimate address for accessing memory in
5985 mode MODE. */
5986static bool
ef4bddc2 5987aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
43e9d192
IB
5988{
5989 struct aarch64_address_info addr;
5990
a97d8b98 5991 return aarch64_classify_address (&addr, x, mode, strict_p);
43e9d192
IB
5992}
5993
a97d8b98
RS
5994/* Return TRUE if X is a legitimate address of type TYPE for accessing
5995 memory in mode MODE. STRICT_P is true if REG_OK_STRICT is in effect. */
43e9d192 5996bool
a97d8b98
RS
5997aarch64_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
5998 aarch64_addr_query_type type)
43e9d192
IB
5999{
6000 struct aarch64_address_info addr;
6001
a97d8b98 6002 return aarch64_classify_address (&addr, x, mode, strict_p, type);
43e9d192
IB
6003}
6004
9005477f
RS
6005/* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
6006
491ec060 6007static bool
9005477f
RS
6008aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
6009 poly_int64 orig_offset,
6010 machine_mode mode)
491ec060 6011{
6a70badb
RS
6012 HOST_WIDE_INT size;
6013 if (GET_MODE_SIZE (mode).is_constant (&size))
6014 {
9005477f
RS
6015 HOST_WIDE_INT const_offset, second_offset;
6016
6017 /* A general SVE offset is A * VQ + B. Remove the A component from
6018 coefficient 0 in order to get the constant B. */
6019 const_offset = orig_offset.coeffs[0] - orig_offset.coeffs[1];
6020
6021 /* Split an out-of-range address displacement into a base and
6022 offset. Use 4KB range for 1- and 2-byte accesses and a 16KB
6023 range otherwise to increase opportunities for sharing the base
6024 address of different sizes. Unaligned accesses use the signed
6025 9-bit range, TImode/TFmode use the intersection of signed
6026 scaled 7-bit and signed 9-bit offset. */
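/* For example, a 4-byte access at constant offset 0x10010 is split into
   a base adjustment of 0x10000 and an in-range offset of 0x10.  */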
6a70badb 6027 if (mode == TImode || mode == TFmode)
9005477f
RS
6028 second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
6029 else if ((const_offset & (size - 1)) != 0)
6030 second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
6a70badb 6031 else
9005477f 6032 second_offset = const_offset & (size < 4 ? 0xfff : 0x3ffc);
491ec060 6033
9005477f
RS
6034 if (second_offset == 0 || known_eq (orig_offset, second_offset))
6035 return false;
6036
6037 /* Split the offset into second_offset and the rest. */
6038 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6039 *offset2 = gen_int_mode (second_offset, Pmode);
6040 return true;
6041 }
6042 else
6043 {
6044 /* Get the mode we should use as the basis of the range. For structure
6045 modes this is the mode of one vector. */
6046 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
6047 machine_mode step_mode
6048 = (vec_flags & VEC_STRUCT) != 0 ? SVE_BYTE_MODE : mode;
6049
6050 /* Get the "mul vl" multiplier we'd like to use. */
6051 HOST_WIDE_INT factor = GET_MODE_SIZE (step_mode).coeffs[1];
6052 HOST_WIDE_INT vnum = orig_offset.coeffs[1] / factor;
6053 if (vec_flags & VEC_SVE_DATA)
6054 /* LDR supports a 9-bit range, but the move patterns for
6055 structure modes require all vectors to be in range of the
6056 same base. The simplest way of accommodating that while still
6057 promoting reuse of anchor points between different modes is
6058 to use an 8-bit range unconditionally. */
6059 vnum = ((vnum + 128) & 255) - 128;
6060 else
6061 /* Predicates are only handled singly, so we might as well use
6062 the full range. */
6063 vnum = ((vnum + 256) & 511) - 256;
6064 if (vnum == 0)
6065 return false;
6066
6067 /* Convert the "mul vl" multiplier into a byte offset. */
6068 poly_int64 second_offset = GET_MODE_SIZE (step_mode) * vnum;
6069 if (known_eq (second_offset, orig_offset))
6070 return false;
6071
6072 /* Split the offset into second_offset and the rest. */
6073 *offset1 = gen_int_mode (orig_offset - second_offset, Pmode);
6074 *offset2 = gen_int_mode (second_offset, Pmode);
6a70badb
RS
6075 return true;
6076 }
491ec060
WD
6077}
6078
a2170965
TC
6079/* Return the binary representation of floating point constant VALUE in INTVAL.
6080 If the value cannot be converted, return false without setting INTVAL.
6081 The conversion is done in the given MODE. */
6082bool
6083aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
6084{
6085
6086 /* We make a general exception for 0. */
6087 if (aarch64_float_const_zero_rtx_p (value))
6088 {
6089 *intval = 0;
6090 return true;
6091 }
6092
0d0e0188 6093 scalar_float_mode mode;
a2170965 6094 if (GET_CODE (value) != CONST_DOUBLE
0d0e0188 6095 || !is_a <scalar_float_mode> (GET_MODE (value), &mode)
a2170965
TC
6096 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT
6097 /* Only support up to DF mode. */
6098 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (DFmode))
6099 return false;
6100
6101 unsigned HOST_WIDE_INT ival = 0;
6102
6103 long res[2];
6104 real_to_target (res,
6105 CONST_DOUBLE_REAL_VALUE (value),
6106 REAL_MODE_FORMAT (mode));
6107
5c22bb48
TC
6108 if (mode == DFmode)
6109 {
6110 int order = BYTES_BIG_ENDIAN ? 1 : 0;
6111 ival = zext_hwi (res[order], 32);
6112 ival |= (zext_hwi (res[1 - order], 32) << 32);
6113 }
6114 else
6115 ival = zext_hwi (res[0], 32);
a2170965
TC
6116
6117 *intval = ival;
6118 return true;
6119}
6120
6121/* Return TRUE if rtx X is an immediate constant that can be moved using a
6122 single MOV(+MOVK) followed by an FMOV. */
6123bool
6124aarch64_float_const_rtx_p (rtx x)
6125{
6126 machine_mode mode = GET_MODE (x);
6127 if (mode == VOIDmode)
6128 return false;
6129
6130 /* Determine whether it's cheaper to write float constants as
6131 mov/movk pairs over ldr/adrp pairs. */
6132 unsigned HOST_WIDE_INT ival;
6133
6134 if (GET_CODE (x) == CONST_DOUBLE
6135 && SCALAR_FLOAT_MODE_P (mode)
6136 && aarch64_reinterpret_float_as_int (x, &ival))
6137 {
77e994c9
RS
6138 scalar_int_mode imode = (mode == HFmode
6139 ? SImode
6140 : int_mode_for_mode (mode).require ());
a2170965
TC
6141 int num_instr = aarch64_internal_mov_immediate
6142 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
6143 return num_instr < 3;
6144 }
6145
6146 return false;
6147}
6148
43e9d192
IB
6149/* Return TRUE if rtx X is the immediate constant 0.0. */
6150bool
3520f7cc 6151aarch64_float_const_zero_rtx_p (rtx x)
43e9d192 6152{
43e9d192
IB
6153 if (GET_MODE (x) == VOIDmode)
6154 return false;
6155
34a72c33 6156 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
43e9d192 6157 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
34a72c33 6158 return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
43e9d192
IB
6159}
6160
a2170965
TC
6161/* Return TRUE if rtx X is an immediate constant that fits in a single
6162 MOVI immediate operation. */
6163bool
6164aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
6165{
6166 if (!TARGET_SIMD)
6167 return false;
6168
77e994c9
RS
6169 machine_mode vmode;
6170 scalar_int_mode imode;
a2170965
TC
6171 unsigned HOST_WIDE_INT ival;
6172
6173 if (GET_CODE (x) == CONST_DOUBLE
6174 && SCALAR_FLOAT_MODE_P (mode))
6175 {
6176 if (!aarch64_reinterpret_float_as_int (x, &ival))
6177 return false;
6178
35c38fa6
TC
6179 /* We make a general exception for 0. */
6180 if (aarch64_float_const_zero_rtx_p (x))
6181 return true;
6182
304b9962 6183 imode = int_mode_for_mode (mode).require ();
a2170965
TC
6184 }
6185 else if (GET_CODE (x) == CONST_INT
77e994c9
RS
6186 && is_a <scalar_int_mode> (mode, &imode))
6187 ival = INTVAL (x);
a2170965
TC
6188 else
6189 return false;
6190
6191 /* Use a 64-bit mode for everything except for DI/DF mode, where we use
6192 a 128-bit vector mode. */
77e994c9 6193 int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
a2170965
TC
6194
6195 vmode = aarch64_simd_container_mode (imode, width);
6196 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
6197
b187677b 6198 return aarch64_simd_valid_immediate (v_op, NULL);
a2170965
TC
6199}
6200
6201
70f09188
AP
6202/* Return the fixed registers used for condition codes. */
6203
6204static bool
6205aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
6206{
6207 *p1 = CC_REGNUM;
6208 *p2 = INVALID_REGNUM;
6209 return true;
6210}
6211
47210a04
RL
6212/* This function is used by the call expanders of the machine description.
6213 RESULT is the register in which the result is returned. It's NULL for
6214 "call" and "sibcall".
6215 MEM is the location of the function call.
6216 SIBCALL indicates whether this function call is normal call or sibling call.
6217 It will generate different pattern accordingly. */
6218
6219void
6220aarch64_expand_call (rtx result, rtx mem, bool sibcall)
6221{
6222 rtx call, callee, tmp;
6223 rtvec vec;
6224 machine_mode mode;
6225
6226 gcc_assert (MEM_P (mem));
6227 callee = XEXP (mem, 0);
6228 mode = GET_MODE (callee);
6229 gcc_assert (mode == Pmode);
6230
6231 /* Decide if we should generate indirect calls by loading the
6232 address of the callee into a register before performing
6233 the branch-and-link. */
6234 if (SYMBOL_REF_P (callee)
6235 ? (aarch64_is_long_call_p (callee)
6236 || aarch64_is_noplt_call_p (callee))
6237 : !REG_P (callee))
6238 XEXP (mem, 0) = force_reg (mode, callee);
6239
6240 call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
6241
6242 if (result != NULL_RTX)
6243 call = gen_rtx_SET (result, call);
6244
6245 if (sibcall)
6246 tmp = ret_rtx;
6247 else
6248 tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM));
6249
6250 vec = gen_rtvec (2, call, tmp);
6251 call = gen_rtx_PARALLEL (VOIDmode, vec);
6252
6253 aarch64_emit_call_insn (call);
6254}
6255
78607708
TV
6256/* Emit call insn with PAT and do aarch64-specific handling. */
6257
d07a3fed 6258void
78607708
TV
6259aarch64_emit_call_insn (rtx pat)
6260{
6261 rtx insn = emit_call_insn (pat);
6262
6263 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
6264 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
6265 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
6266}
6267
ef4bddc2 6268machine_mode
43e9d192
IB
6269aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
6270{
6271 /* All floating point compares return CCFP if it is an equality
6272 comparison, and CCFPE otherwise. */
6273 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
6274 {
6275 switch (code)
6276 {
6277 case EQ:
6278 case NE:
6279 case UNORDERED:
6280 case ORDERED:
6281 case UNLT:
6282 case UNLE:
6283 case UNGT:
6284 case UNGE:
6285 case UNEQ:
43e9d192
IB
6286 return CCFPmode;
6287
6288 case LT:
6289 case LE:
6290 case GT:
6291 case GE:
8332c5ee 6292 case LTGT:
43e9d192
IB
6293 return CCFPEmode;
6294
6295 default:
6296 gcc_unreachable ();
6297 }
6298 }
6299
2b8568fe
KT
6300 /* Equality comparisons of short modes against zero can be performed
6301 using the TST instruction with the appropriate bitmask. */
6302 if (y == const0_rtx && REG_P (x)
6303 && (code == EQ || code == NE)
6304 && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
6305 return CC_NZmode;
6306
b06335f9
KT
6307 /* Similarly, comparisons of zero_extends from shorter modes can
6308 be performed using an ANDS with an immediate mask. */
6309 if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
6310 && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
6311 && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
6312 && (code == EQ || code == NE))
6313 return CC_NZmode;
6314
43e9d192
IB
6315 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
6316 && y == const0_rtx
6317 && (code == EQ || code == NE || code == LT || code == GE)
b056c910 6318 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
7325d85a
KT
6319 || GET_CODE (x) == NEG
6320 || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
6321 && CONST_INT_P (XEXP (x, 2)))))
43e9d192
IB
6322 return CC_NZmode;
6323
1c992d1e 6324 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
6325 the comparison will have to be swapped when we emit the assembly
6326 code. */
6327 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
ffa8a921 6328 && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
43e9d192
IB
6329 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6330 || GET_CODE (x) == LSHIFTRT
1c992d1e 6331 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
6332 return CC_SWPmode;
6333
1c992d1e
RE
6334 /* Similarly for a negated operand, but we can only do this for
6335 equalities. */
6336 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 6337 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
6338 && (code == EQ || code == NE)
6339 && GET_CODE (x) == NEG)
6340 return CC_Zmode;
6341
ef22810a
RH
6342 /* A test for unsigned overflow. */
6343 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
6344 && code == NE
6345 && GET_CODE (x) == PLUS
6346 && GET_CODE (y) == ZERO_EXTEND)
6347 return CC_Cmode;
6348
43e9d192
IB
6349 /* For everything else, return CCmode. */
6350 return CCmode;
6351}
6352
3dfa7055 6353static int
b8506a8a 6354aarch64_get_condition_code_1 (machine_mode, enum rtx_code);
3dfa7055 6355
cd5660ab 6356int
43e9d192
IB
6357aarch64_get_condition_code (rtx x)
6358{
ef4bddc2 6359 machine_mode mode = GET_MODE (XEXP (x, 0));
43e9d192
IB
6360 enum rtx_code comp_code = GET_CODE (x);
6361
6362 if (GET_MODE_CLASS (mode) != MODE_CC)
6363 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3dfa7055
ZC
6364 return aarch64_get_condition_code_1 (mode, comp_code);
6365}
43e9d192 6366
3dfa7055 6367static int
b8506a8a 6368aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
3dfa7055 6369{
43e9d192
IB
6370 switch (mode)
6371 {
4e10a5a7
RS
6372 case E_CCFPmode:
6373 case E_CCFPEmode:
43e9d192
IB
6374 switch (comp_code)
6375 {
6376 case GE: return AARCH64_GE;
6377 case GT: return AARCH64_GT;
6378 case LE: return AARCH64_LS;
6379 case LT: return AARCH64_MI;
6380 case NE: return AARCH64_NE;
6381 case EQ: return AARCH64_EQ;
6382 case ORDERED: return AARCH64_VC;
6383 case UNORDERED: return AARCH64_VS;
6384 case UNLT: return AARCH64_LT;
6385 case UNLE: return AARCH64_LE;
6386 case UNGT: return AARCH64_HI;
6387 case UNGE: return AARCH64_PL;
cd5660ab 6388 default: return -1;
43e9d192
IB
6389 }
6390 break;
6391
4e10a5a7 6392 case E_CCmode:
43e9d192
IB
6393 switch (comp_code)
6394 {
6395 case NE: return AARCH64_NE;
6396 case EQ: return AARCH64_EQ;
6397 case GE: return AARCH64_GE;
6398 case GT: return AARCH64_GT;
6399 case LE: return AARCH64_LE;
6400 case LT: return AARCH64_LT;
6401 case GEU: return AARCH64_CS;
6402 case GTU: return AARCH64_HI;
6403 case LEU: return AARCH64_LS;
6404 case LTU: return AARCH64_CC;
cd5660ab 6405 default: return -1;
43e9d192
IB
6406 }
6407 break;
6408
4e10a5a7 6409 case E_CC_SWPmode:
43e9d192
IB
6410 switch (comp_code)
6411 {
6412 case NE: return AARCH64_NE;
6413 case EQ: return AARCH64_EQ;
6414 case GE: return AARCH64_LE;
6415 case GT: return AARCH64_LT;
6416 case LE: return AARCH64_GE;
6417 case LT: return AARCH64_GT;
6418 case GEU: return AARCH64_LS;
6419 case GTU: return AARCH64_CC;
6420 case LEU: return AARCH64_CS;
6421 case LTU: return AARCH64_HI;
cd5660ab 6422 default: return -1;
43e9d192
IB
6423 }
6424 break;
6425
4e10a5a7 6426 case E_CC_NZmode:
43e9d192
IB
6427 switch (comp_code)
6428 {
6429 case NE: return AARCH64_NE;
6430 case EQ: return AARCH64_EQ;
6431 case GE: return AARCH64_PL;
6432 case LT: return AARCH64_MI;
cd5660ab 6433 default: return -1;
43e9d192
IB
6434 }
6435 break;
6436
4e10a5a7 6437 case E_CC_Zmode:
1c992d1e
RE
6438 switch (comp_code)
6439 {
6440 case NE: return AARCH64_NE;
6441 case EQ: return AARCH64_EQ;
cd5660ab 6442 default: return -1;
1c992d1e
RE
6443 }
6444 break;
6445
4e10a5a7 6446 case E_CC_Cmode:
ef22810a
RH
6447 switch (comp_code)
6448 {
6449 case NE: return AARCH64_CS;
6450 case EQ: return AARCH64_CC;
6451 default: return -1;
6452 }
6453 break;
6454
43e9d192 6455 default:
cd5660ab 6456 return -1;
43e9d192 6457 }
3dfa7055 6458
3dfa7055 6459 return -1;
43e9d192
IB
6460}
6461
ddeabd3e
AL
6462bool
6463aarch64_const_vec_all_same_in_range_p (rtx x,
6a70badb
RS
6464 HOST_WIDE_INT minval,
6465 HOST_WIDE_INT maxval)
ddeabd3e 6466{
6a70badb
RS
6467 rtx elt;
6468 return (const_vec_duplicate_p (x, &elt)
6469 && CONST_INT_P (elt)
6470 && IN_RANGE (INTVAL (elt), minval, maxval));
ddeabd3e
AL
6471}
6472
6473bool
6474aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
6475{
6476 return aarch64_const_vec_all_same_in_range_p (x, val, val);
6477}
6478
43cacb12
RS
6479/* Return true if VEC is a constant in which every element is in the range
6480 [MINVAL, MAXVAL]. The elements do not need to have the same value. */
6481
6482static bool
6483aarch64_const_vec_all_in_range_p (rtx vec,
6484 HOST_WIDE_INT minval,
6485 HOST_WIDE_INT maxval)
6486{
6487 if (GET_CODE (vec) != CONST_VECTOR
6488 || GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
6489 return false;
6490
6491 int nunits;
6492 if (!CONST_VECTOR_STEPPED_P (vec))
6493 nunits = const_vector_encoded_nelts (vec);
6494 else if (!CONST_VECTOR_NUNITS (vec).is_constant (&nunits))
6495 return false;
6496
6497 for (int i = 0; i < nunits; i++)
6498 {
6499 rtx vec_elem = CONST_VECTOR_ELT (vec, i);
6500 if (!CONST_INT_P (vec_elem)
6501 || !IN_RANGE (INTVAL (vec_elem), minval, maxval))
6502 return false;
6503 }
6504 return true;
6505}
43e9d192 6506
cf670503
ZC
6507/* N Z C V. */
6508#define AARCH64_CC_V 1
6509#define AARCH64_CC_C (1 << 1)
6510#define AARCH64_CC_Z (1 << 2)
6511#define AARCH64_CC_N (1 << 3)
6512
c8012fbc
WD
6513/* N Z C V flags for ccmp. Indexed by AARCH64_COND_CODE. */
6514static const int aarch64_nzcv_codes[] =
6515{
6516 0, /* EQ, Z == 1. */
6517 AARCH64_CC_Z, /* NE, Z == 0. */
6518 0, /* CS, C == 1. */
6519 AARCH64_CC_C, /* CC, C == 0. */
6520 0, /* MI, N == 1. */
6521 AARCH64_CC_N, /* PL, N == 0. */
6522 0, /* VS, V == 1. */
6523 AARCH64_CC_V, /* VC, V == 0. */
6524 0, /* HI, C ==1 && Z == 0. */
6525 AARCH64_CC_C, /* LS, !(C == 1 && Z == 0). */
6526 AARCH64_CC_V, /* GE, N == V. */
6527 0, /* LT, N != V. */
6528 AARCH64_CC_Z, /* GT, Z == 0 && N == V. */
6529 0, /* LE, !(Z == 0 && N == V). */
6530 0, /* AL, Any. */
6531 0 /* NV, Any. */
cf670503
ZC
6532};
6533
43cacb12
RS
6534/* Print floating-point vector immediate operand X to F, negating it
6535 first if NEGATE is true. Return true on success, false if it isn't
6536 a constant we can handle. */
6537
6538static bool
6539aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
6540{
6541 rtx elt;
6542
6543 if (!const_vec_duplicate_p (x, &elt))
6544 return false;
6545
6546 REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (elt);
6547 if (negate)
6548 r = real_value_negate (&r);
6549
6550 /* We only handle the SVE single-bit immediates here. */
6551 if (real_equal (&r, &dconst0))
6552 asm_fprintf (f, "0.0");
6553 else if (real_equal (&r, &dconst1))
6554 asm_fprintf (f, "1.0");
6555 else if (real_equal (&r, &dconsthalf))
6556 asm_fprintf (f, "0.5");
6557 else
6558 return false;
6559
6560 return true;
6561}
6562
9f4cbab8
RS
6563/* Return the equivalent letter for size. */
6564static char
6565sizetochar (int size)
6566{
6567 switch (size)
6568 {
6569 case 64: return 'd';
6570 case 32: return 's';
6571 case 16: return 'h';
6572 case 8 : return 'b';
6573 default: gcc_unreachable ();
6574 }
6575}
6576
bcf19844
JW
6577/* Print operand X to file F in a target specific manner according to CODE.
6578 The acceptable formatting commands given by CODE are:
6579 'c': An integer or symbol address without a preceding #
6580 sign.
43cacb12
RS
6581 'C': Take the duplicated element in a vector constant
6582 and print it in hex.
6583 'D': Take the duplicated element in a vector constant
6584 and print it as an unsigned integer, in decimal.
bcf19844
JW
6585 'e': Print the sign/zero-extend size as a character 8->b,
6586 16->h, 32->w.
6587 'p': Prints N such that 2^N == X (X must be power of 2 and
6588 const int).
6589 'P': Print the number of non-zero bits in X (a const_int).
6590 'H': Print the higher numbered register of a pair (TImode)
6591 of regs.
6592 'm': Print a condition (eq, ne, etc).
6593 'M': Same as 'm', but invert condition.
43cacb12
RS
6594 'N': Take the duplicated element in a vector constant
6595 and print the negative of it in decimal.
bcf19844
JW
6596 'b/h/s/d/q': Print a scalar FP/SIMD register name.
6597 'S/T/U/V': Print a FP/SIMD register name for a register list.
6598 The register printed is the FP/SIMD register name
6599 of X + 0/1/2/3 for S/T/U/V.
6600 'R': Print a scalar FP/SIMD register name + 1.
6601 'X': Print bottom 16 bits of integer constant in hex.
6602 'w/x': Print a general register name or the zero register
6603 (32-bit or 64-bit).
6604 '0': Print a normal operand, if it's a general register,
6605 then we assume DImode.
6606 'k': Print NZCV for conditional compare instructions.
6607 'A': Output address constant representing the first
6608 argument of X, specifying a relocation offset
6609 if appropriate.
6610 'L': Output constant address specified by X
6611 with a relocation offset if appropriate.
6612 'G': Prints address of X, specifying a PC relative
e69a816d
WD
6613 relocation mode if appropriate.
6614 'y': Output address of LDP or STP - this is used for
6615 some LDP/STPs which don't use a PARALLEL in their
6616 pattern (so the mode needs to be adjusted).
6617 'z': Output address of a typical LDP or STP. */
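/* A few illustrative expansions of the codes above (hypothetical operands):
   with operand 0 = (reg:DI x5), "%x0" prints "x5" and "%w0" prints "w5";
   with operand 0 = (reg:TF v2), "%q0" prints "q2"; and with operand 0 =
   (const_int 16), "%p0" prints "4", since 2^4 == 16.  */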
bcf19844 6618
cc8ca59e
JB
6619static void
6620aarch64_print_operand (FILE *f, rtx x, int code)
43e9d192 6621{
43cacb12 6622 rtx elt;
43e9d192
IB
6623 switch (code)
6624 {
f541a481
KT
6625 case 'c':
6626 switch (GET_CODE (x))
6627 {
6628 case CONST_INT:
6629 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6630 break;
6631
6632 case SYMBOL_REF:
6633 output_addr_const (f, x);
6634 break;
6635
6636 case CONST:
6637 if (GET_CODE (XEXP (x, 0)) == PLUS
6638 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
6639 {
6640 output_addr_const (f, x);
6641 break;
6642 }
6643 /* Fall through. */
6644
6645 default:
ee61f880 6646 output_operand_lossage ("unsupported operand for code '%c'", code);
f541a481
KT
6647 }
6648 break;
6649
43e9d192 6650 case 'e':
43e9d192
IB
6651 {
6652 int n;
6653
4aa81c2e 6654 if (!CONST_INT_P (x)
43e9d192
IB
6655 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
6656 {
6657 output_operand_lossage ("invalid operand for '%%%c'", code);
6658 return;
6659 }
6660
6661 switch (n)
6662 {
6663 case 3:
6664 fputc ('b', f);
6665 break;
6666 case 4:
6667 fputc ('h', f);
6668 break;
6669 case 5:
6670 fputc ('w', f);
6671 break;
6672 default:
6673 output_operand_lossage ("invalid operand for '%%%c'", code);
6674 return;
6675 }
6676 }
6677 break;
6678
6679 case 'p':
6680 {
6681 int n;
6682
4aa81c2e 6683 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
6684 {
6685 output_operand_lossage ("invalid operand for '%%%c'", code);
6686 return;
6687 }
6688
6689 asm_fprintf (f, "%d", n);
6690 }
6691 break;
6692
6693 case 'P':
4aa81c2e 6694 if (!CONST_INT_P (x))
43e9d192
IB
6695 {
6696 output_operand_lossage ("invalid operand for '%%%c'", code);
6697 return;
6698 }
6699
8d55c61b 6700 asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
43e9d192
IB
6701 break;
6702
6703 case 'H':
4aa81c2e 6704 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
6705 {
6706 output_operand_lossage ("invalid operand for '%%%c'", code);
6707 return;
6708 }
6709
01a3a324 6710 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
6711 break;
6712
43e9d192 6713 case 'M':
c8012fbc 6714 case 'm':
cd5660ab
KT
6715 {
6716 int cond_code;
c8012fbc
WD
6717 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
6718 if (x == const_true_rtx)
cd5660ab 6719 {
c8012fbc
WD
6720 if (code == 'M')
6721 fputs ("nv", f);
cd5660ab
KT
6722 return;
6723 }
43e9d192 6724
cd5660ab
KT
6725 if (!COMPARISON_P (x))
6726 {
6727 output_operand_lossage ("invalid operand for '%%%c'", code);
6728 return;
6729 }
c8012fbc 6730
cd5660ab
KT
6731 cond_code = aarch64_get_condition_code (x);
6732 gcc_assert (cond_code >= 0);
c8012fbc
WD
6733 if (code == 'M')
6734 cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code);
6735 fputs (aarch64_condition_codes[cond_code], f);
cd5660ab 6736 }
43e9d192
IB
6737 break;
6738
43cacb12
RS
6739 case 'N':
6740 if (!const_vec_duplicate_p (x, &elt))
6741 {
6742 output_operand_lossage ("invalid vector constant");
6743 return;
6744 }
6745
6746 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
6747 asm_fprintf (f, "%wd", -INTVAL (elt));
6748 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
6749 && aarch64_print_vector_float_operand (f, x, true))
6750 ;
6751 else
6752 {
6753 output_operand_lossage ("invalid vector constant");
6754 return;
6755 }
6756 break;
6757
43e9d192
IB
6758 case 'b':
6759 case 'h':
6760 case 's':
6761 case 'd':
6762 case 'q':
43e9d192
IB
6763 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
6764 {
6765 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
6766 return;
6767 }
50ce6f88 6768 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
6769 break;
6770
6771 case 'S':
6772 case 'T':
6773 case 'U':
6774 case 'V':
43e9d192
IB
6775 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
6776 {
6777 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
6778 return;
6779 }
43cacb12
RS
6780 asm_fprintf (f, "%c%d",
6781 aarch64_sve_data_mode_p (GET_MODE (x)) ? 'z' : 'v',
6782 REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
6783 break;
6784
2d8c6dc1 6785 case 'R':
2d8c6dc1
AH
6786 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
6787 {
6788 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
6789 return;
6790 }
6791 asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
6792 break;
6793
a05c0ddf 6794 case 'X':
4aa81c2e 6795 if (!CONST_INT_P (x))
a05c0ddf
IB
6796 {
6797 output_operand_lossage ("invalid operand for '%%%c'", code);
6798 return;
6799 }
50d38551 6800 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
6801 break;
6802
43cacb12
RS
6803 case 'C':
6804 {
6805 /* Print a replicated constant in hex. */
6806 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
6807 {
6808 output_operand_lossage ("invalid operand for '%%%c'", code);
6809 return;
6810 }
6811 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
6812 asm_fprintf (f, "0x%wx", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
6813 }
6814 break;
6815
6816 case 'D':
6817 {
6818 /* Print a replicated constant in decimal, treating it as
6819 unsigned. */
6820 if (!const_vec_duplicate_p (x, &elt) || !CONST_INT_P (elt))
6821 {
6822 output_operand_lossage ("invalid operand for '%%%c'", code);
6823 return;
6824 }
6825 scalar_mode inner_mode = GET_MODE_INNER (GET_MODE (x));
6826 asm_fprintf (f, "%wd", UINTVAL (elt) & GET_MODE_MASK (inner_mode));
6827 }
6828 break;
6829
43e9d192
IB
6830 case 'w':
6831 case 'x':
3520f7cc
JG
6832 if (x == const0_rtx
6833 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 6834 {
50ce6f88 6835 asm_fprintf (f, "%czr", code);
43e9d192
IB
6836 break;
6837 }
6838
6839 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
6840 {
50ce6f88 6841 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
6842 break;
6843 }
6844
6845 if (REG_P (x) && REGNO (x) == SP_REGNUM)
6846 {
50ce6f88 6847 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
6848 break;
6849 }
6850
6851 /* Fall through */
6852
6853 case 0:
43e9d192
IB
6854 if (x == NULL)
6855 {
6856 output_operand_lossage ("missing operand");
6857 return;
6858 }
6859
6860 switch (GET_CODE (x))
6861 {
6862 case REG:
43cacb12 6863 if (aarch64_sve_data_mode_p (GET_MODE (x)))
9f4cbab8
RS
6864 {
6865 if (REG_NREGS (x) == 1)
6866 asm_fprintf (f, "z%d", REGNO (x) - V0_REGNUM);
6867 else
6868 {
6869 char suffix
6870 = sizetochar (GET_MODE_UNIT_BITSIZE (GET_MODE (x)));
6871 asm_fprintf (f, "{z%d.%c - z%d.%c}",
6872 REGNO (x) - V0_REGNUM, suffix,
6873 END_REGNO (x) - V0_REGNUM - 1, suffix);
6874 }
6875 }
43cacb12
RS
6876 else
6877 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
6878 break;
6879
6880 case MEM:
cc8ca59e 6881 output_address (GET_MODE (x), XEXP (x, 0));
43e9d192
IB
6882 break;
6883
6884 case LABEL_REF:
6885 case SYMBOL_REF:
6886 output_addr_const (asm_out_file, x);
6887 break;
6888
6889 case CONST_INT:
6890 asm_fprintf (f, "%wd", INTVAL (x));
6891 break;
6892
43cacb12
RS
6893 case CONST:
6894 if (!VECTOR_MODE_P (GET_MODE (x)))
3520f7cc 6895 {
43cacb12
RS
6896 output_addr_const (asm_out_file, x);
6897 break;
3520f7cc 6898 }
43cacb12
RS
6899 /* fall through */
6900
6901 case CONST_VECTOR:
6902 if (!const_vec_duplicate_p (x, &elt))
3520f7cc 6903 {
43cacb12
RS
6904 output_operand_lossage ("invalid vector constant");
6905 return;
3520f7cc 6906 }
43cacb12
RS
6907
6908 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
6909 asm_fprintf (f, "%wd", INTVAL (elt));
6910 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
6911 && aarch64_print_vector_float_operand (f, x, false))
6912 ;
3520f7cc 6913 else
43cacb12
RS
6914 {
6915 output_operand_lossage ("invalid vector constant");
6916 return;
6917 }
43e9d192
IB
6918 break;
6919
3520f7cc 6920 case CONST_DOUBLE:
2ca5b430
KT
6921 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
6922 be getting CONST_DOUBLEs holding integers. */
6923 gcc_assert (GET_MODE (x) != VOIDmode);
6924 if (aarch64_float_const_zero_rtx_p (x))
3520f7cc
JG
6925 {
6926 fputc ('0', f);
6927 break;
6928 }
6929 else if (aarch64_float_const_representable_p (x))
6930 {
6931#define buf_size 20
6932 char float_buf[buf_size] = {'\0'};
34a72c33
RS
6933 real_to_decimal_for_mode (float_buf,
6934 CONST_DOUBLE_REAL_VALUE (x),
3520f7cc
JG
6935 buf_size, buf_size,
6936 1, GET_MODE (x));
6937 asm_fprintf (asm_out_file, "%s", float_buf);
6938 break;
6939#undef buf_size
6940 }
6941 output_operand_lossage ("invalid constant");
6942 return;
43e9d192
IB
6943 default:
6944 output_operand_lossage ("invalid operand");
6945 return;
6946 }
6947 break;
6948
6949 case 'A':
6950 if (GET_CODE (x) == HIGH)
6951 x = XEXP (x, 0);
6952
a6e0bfa7 6953 switch (aarch64_classify_symbolic_expression (x))
43e9d192 6954 {
6642bdb4 6955 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
6956 asm_fprintf (asm_out_file, ":got:");
6957 break;
6958
6959 case SYMBOL_SMALL_TLSGD:
6960 asm_fprintf (asm_out_file, ":tlsgd:");
6961 break;
6962
6963 case SYMBOL_SMALL_TLSDESC:
6964 asm_fprintf (asm_out_file, ":tlsdesc:");
6965 break;
6966
79496620 6967 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
6968 asm_fprintf (asm_out_file, ":gottprel:");
6969 break;
6970
d18ba284 6971 case SYMBOL_TLSLE24:
43e9d192
IB
6972 asm_fprintf (asm_out_file, ":tprel:");
6973 break;
6974
87dd8ab0
MS
6975 case SYMBOL_TINY_GOT:
6976 gcc_unreachable ();
6977 break;
6978
43e9d192
IB
6979 default:
6980 break;
6981 }
6982 output_addr_const (asm_out_file, x);
6983 break;
6984
6985 case 'L':
a6e0bfa7 6986 switch (aarch64_classify_symbolic_expression (x))
43e9d192 6987 {
6642bdb4 6988 case SYMBOL_SMALL_GOT_4G:
43e9d192
IB
6989 asm_fprintf (asm_out_file, ":lo12:");
6990 break;
6991
6992 case SYMBOL_SMALL_TLSGD:
6993 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
6994 break;
6995
6996 case SYMBOL_SMALL_TLSDESC:
6997 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
6998 break;
6999
79496620 7000 case SYMBOL_SMALL_TLSIE:
43e9d192
IB
7001 asm_fprintf (asm_out_file, ":gottprel_lo12:");
7002 break;
7003
cbf5629e
JW
7004 case SYMBOL_TLSLE12:
7005 asm_fprintf (asm_out_file, ":tprel_lo12:");
7006 break;
7007
d18ba284 7008 case SYMBOL_TLSLE24:
43e9d192
IB
7009 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
7010 break;
7011
87dd8ab0
MS
7012 case SYMBOL_TINY_GOT:
7013 asm_fprintf (asm_out_file, ":got:");
7014 break;
7015
5ae7caad
JW
7016 case SYMBOL_TINY_TLSIE:
7017 asm_fprintf (asm_out_file, ":gottprel:");
7018 break;
7019
43e9d192
IB
7020 default:
7021 break;
7022 }
7023 output_addr_const (asm_out_file, x);
7024 break;
7025
7026 case 'G':
a6e0bfa7 7027 switch (aarch64_classify_symbolic_expression (x))
43e9d192 7028 {
d18ba284 7029 case SYMBOL_TLSLE24:
43e9d192
IB
7030 asm_fprintf (asm_out_file, ":tprel_hi12:");
7031 break;
7032 default:
7033 break;
7034 }
7035 output_addr_const (asm_out_file, x);
7036 break;
7037
cf670503
ZC
7038 case 'k':
7039 {
c8012fbc 7040 HOST_WIDE_INT cond_code;
cf670503 7041
c8012fbc 7042 if (!CONST_INT_P (x))
cf670503
ZC
7043 {
7044 output_operand_lossage ("invalid operand for '%%%c'", code);
7045 return;
7046 }
7047
c8012fbc
WD
7048 cond_code = INTVAL (x);
7049 gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
7050 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
cf670503
ZC
7051 }
7052 break;
7053
e69a816d
WD
7054 case 'y':
7055 case 'z':
7056 {
7057 machine_mode mode = GET_MODE (x);
7058
c348cab0 7059 if (GET_CODE (x) != MEM
6a70badb 7060 || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16)))
e69a816d
WD
7061 {
7062 output_operand_lossage ("invalid operand for '%%%c'", code);
7063 return;
7064 }
7065
7066 if (code == 'y')
c348cab0
RS
7067 /* LDP/STP which uses a single double-width memory operand.
7068 Adjust the mode to appear like a typical LDP/STP.
7069 Currently this is supported for 16-byte accesses only. */
7070 mode = DFmode;
e69a816d 7071
c348cab0
RS
7072 if (!aarch64_print_ldpstp_address (f, mode, XEXP (x, 0)))
7073 output_operand_lossage ("invalid operand prefix '%%%c'", code);
e69a816d
WD
7074 }
7075 break;
7076
43e9d192
IB
7077 default:
7078 output_operand_lossage ("invalid operand prefix '%%%c'", code);
7079 return;
7080 }
7081}
7082
e69a816d
WD
7083/* Print address 'x' of a memory access with mode 'mode'.
7084 'op' is the context required by aarch64_classify_address. It can either be
7085 MEM for a normal memory access or PARALLEL for LDP/STP. */
c348cab0 7086static bool
a97d8b98
RS
7087aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x,
7088 aarch64_addr_query_type type)
43e9d192
IB
7089{
7090 struct aarch64_address_info addr;
6a70badb 7091 unsigned int size;
43e9d192 7092
e69a816d 7093 /* Check all addresses are Pmode - including ILP32. */
67c58c8f
SE
7094 if (GET_MODE (x) != Pmode)
7095 output_operand_lossage ("invalid address mode");
e69a816d 7096
a97d8b98 7097 if (aarch64_classify_address (&addr, x, mode, true, type))
43e9d192
IB
7098 switch (addr.type)
7099 {
7100 case ADDRESS_REG_IMM:
dc640181 7101 if (known_eq (addr.const_offset, 0))
01a3a324 7102 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43cacb12
RS
7103 else if (aarch64_sve_data_mode_p (mode))
7104 {
7105 HOST_WIDE_INT vnum
7106 = exact_div (addr.const_offset,
7107 BYTES_PER_SVE_VECTOR).to_constant ();
7108 asm_fprintf (f, "[%s, #%wd, mul vl]",
7109 reg_names[REGNO (addr.base)], vnum);
7110 }
7111 else if (aarch64_sve_pred_mode_p (mode))
7112 {
7113 HOST_WIDE_INT vnum
7114 = exact_div (addr.const_offset,
7115 BYTES_PER_SVE_PRED).to_constant ();
7116 asm_fprintf (f, "[%s, #%wd, mul vl]",
7117 reg_names[REGNO (addr.base)], vnum);
7118 }
43e9d192 7119 else
16a3246f 7120 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192 7121 INTVAL (addr.offset));
c348cab0 7122 return true;
43e9d192
IB
7123
7124 case ADDRESS_REG_REG:
7125 if (addr.shift == 0)
16a3246f 7126 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 7127 reg_names [REGNO (addr.offset)]);
43e9d192 7128 else
16a3246f 7129 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 7130 reg_names [REGNO (addr.offset)], addr.shift);
c348cab0 7131 return true;
43e9d192
IB
7132
7133 case ADDRESS_REG_UXTW:
7134 if (addr.shift == 0)
16a3246f 7135 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
7136 REGNO (addr.offset) - R0_REGNUM);
7137 else
16a3246f 7138 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 7139 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 7140 return true;
43e9d192
IB
7141
7142 case ADDRESS_REG_SXTW:
7143 if (addr.shift == 0)
16a3246f 7144 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
7145 REGNO (addr.offset) - R0_REGNUM);
7146 else
16a3246f 7147 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192 7148 REGNO (addr.offset) - R0_REGNUM, addr.shift);
c348cab0 7149 return true;
43e9d192
IB
7150
7151 case ADDRESS_REG_WB:
6a70badb
RS
7152 /* Writeback is only supported for fixed-width modes. */
7153 size = GET_MODE_SIZE (mode).to_constant ();
43e9d192
IB
7154 switch (GET_CODE (x))
7155 {
7156 case PRE_INC:
6a70badb 7157 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], size);
c348cab0 7158 return true;
43e9d192 7159 case POST_INC:
6a70badb 7160 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], size);
c348cab0 7161 return true;
43e9d192 7162 case PRE_DEC:
6a70badb 7163 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], size);
c348cab0 7164 return true;
43e9d192 7165 case POST_DEC:
6a70badb 7166 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], size);
c348cab0 7167 return true;
43e9d192 7168 case PRE_MODIFY:
6a70badb 7169 asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
43e9d192 7170 INTVAL (addr.offset));
c348cab0 7171 return true;
43e9d192 7172 case POST_MODIFY:
6a70badb 7173 asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
43e9d192 7174 INTVAL (addr.offset));
c348cab0 7175 return true;
43e9d192
IB
7176 default:
7177 break;
7178 }
7179 break;
7180
7181 case ADDRESS_LO_SUM:
16a3246f 7182 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
7183 output_addr_const (f, addr.offset);
7184 asm_fprintf (f, "]");
c348cab0 7185 return true;
43e9d192
IB
7186
7187 case ADDRESS_SYMBOLIC:
d6591257 7188 output_addr_const (f, x);
c348cab0 7189 return true;
43e9d192
IB
7190 }
7191
c348cab0 7192 return false;
43e9d192
IB
7193}
7194
e69a816d 7195/* Print address 'x' of a LDP/STP with mode 'mode'. */
c348cab0 7196static bool
e69a816d
WD
7197aarch64_print_ldpstp_address (FILE *f, machine_mode mode, rtx x)
7198{
a97d8b98 7199 return aarch64_print_address_internal (f, mode, x, ADDR_QUERY_LDP_STP);
e69a816d
WD
7200}
7201
7202/* Print address 'x' of a memory access with mode 'mode'. */
7203static void
7204aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
7205{
43cacb12 7206 if (!aarch64_print_address_internal (f, mode, x, ADDR_QUERY_ANY))
c348cab0 7207 output_addr_const (f, x);
e69a816d
WD
7208}
7209
43e9d192
IB
7210bool
7211aarch64_label_mentioned_p (rtx x)
7212{
7213 const char *fmt;
7214 int i;
7215
7216 if (GET_CODE (x) == LABEL_REF)
7217 return true;
7218
7219 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
7220 referencing instruction, but they are constant offsets, not
7221 symbols. */
7222 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7223 return false;
7224
7225 fmt = GET_RTX_FORMAT (GET_CODE (x));
7226 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7227 {
7228 if (fmt[i] == 'E')
7229 {
7230 int j;
7231
7232 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7233 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
7234 return 1;
7235 }
7236 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
7237 return 1;
7238 }
7239
7240 return 0;
7241}
7242
7243/* Implement REGNO_REG_CLASS. */
7244
7245enum reg_class
7246aarch64_regno_regclass (unsigned regno)
7247{
7248 if (GP_REGNUM_P (regno))
a4a182c6 7249 return GENERAL_REGS;
43e9d192
IB
7250
7251 if (regno == SP_REGNUM)
7252 return STACK_REG;
7253
7254 if (regno == FRAME_POINTER_REGNUM
7255 || regno == ARG_POINTER_REGNUM)
f24bb080 7256 return POINTER_REGS;
43e9d192
IB
7257
7258 if (FP_REGNUM_P (regno))
7259 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
7260
43cacb12
RS
7261 if (PR_REGNUM_P (regno))
7262 return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
7263
43e9d192
IB
7264 return NO_REGS;
7265}
7266
6a70badb
RS
7267/* OFFSET is an address offset for mode MODE, which has SIZE bytes.
7268 If OFFSET is out of range, return an offset of an anchor point
7269 that is in range. Return 0 otherwise. */
7270
7271static HOST_WIDE_INT
7272aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
7273 machine_mode mode)
7274{
7275 /* Does it look like we'll need a 16-byte load/store-pair operation? */
7276 if (size > 16)
7277 return (offset + 0x400) & ~0x7f0;
7278
7279 /* For offsets that aren't a multiple of the access size, the limit is
7280 -256...255. */
7281 if (offset & (size - 1))
7282 {
7283 /* BLKmode typically uses LDP of X-registers. */
7284 if (mode == BLKmode)
7285 return (offset + 512) & ~0x3ff;
7286 return (offset + 0x100) & ~0x1ff;
7287 }
7288
7289 /* Small negative offsets are supported. */
7290 if (IN_RANGE (offset, -256, 0))
7291 return 0;
7292
7293 if (mode == TImode || mode == TFmode)
7294 return (offset + 0x100) & ~0x1ff;
7295
7296 /* Use 12-bit offset by access size. */
7297 return offset & (~0xfff * size);
7298}
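/* Worked example (hypothetical values): for an SImode access, SIZE is 4, so an
   aligned offset of 0x4120 falls through to the final case and the anchor is
   0x4120 & ~0x3fff == 0x4000; the caller is then left with a residual offset
   of 0x120, which fits the scaled 12-bit immediate of a 32-bit LDR/STR.  An
   unaligned offset such as 0x4123 instead takes the (offset + 0x100) & ~0x1ff
   path, giving an anchor of 0x4200 and a residual of -0xdd, which fits the
   signed 9-bit LDUR/STUR range.  */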
7299
0c4ec427 7300static rtx
ef4bddc2 7301aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
0c4ec427
RE
7302{
7303 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
7304 where mask is selected by alignment and size of the offset.
7305 We try to pick as large a range for the offset as possible to
7306 maximize the chance of a CSE. However, for aligned addresses
7307 we limit the range to 4k so that structures with different sized
e8426e0a
BC
7308 elements are likely to use the same base. We need to be careful
7309 not to split a CONST for some forms of address expression, otherwise
7310 it will generate sub-optimal code. */
0c4ec427
RE
7311
7312 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
7313 {
9e0218fc 7314 rtx base = XEXP (x, 0);
17d7bdd8 7315 rtx offset_rtx = XEXP (x, 1);
9e0218fc 7316 HOST_WIDE_INT offset = INTVAL (offset_rtx);
0c4ec427 7317
9e0218fc 7318 if (GET_CODE (base) == PLUS)
e8426e0a 7319 {
9e0218fc
RH
7320 rtx op0 = XEXP (base, 0);
7321 rtx op1 = XEXP (base, 1);
7322
7323 /* Force any scaling into a temp for CSE. */
7324 op0 = force_reg (Pmode, op0);
7325 op1 = force_reg (Pmode, op1);
7326
7327 /* Let the pointer register be in op0. */
7328 if (REG_POINTER (op1))
7329 std::swap (op0, op1);
7330
7331 /* If the pointer is virtual or frame related, then we know that
7332 virtual register instantiation or register elimination is going
7333 to apply a second constant. We want the two constants folded
7334 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
7335 if (virt_or_elim_regno_p (REGNO (op0)))
e8426e0a 7336 {
9e0218fc
RH
7337 base = expand_binop (Pmode, add_optab, op0, offset_rtx,
7338 NULL_RTX, true, OPTAB_DIRECT);
7339 return gen_rtx_PLUS (Pmode, base, op1);
e8426e0a 7340 }
e8426e0a 7341
9e0218fc
RH
7342 /* Otherwise, in order to encourage CSE (and thence loop strength
7343 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
7344 base = expand_binop (Pmode, add_optab, op0, op1,
7345 NULL_RTX, true, OPTAB_DIRECT);
7346 x = gen_rtx_PLUS (Pmode, base, offset_rtx);
e8426e0a
BC
7347 }
7348
6a70badb
RS
7349 HOST_WIDE_INT size;
7350 if (GET_MODE_SIZE (mode).is_constant (&size))
ff0f3f1c 7351 {
6a70badb
RS
7352 HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size,
7353 mode);
7354 if (base_offset != 0)
7355 {
7356 base = plus_constant (Pmode, base, base_offset);
7357 base = force_operand (base, NULL_RTX);
7358 return plus_constant (Pmode, base, offset - base_offset);
7359 }
9e0218fc 7360 }
0c4ec427
RE
7361 }
7362
7363 return x;
7364}
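/* Illustrative example (hypothetical RTL): legitimizing
   (plus (reg X) (const_int 0x4120)) for an SImode access computes
   base_offset = 0x4000 via aarch64_anchor_offset, forces "X + 0x4000" into a
   temporary register, and returns (plus (reg tmp) (const_int 0x120)), so that
   nearby accesses whose offsets fall in the same 16 KiB window can CSE the
   shared base.  */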
7365
b4f50fd4
RR
7366/* Return the reload icode required for a constant pool in mode. */
7367static enum insn_code
7368aarch64_constant_pool_reload_icode (machine_mode mode)
7369{
7370 switch (mode)
7371 {
4e10a5a7 7372 case E_SFmode:
b4f50fd4
RR
7373 return CODE_FOR_aarch64_reload_movcpsfdi;
7374
4e10a5a7 7375 case E_DFmode:
b4f50fd4
RR
7376 return CODE_FOR_aarch64_reload_movcpdfdi;
7377
4e10a5a7 7378 case E_TFmode:
b4f50fd4
RR
7379 return CODE_FOR_aarch64_reload_movcptfdi;
7380
4e10a5a7 7381 case E_V8QImode:
b4f50fd4
RR
7382 return CODE_FOR_aarch64_reload_movcpv8qidi;
7383
4e10a5a7 7384 case E_V16QImode:
b4f50fd4
RR
7385 return CODE_FOR_aarch64_reload_movcpv16qidi;
7386
4e10a5a7 7387 case E_V4HImode:
b4f50fd4
RR
7388 return CODE_FOR_aarch64_reload_movcpv4hidi;
7389
4e10a5a7 7390 case E_V8HImode:
b4f50fd4
RR
7391 return CODE_FOR_aarch64_reload_movcpv8hidi;
7392
4e10a5a7 7393 case E_V2SImode:
b4f50fd4
RR
7394 return CODE_FOR_aarch64_reload_movcpv2sidi;
7395
4e10a5a7 7396 case E_V4SImode:
b4f50fd4
RR
7397 return CODE_FOR_aarch64_reload_movcpv4sidi;
7398
4e10a5a7 7399 case E_V2DImode:
b4f50fd4
RR
7400 return CODE_FOR_aarch64_reload_movcpv2didi;
7401
4e10a5a7 7402 case E_V2DFmode:
b4f50fd4
RR
7403 return CODE_FOR_aarch64_reload_movcpv2dfdi;
7404
7405 default:
7406 gcc_unreachable ();
7407 }
7408
7409 gcc_unreachable ();
7410}
43e9d192
IB
7411static reg_class_t
7412aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
7413 reg_class_t rclass,
ef4bddc2 7414 machine_mode mode,
43e9d192
IB
7415 secondary_reload_info *sri)
7416{
9a1b9cb4
RS
7417 /* Use aarch64_sve_reload_be for SVE reloads that cannot be handled
7418 directly by the *aarch64_sve_mov<mode>_be move pattern. See the
7419 comment at the head of aarch64-sve.md for more details about the
7420 big-endian handling. */
43cacb12
RS
7421 if (BYTES_BIG_ENDIAN
7422 && reg_class_subset_p (rclass, FP_REGS)
9a1b9cb4
RS
7423 && !((REG_P (x) && HARD_REGISTER_P (x))
7424 || aarch64_simd_valid_immediate (x, NULL))
43cacb12
RS
7425 && aarch64_sve_data_mode_p (mode))
7426 {
7427 sri->icode = CODE_FOR_aarch64_sve_reload_be;
7428 return NO_REGS;
7429 }
b4f50fd4
RR
7430
7431 /* If we have to disable direct literal pool loads and stores because the
7432 function is too big, then we need a scratch register. */
7433 if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
7434 && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
7435 || targetm.vector_mode_supported_p (GET_MODE (x)))
9ee6540a 7436 && !aarch64_pcrelative_literal_loads)
b4f50fd4
RR
7437 {
7438 sri->icode = aarch64_constant_pool_reload_icode (mode);
7439 return NO_REGS;
7440 }
7441
43e9d192
IB
7442 /* Without the TARGET_SIMD instructions we cannot move a Q register
7443 to a Q register directly. We need a scratch. */
7444 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
7445 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
7446 && reg_class_subset_p (rclass, FP_REGS))
7447 {
7448 if (mode == TFmode)
7449 sri->icode = CODE_FOR_aarch64_reload_movtf;
7450 else if (mode == TImode)
7451 sri->icode = CODE_FOR_aarch64_reload_movti;
7452 return NO_REGS;
7453 }
7454
 7455 /* A TFmode or TImode memory access should be handled via FP_REGS
7456 because AArch64 has richer addressing modes for LDR/STR instructions
7457 than LDP/STP instructions. */
d5726973 7458 if (TARGET_FLOAT && rclass == GENERAL_REGS
6a70badb 7459 && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
43e9d192
IB
7460 return FP_REGS;
7461
7462 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 7463 return GENERAL_REGS;
43e9d192
IB
7464
7465 return NO_REGS;
7466}
7467
7468static bool
6216fd90 7469aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
43e9d192 7470{
6216fd90 7471 gcc_assert (from == ARG_POINTER_REGNUM || from == FRAME_POINTER_REGNUM);
43e9d192 7472
6216fd90
WD
7473 /* If we need a frame pointer, ARG_POINTER_REGNUM and FRAME_POINTER_REGNUM
7474 can only eliminate to HARD_FRAME_POINTER_REGNUM. */
43e9d192 7475 if (frame_pointer_needed)
6216fd90 7476 return to == HARD_FRAME_POINTER_REGNUM;
43e9d192
IB
7477 return true;
7478}
7479
6a70badb 7480poly_int64
43e9d192
IB
7481aarch64_initial_elimination_offset (unsigned from, unsigned to)
7482{
43e9d192 7483 aarch64_layout_frame ();
78c29983
MS
7484
7485 if (to == HARD_FRAME_POINTER_REGNUM)
7486 {
7487 if (from == ARG_POINTER_REGNUM)
71bfb77a 7488 return cfun->machine->frame.hard_fp_offset;
78c29983
MS
7489
7490 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
7491 return cfun->machine->frame.hard_fp_offset
7492 - cfun->machine->frame.locals_offset;
78c29983
MS
7493 }
7494
7495 if (to == STACK_POINTER_REGNUM)
7496 {
7497 if (from == FRAME_POINTER_REGNUM)
71bfb77a
WD
7498 return cfun->machine->frame.frame_size
7499 - cfun->machine->frame.locals_offset;
78c29983
MS
7500 }
7501
1c960e02 7502 return cfun->machine->frame.frame_size;
43e9d192
IB
7503}
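/* Worked example with hypothetical frame values: if frame_size is 96,
   hard_fp_offset is 16 and locals_offset is 16, then eliminating
   ARG_POINTER_REGNUM to HARD_FRAME_POINTER_REGNUM yields 16, eliminating
   FRAME_POINTER_REGNUM to HARD_FRAME_POINTER_REGNUM yields 0, eliminating
   FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM yields 96 - 16 = 80, and any
   other combination falls through to the full frame_size of 96.  */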
7504
43e9d192
IB
7505/* Implement RETURN_ADDR_RTX. We do not support moving back to a
7506 previous frame. */
7507
7508rtx
7509aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
7510{
7511 if (count != 0)
7512 return const0_rtx;
7513 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
7514}
7515
7516
7517static void
7518aarch64_asm_trampoline_template (FILE *f)
7519{
28514dda
YZ
7520 if (TARGET_ILP32)
7521 {
7522 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
7523 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
7524 }
7525 else
7526 {
7527 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
7528 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
7529 }
01a3a324 7530 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 7531 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
7532 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
7533 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
7534}
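/* For the LP64 case the template above amounts to roughly the following
   assembly, assuming the usual register assignments of IP1 = x17 and the
   static chain register = x18 (the two trailing pointer-sized slots are
   filled in by aarch64_trampoline_init below):

	ldr	x17, .+16	// load the target function address
	ldr	x18, .+20	// load the static chain value
	br	x17
	.word	0		// padding to keep the data slots aligned
	.xword	0		// target function address
	.xword	0		// static chain value
*/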
7535
7536static void
7537aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
7538{
7539 rtx fnaddr, mem, a_tramp;
28514dda 7540 const int tramp_code_sz = 16;
43e9d192
IB
7541
7542 /* Don't need to copy the trailing D-words, we fill those in below. */
7543 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
7544 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
7545 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 7546 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
7547 if (GET_MODE (fnaddr) != ptr_mode)
7548 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
7549 emit_move_insn (mem, fnaddr);
7550
28514dda 7551 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
7552 emit_move_insn (mem, chain_value);
7553
7554 /* XXX We should really define a "clear_cache" pattern and use
7555 gen_clear_cache(). */
7556 a_tramp = XEXP (m_tramp, 0);
7557 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
db69559b 7558 LCT_NORMAL, VOIDmode, a_tramp, ptr_mode,
28514dda
YZ
7559 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
7560 ptr_mode);
43e9d192
IB
7561}
7562
7563static unsigned char
ef4bddc2 7564aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
43e9d192 7565{
6a70badb
RS
7566 /* ??? Logically we should only need to provide a value when
7567 HARD_REGNO_MODE_OK says that at least one register in REGCLASS
7568 can hold MODE, but at the moment we need to handle all modes.
7569 Just ignore any runtime parts for registers that can't store them. */
7570 HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode));
43cacb12 7571 unsigned int nregs;
43e9d192
IB
7572 switch (regclass)
7573 {
d677263e 7574 case TAILCALL_ADDR_REGS:
43e9d192
IB
7575 case POINTER_REGS:
7576 case GENERAL_REGS:
7577 case ALL_REGS:
f25a140b 7578 case POINTER_AND_FP_REGS:
43e9d192
IB
7579 case FP_REGS:
7580 case FP_LO_REGS:
43cacb12
RS
7581 if (aarch64_sve_data_mode_p (mode)
7582 && constant_multiple_p (GET_MODE_SIZE (mode),
7583 BYTES_PER_SVE_VECTOR, &nregs))
7584 return nregs;
7585 return (aarch64_vector_data_mode_p (mode)
6a70badb
RS
7586 ? CEIL (lowest_size, UNITS_PER_VREG)
7587 : CEIL (lowest_size, UNITS_PER_WORD));
43e9d192 7588 case STACK_REG:
43cacb12
RS
7589 case PR_REGS:
7590 case PR_LO_REGS:
7591 case PR_HI_REGS:
43e9d192
IB
7592 return 1;
7593
7594 case NO_REGS:
7595 return 0;
7596
7597 default:
7598 break;
7599 }
7600 gcc_unreachable ();
7601}
7602
7603static reg_class_t
78d8b9f0 7604aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 7605{
51bb310d 7606 if (regclass == POINTER_REGS)
78d8b9f0
IB
7607 return GENERAL_REGS;
7608
51bb310d
MS
7609 if (regclass == STACK_REG)
7610 {
7611 if (REG_P(x)
7612 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
7613 return regclass;
7614
7615 return NO_REGS;
7616 }
7617
27bd251b
IB
 7618 /* Register elimination can result in a request for
 7619 SP+constant->FP_REGS. We cannot support such operations which
 7620 use SP as source and an FP_REG as destination, so reject such
 7621 requests right now. */
7622 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
7623 {
7624 rtx lhs = XEXP (x, 0);
7625
7626 /* Look through a possible SUBREG introduced by ILP32. */
7627 if (GET_CODE (lhs) == SUBREG)
7628 lhs = SUBREG_REG (lhs);
7629
7630 gcc_assert (REG_P (lhs));
7631 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
7632 POINTER_REGS));
7633 return NO_REGS;
7634 }
7635
78d8b9f0 7636 return regclass;
43e9d192
IB
7637}
7638
7639void
7640aarch64_asm_output_labelref (FILE* f, const char *name)
7641{
7642 asm_fprintf (f, "%U%s", name);
7643}
7644
7645static void
7646aarch64_elf_asm_constructor (rtx symbol, int priority)
7647{
7648 if (priority == DEFAULT_INIT_PRIORITY)
7649 default_ctor_section_asm_out_constructor (symbol, priority);
7650 else
7651 {
7652 section *s;
53d190c1
AT
 7653 /* While priority is known to be in the range [0, 65535], and so 18 bytes
 7654 would be enough, the compiler might not know that. To avoid a
 7655 -Wformat-truncation false positive, use a larger size. */
7656 char buf[23];
43e9d192 7657 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
fcef3abd 7658 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
7659 switch_to_section (s);
7660 assemble_align (POINTER_SIZE);
28514dda 7661 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
7662 }
7663}
7664
7665static void
7666aarch64_elf_asm_destructor (rtx symbol, int priority)
7667{
7668 if (priority == DEFAULT_INIT_PRIORITY)
7669 default_dtor_section_asm_out_destructor (symbol, priority);
7670 else
7671 {
7672 section *s;
53d190c1
AT
 7673 /* While priority is known to be in the range [0, 65535], and so 18 bytes
 7674 would be enough, the compiler might not know that. To avoid a
 7675 -Wformat-truncation false positive, use a larger size. */
7676 char buf[23];
43e9d192 7677 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
fcef3abd 7678 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
43e9d192
IB
7679 switch_to_section (s);
7680 assemble_align (POINTER_SIZE);
28514dda 7681 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
7682 }
7683}
7684
7685const char*
7686aarch64_output_casesi (rtx *operands)
7687{
7688 char buf[100];
7689 char label[100];
b32d5189 7690 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
7691 int index;
7692 static const char *const patterns[4][2] =
7693 {
7694 {
7695 "ldrb\t%w3, [%0,%w1,uxtw]",
7696 "add\t%3, %4, %w3, sxtb #2"
7697 },
7698 {
7699 "ldrh\t%w3, [%0,%w1,uxtw #1]",
7700 "add\t%3, %4, %w3, sxth #2"
7701 },
7702 {
7703 "ldr\t%w3, [%0,%w1,uxtw #2]",
7704 "add\t%3, %4, %w3, sxtw #2"
7705 },
7706 /* We assume that DImode is only generated when not optimizing and
7707 that we don't really need 64-bit address offsets. That would
7708 imply an object file with 8GB of code in a single function! */
7709 {
7710 "ldr\t%w3, [%0,%w1,uxtw #2]",
7711 "add\t%3, %4, %w3, sxtw #2"
7712 }
7713 };
7714
7715 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
7716
77e994c9
RS
7717 scalar_int_mode mode = as_a <scalar_int_mode> (GET_MODE (diff_vec));
7718 index = exact_log2 (GET_MODE_SIZE (mode));
43e9d192
IB
7719
7720 gcc_assert (index >= 0 && index <= 3);
7721
 7722 /* Need to implement table size reduction, by changing the code below. */
7723 output_asm_insn (patterns[index][0], operands);
7724 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
7725 snprintf (buf, sizeof (buf),
7726 "adr\t%%4, %s", targetm.strip_name_encoding (label));
7727 output_asm_insn (buf, operands);
7728 output_asm_insn (patterns[index][1], operands);
7729 output_asm_insn ("br\t%3", operands);
7730 assemble_label (asm_out_file, label);
7731 return "";
7732}
7733
7734
7735/* Return size in bits of an arithmetic operand which is shifted/scaled and
7736 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
7737 operator. */
7738
7739int
7740aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
7741{
7742 if (shift >= 0 && shift <= 3)
7743 {
7744 int size;
7745 for (size = 8; size <= 32; size *= 2)
7746 {
7747 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
7748 if (mask == bits << shift)
7749 return size;
7750 }
7751 }
7752 return 0;
7753}
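/* Worked example (hypothetical arguments): aarch64_uxt_size (1, 0x1fe)
   returns 8, because 0xff << 1 == 0x1fe, i.e. the mask selects a byte that
   has been shifted left by one, matching a UXTB-style extended operand.
   aarch64_uxt_size (2, 0x3fffc) similarly returns 16 (UXTH), while a mask
   that is not a contiguous 8/16/32-bit field at the given shift returns 0.  */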
7754
e78d485e
RR
 7755/* Constant pools are per-function only when PC-relative
 7756 literal loads are enabled or we are using the large memory
 7757 model. */
7758
7759static inline bool
7760aarch64_can_use_per_function_literal_pools_p (void)
7761{
9ee6540a 7762 return (aarch64_pcrelative_literal_loads
e78d485e
RR
7763 || aarch64_cmodel == AARCH64_CMODEL_LARGE);
7764}
7765
43e9d192 7766static bool
e78d485e 7767aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
43e9d192 7768{
74a9301d
VM
7769 /* We can't use blocks for constants when we're using a per-function
7770 constant pool. */
7771 return !aarch64_can_use_per_function_literal_pools_p ();
43e9d192
IB
7772}
7773
e78d485e
RR
7774/* Select appropriate section for constants depending
7775 on where we place literal pools. */
7776
43e9d192 7777static section *
e78d485e
RR
7778aarch64_select_rtx_section (machine_mode mode,
7779 rtx x,
7780 unsigned HOST_WIDE_INT align)
43e9d192 7781{
e78d485e
RR
7782 if (aarch64_can_use_per_function_literal_pools_p ())
7783 return function_section (current_function_decl);
43e9d192 7784
e78d485e
RR
7785 return default_elf_select_rtx_section (mode, x, align);
7786}
43e9d192 7787
5fca7b66
RH
7788/* Implement ASM_OUTPUT_POOL_EPILOGUE. */
7789void
7790aarch64_asm_output_pool_epilogue (FILE *f, const char *, tree,
7791 HOST_WIDE_INT offset)
7792{
7793 /* When using per-function literal pools, we must ensure that any code
7794 section is aligned to the minimal instruction length, lest we get
7795 errors from the assembler re "unaligned instructions". */
7796 if ((offset & 3) && aarch64_can_use_per_function_literal_pools_p ())
7797 ASM_OUTPUT_ALIGN (f, 2);
7798}
7799
43e9d192
IB
7800/* Costs. */
7801
7802/* Helper function for rtx cost calculation. Strip a shift expression
7803 from X. Returns the inner operand if successful, or the original
7804 expression on failure. */
7805static rtx
7806aarch64_strip_shift (rtx x)
7807{
7808 rtx op = x;
7809
57b77d46
RE
7810 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
7811 we can convert both to ROR during final output. */
43e9d192
IB
7812 if ((GET_CODE (op) == ASHIFT
7813 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
7814 || GET_CODE (op) == LSHIFTRT
7815 || GET_CODE (op) == ROTATERT
7816 || GET_CODE (op) == ROTATE)
43e9d192
IB
7817 && CONST_INT_P (XEXP (op, 1)))
7818 return XEXP (op, 0);
7819
7820 if (GET_CODE (op) == MULT
7821 && CONST_INT_P (XEXP (op, 1))
7822 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
7823 return XEXP (op, 0);
7824
7825 return x;
7826}
7827
4745e701 7828/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
7829 expression from X. Returns the inner operand if successful, or the
7830 original expression on failure. We deal with a number of possible
b10f1009
AP
7831 canonicalization variations here. If STRIP_SHIFT is true, then
7832 we can strip off a shift also. */
43e9d192 7833static rtx
b10f1009 7834aarch64_strip_extend (rtx x, bool strip_shift)
43e9d192 7835{
77e994c9 7836 scalar_int_mode mode;
43e9d192
IB
7837 rtx op = x;
7838
77e994c9
RS
7839 if (!is_a <scalar_int_mode> (GET_MODE (op), &mode))
7840 return op;
7841
43e9d192
IB
7842 /* Zero and sign extraction of a widened value. */
7843 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
7844 && XEXP (op, 2) == const0_rtx
4745e701 7845 && GET_CODE (XEXP (op, 0)) == MULT
77e994c9 7846 && aarch64_is_extend_from_extract (mode, XEXP (XEXP (op, 0), 1),
43e9d192
IB
7847 XEXP (op, 1)))
7848 return XEXP (XEXP (op, 0), 0);
7849
7850 /* It can also be represented (for zero-extend) as an AND with an
7851 immediate. */
7852 if (GET_CODE (op) == AND
7853 && GET_CODE (XEXP (op, 0)) == MULT
7854 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
7855 && CONST_INT_P (XEXP (op, 1))
7856 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
7857 INTVAL (XEXP (op, 1))) != 0)
7858 return XEXP (XEXP (op, 0), 0);
7859
7860 /* Now handle extended register, as this may also have an optional
7861 left shift by 1..4. */
b10f1009
AP
7862 if (strip_shift
7863 && GET_CODE (op) == ASHIFT
43e9d192
IB
7864 && CONST_INT_P (XEXP (op, 1))
7865 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
7866 op = XEXP (op, 0);
7867
7868 if (GET_CODE (op) == ZERO_EXTEND
7869 || GET_CODE (op) == SIGN_EXTEND)
7870 op = XEXP (op, 0);
7871
7872 if (op != x)
7873 return op;
7874
4745e701
JG
7875 return x;
7876}
7877
0a78ebe4
KT
7878/* Return true iff CODE is a shift supported in combination
7879 with arithmetic instructions. */
4d1919ed 7880
0a78ebe4
KT
7881static bool
7882aarch64_shift_p (enum rtx_code code)
7883{
7884 return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
7885}
7886
b10f1009
AP
7887
7888/* Return true iff X is a cheap shift without a sign extend. */
7889
7890static bool
7891aarch64_cheap_mult_shift_p (rtx x)
7892{
7893 rtx op0, op1;
7894
7895 op0 = XEXP (x, 0);
7896 op1 = XEXP (x, 1);
7897
7898 if (!(aarch64_tune_params.extra_tuning_flags
7899 & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
7900 return false;
7901
7902 if (GET_CODE (op0) == SIGN_EXTEND)
7903 return false;
7904
7905 if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
7906 && UINTVAL (op1) <= 4)
7907 return true;
7908
7909 if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
7910 return false;
7911
7912 HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
7913
7914 if (l2 > 0 && l2 <= 4)
7915 return true;
7916
7917 return false;
7918}
7919
4745e701 7920/* Helper function for rtx cost calculation. Calculate the cost of
0a78ebe4
KT
7921 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
7922 Return the calculated cost of the expression, recursing manually in to
4745e701
JG
7923 operands where needed. */
7924
7925static int
e548c9df 7926aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
4745e701
JG
7927{
7928 rtx op0, op1;
7929 const struct cpu_cost_table *extra_cost
b175b679 7930 = aarch64_tune_params.insn_extra_cost;
4745e701 7931 int cost = 0;
0a78ebe4 7932 bool compound_p = (outer == PLUS || outer == MINUS);
ef4bddc2 7933 machine_mode mode = GET_MODE (x);
4745e701
JG
7934
7935 gcc_checking_assert (code == MULT);
7936
7937 op0 = XEXP (x, 0);
7938 op1 = XEXP (x, 1);
7939
7940 if (VECTOR_MODE_P (mode))
7941 mode = GET_MODE_INNER (mode);
7942
7943 /* Integer multiply/fma. */
7944 if (GET_MODE_CLASS (mode) == MODE_INT)
7945 {
7946 /* The multiply will be canonicalized as a shift, cost it as such. */
0a78ebe4
KT
7947 if (aarch64_shift_p (GET_CODE (x))
7948 || (CONST_INT_P (op1)
7949 && exact_log2 (INTVAL (op1)) > 0))
4745e701 7950 {
0a78ebe4
KT
7951 bool is_extend = GET_CODE (op0) == ZERO_EXTEND
7952 || GET_CODE (op0) == SIGN_EXTEND;
4745e701
JG
7953 if (speed)
7954 {
0a78ebe4
KT
7955 if (compound_p)
7956 {
b10f1009
AP
7957 /* If the shift is considered cheap,
7958 then don't add any cost. */
7959 if (aarch64_cheap_mult_shift_p (x))
7960 ;
7961 else if (REG_P (op1))
0a78ebe4
KT
7962 /* ARITH + shift-by-register. */
7963 cost += extra_cost->alu.arith_shift_reg;
7964 else if (is_extend)
7965 /* ARITH + extended register. We don't have a cost field
7966 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
7967 cost += extra_cost->alu.extend_arith;
7968 else
7969 /* ARITH + shift-by-immediate. */
7970 cost += extra_cost->alu.arith_shift;
7971 }
4745e701
JG
7972 else
7973 /* LSL (immediate). */
0a78ebe4
KT
7974 cost += extra_cost->alu.shift;
7975
4745e701 7976 }
0a78ebe4
KT
7977 /* Strip extends as we will have costed them in the case above. */
7978 if (is_extend)
b10f1009 7979 op0 = aarch64_strip_extend (op0, true);
4745e701 7980
e548c9df 7981 cost += rtx_cost (op0, VOIDmode, code, 0, speed);
4745e701
JG
7982
7983 return cost;
7984 }
7985
d2ac256b
KT
7986 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
7987 compound and let the below cases handle it. After all, MNEG is a
7988 special-case alias of MSUB. */
7989 if (GET_CODE (op0) == NEG)
7990 {
7991 op0 = XEXP (op0, 0);
7992 compound_p = true;
7993 }
7994
4745e701
JG
7995 /* Integer multiplies or FMAs have zero/sign extending variants. */
7996 if ((GET_CODE (op0) == ZERO_EXTEND
7997 && GET_CODE (op1) == ZERO_EXTEND)
7998 || (GET_CODE (op0) == SIGN_EXTEND
7999 && GET_CODE (op1) == SIGN_EXTEND))
8000 {
e548c9df
AM
8001 cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
8002 cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);
4745e701
JG
8003
8004 if (speed)
8005 {
0a78ebe4 8006 if (compound_p)
d2ac256b 8007 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
4745e701
JG
8008 cost += extra_cost->mult[0].extend_add;
8009 else
8010 /* MUL/SMULL/UMULL. */
8011 cost += extra_cost->mult[0].extend;
8012 }
8013
8014 return cost;
8015 }
8016
d2ac256b 8017 /* This is either an integer multiply or a MADD. In both cases
4745e701 8018 we want to recurse and cost the operands. */
e548c9df
AM
8019 cost += rtx_cost (op0, mode, MULT, 0, speed);
8020 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
8021
8022 if (speed)
8023 {
0a78ebe4 8024 if (compound_p)
d2ac256b 8025 /* MADD/MSUB. */
4745e701
JG
8026 cost += extra_cost->mult[mode == DImode].add;
8027 else
8028 /* MUL. */
8029 cost += extra_cost->mult[mode == DImode].simple;
8030 }
8031
8032 return cost;
8033 }
8034 else
8035 {
8036 if (speed)
8037 {
3d840f7d 8038 /* Floating-point FMA/FMUL can also support negations of the
d318517d
SN
8039 operands, unless the rounding mode is upward or downward in
8040 which case FNMUL is different than FMUL with operand negation. */
8041 bool neg0 = GET_CODE (op0) == NEG;
8042 bool neg1 = GET_CODE (op1) == NEG;
8043 if (compound_p || !flag_rounding_math || (neg0 && neg1))
8044 {
8045 if (neg0)
8046 op0 = XEXP (op0, 0);
8047 if (neg1)
8048 op1 = XEXP (op1, 0);
8049 }
4745e701 8050
0a78ebe4 8051 if (compound_p)
4745e701
JG
8052 /* FMADD/FNMADD/FNMSUB/FMSUB. */
8053 cost += extra_cost->fp[mode == DFmode].fma;
8054 else
3d840f7d 8055 /* FMUL/FNMUL. */
4745e701
JG
8056 cost += extra_cost->fp[mode == DFmode].mult;
8057 }
8058
e548c9df
AM
8059 cost += rtx_cost (op0, mode, MULT, 0, speed);
8060 cost += rtx_cost (op1, mode, MULT, 1, speed);
4745e701
JG
8061 return cost;
8062 }
43e9d192
IB
8063}
8064
67747367
JG
8065static int
8066aarch64_address_cost (rtx x,
ef4bddc2 8067 machine_mode mode,
67747367
JG
8068 addr_space_t as ATTRIBUTE_UNUSED,
8069 bool speed)
8070{
8071 enum rtx_code c = GET_CODE (x);
b175b679 8072 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
67747367
JG
8073 struct aarch64_address_info info;
8074 int cost = 0;
8075 info.shift = 0;
8076
a97d8b98 8077 if (!aarch64_classify_address (&info, x, mode, false))
67747367
JG
8078 {
8079 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
8080 {
8081 /* This is a CONST or SYMBOL ref which will be split
8082 in a different way depending on the code model in use.
8083 Cost it through the generic infrastructure. */
e548c9df 8084 int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
67747367
JG
8085 /* Divide through by the cost of one instruction to
8086 bring it to the same units as the address costs. */
8087 cost_symbol_ref /= COSTS_N_INSNS (1);
8088 /* The cost is then the cost of preparing the address,
8089 followed by an immediate (possibly 0) offset. */
8090 return cost_symbol_ref + addr_cost->imm_offset;
8091 }
8092 else
8093 {
8094 /* This is most likely a jump table from a case
8095 statement. */
8096 return addr_cost->register_offset;
8097 }
8098 }
8099
8100 switch (info.type)
8101 {
8102 case ADDRESS_LO_SUM:
8103 case ADDRESS_SYMBOLIC:
8104 case ADDRESS_REG_IMM:
8105 cost += addr_cost->imm_offset;
8106 break;
8107
8108 case ADDRESS_REG_WB:
8109 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
8110 cost += addr_cost->pre_modify;
8111 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
8112 cost += addr_cost->post_modify;
8113 else
8114 gcc_unreachable ();
8115
8116 break;
8117
8118 case ADDRESS_REG_REG:
8119 cost += addr_cost->register_offset;
8120 break;
8121
67747367 8122 case ADDRESS_REG_SXTW:
783879e6
EM
8123 cost += addr_cost->register_sextend;
8124 break;
8125
8126 case ADDRESS_REG_UXTW:
8127 cost += addr_cost->register_zextend;
67747367
JG
8128 break;
8129
8130 default:
8131 gcc_unreachable ();
8132 }
8133
8134
8135 if (info.shift > 0)
8136 {
8137 /* For the sake of calculating the cost of the shifted register
8138 component, we can treat same sized modes in the same way. */
6a70badb
RS
8139 if (known_eq (GET_MODE_BITSIZE (mode), 16))
8140 cost += addr_cost->addr_scale_costs.hi;
8141 else if (known_eq (GET_MODE_BITSIZE (mode), 32))
8142 cost += addr_cost->addr_scale_costs.si;
8143 else if (known_eq (GET_MODE_BITSIZE (mode), 64))
8144 cost += addr_cost->addr_scale_costs.di;
8145 else
8146 /* We can't tell, or this is a 128-bit vector. */
8147 cost += addr_cost->addr_scale_costs.ti;
67747367
JG
8148 }
8149
8150 return cost;
8151}
8152
b9066f5a
MW
8153/* Return the cost of a branch. If SPEED_P is true then the compiler is
8154 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
8155 to be taken. */
8156
8157int
8158aarch64_branch_cost (bool speed_p, bool predictable_p)
8159{
8160 /* When optimizing for speed, use the cost of unpredictable branches. */
8161 const struct cpu_branch_cost *branch_costs =
b175b679 8162 aarch64_tune_params.branch_costs;
b9066f5a
MW
8163
8164 if (!speed_p || predictable_p)
8165 return branch_costs->predictable;
8166 else
8167 return branch_costs->unpredictable;
8168}
8169
7cc2145f
JG
8170/* Return true if the RTX X in mode MODE is a zero or sign extract
8171 usable in an ADD or SUB (extended register) instruction. */
8172static bool
77e994c9 8173aarch64_rtx_arith_op_extract_p (rtx x, scalar_int_mode mode)
7cc2145f
JG
8174{
8175 /* Catch add with a sign extract.
8176 This is add_<optab><mode>_multp2. */
8177 if (GET_CODE (x) == SIGN_EXTRACT
8178 || GET_CODE (x) == ZERO_EXTRACT)
8179 {
8180 rtx op0 = XEXP (x, 0);
8181 rtx op1 = XEXP (x, 1);
8182 rtx op2 = XEXP (x, 2);
8183
8184 if (GET_CODE (op0) == MULT
8185 && CONST_INT_P (op1)
8186 && op2 == const0_rtx
8187 && CONST_INT_P (XEXP (op0, 1))
8188 && aarch64_is_extend_from_extract (mode,
8189 XEXP (op0, 1),
8190 op1))
8191 {
8192 return true;
8193 }
8194 }
e47c4031
KT
8195 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
8196 No shift. */
8197 else if (GET_CODE (x) == SIGN_EXTEND
8198 || GET_CODE (x) == ZERO_EXTEND)
8199 return REG_P (XEXP (x, 0));
7cc2145f
JG
8200
8201 return false;
8202}
8203
61263118
KT
8204static bool
8205aarch64_frint_unspec_p (unsigned int u)
8206{
8207 switch (u)
8208 {
8209 case UNSPEC_FRINTZ:
8210 case UNSPEC_FRINTP:
8211 case UNSPEC_FRINTM:
8212 case UNSPEC_FRINTA:
8213 case UNSPEC_FRINTN:
8214 case UNSPEC_FRINTX:
8215 case UNSPEC_FRINTI:
8216 return true;
8217
8218 default:
8219 return false;
8220 }
8221}
8222
fb0cb7fa
KT
8223/* Return true iff X is an rtx that will match an extr instruction
8224 i.e. as described in the *extr<mode>5_insn family of patterns.
8225 OP0 and OP1 will be set to the operands of the shifts involved
8226 on success and will be NULL_RTX otherwise. */
8227
8228static bool
8229aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
8230{
8231 rtx op0, op1;
77e994c9
RS
8232 scalar_int_mode mode;
8233 if (!is_a <scalar_int_mode> (GET_MODE (x), &mode))
8234 return false;
fb0cb7fa
KT
8235
8236 *res_op0 = NULL_RTX;
8237 *res_op1 = NULL_RTX;
8238
8239 if (GET_CODE (x) != IOR)
8240 return false;
8241
8242 op0 = XEXP (x, 0);
8243 op1 = XEXP (x, 1);
8244
8245 if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
8246 || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
8247 {
8248 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
8249 if (GET_CODE (op1) == ASHIFT)
8250 std::swap (op0, op1);
8251
8252 if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
8253 return false;
8254
8255 unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
8256 unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));
8257
8258 if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
8259 && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
8260 {
8261 *res_op0 = XEXP (op0, 0);
8262 *res_op1 = XEXP (op1, 0);
8263 return true;
8264 }
8265 }
8266
8267 return false;
8268}
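/* Illustrative example (hypothetical RTL): in DImode,
   (ior (ashift (reg a) (const_int 48)) (lshiftrt (reg b) (const_int 16)))
   satisfies the checks above because the shift amounts sum to 64, so the
   function returns true with *RES_OP0 = a and *RES_OP1 = b, and the insn can
   be emitted as a single EXTR of the two registers.  */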
8269
2d5ffe46
AP
8270/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
8271 storing it in *COST. Result is true if the total cost of the operation
8272 has now been calculated. */
8273static bool
8274aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
8275{
b9e3afe9
AP
8276 rtx inner;
8277 rtx comparator;
8278 enum rtx_code cmpcode;
8279
8280 if (COMPARISON_P (op0))
8281 {
8282 inner = XEXP (op0, 0);
8283 comparator = XEXP (op0, 1);
8284 cmpcode = GET_CODE (op0);
8285 }
8286 else
8287 {
8288 inner = op0;
8289 comparator = const0_rtx;
8290 cmpcode = NE;
8291 }
8292
2d5ffe46
AP
8293 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
8294 {
8295 /* Conditional branch. */
b9e3afe9 8296 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
8297 return true;
8298 else
8299 {
b9e3afe9 8300 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 8301 {
2d5ffe46
AP
8302 if (comparator == const0_rtx)
8303 {
8304 /* TBZ/TBNZ/CBZ/CBNZ. */
8305 if (GET_CODE (inner) == ZERO_EXTRACT)
8306 /* TBZ/TBNZ. */
e548c9df
AM
8307 *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
8308 ZERO_EXTRACT, 0, speed);
8309 else
8310 /* CBZ/CBNZ. */
8311 *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
2d5ffe46
AP
8312
8313 return true;
8314 }
8315 }
b9e3afe9 8316 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 8317 {
2d5ffe46
AP
8318 /* TBZ/TBNZ. */
8319 if (comparator == const0_rtx)
8320 return true;
8321 }
8322 }
8323 }
b9e3afe9 8324 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46 8325 {
786298dc 8326 /* CCMP. */
6dfeb7ce 8327 if (GET_CODE (op1) == COMPARE)
786298dc
WD
8328 {
8329 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
8330 if (XEXP (op1, 1) == const0_rtx)
8331 *cost += 1;
8332 if (speed)
8333 {
8334 machine_mode mode = GET_MODE (XEXP (op1, 0));
8335 const struct cpu_cost_table *extra_cost
8336 = aarch64_tune_params.insn_extra_cost;
8337
8338 if (GET_MODE_CLASS (mode) == MODE_INT)
8339 *cost += extra_cost->alu.arith;
8340 else
8341 *cost += extra_cost->fp[mode == DFmode].compare;
8342 }
8343 return true;
8344 }
8345
2d5ffe46
AP
8346 /* It's a conditional operation based on the status flags,
8347 so it must be some flavor of CSEL. */
8348
8349 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
8350 if (GET_CODE (op1) == NEG
8351 || GET_CODE (op1) == NOT
8352 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
8353 op1 = XEXP (op1, 0);
bad00732
KT
8354 else if (GET_CODE (op1) == ZERO_EXTEND && GET_CODE (op2) == ZERO_EXTEND)
8355 {
8356 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
8357 op1 = XEXP (op1, 0);
8358 op2 = XEXP (op2, 0);
8359 }
2d5ffe46 8360
e548c9df
AM
8361 *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
8362 *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
2d5ffe46
AP
8363 return true;
8364 }
8365
8366 /* We don't know what this is, cost all operands. */
8367 return false;
8368}
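/* Editor's illustration (not part of the original source): two
   hypothetical shapes the routine above distinguishes.

     (if_then_else (ne (reg r) (const_int 0)) (label_ref L) (pc))
       -> a conditional branch costed as CBNZ; only the compared
          register is recursed into.

     (if_then_else (lt (reg:CC cc) (const_int 0)) (neg (reg b)) (reg a))
       -> a CSEL-class operation (here CSNEG); the NEG is stripped and
          only the remaining operands are costed.  */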
8369
283b6c85
KT
8370/* Check whether X is a bitfield operation of the form shift + extend that
8371 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
8372 operand to which the bitfield operation is applied. Otherwise return
8373 NULL_RTX. */
8374
8375static rtx
8376aarch64_extend_bitfield_pattern_p (rtx x)
8377{
8378 rtx_code outer_code = GET_CODE (x);
8379 machine_mode outer_mode = GET_MODE (x);
8380
8381 if (outer_code != ZERO_EXTEND && outer_code != SIGN_EXTEND
8382 && outer_mode != SImode && outer_mode != DImode)
8383 return NULL_RTX;
8384
8385 rtx inner = XEXP (x, 0);
8386 rtx_code inner_code = GET_CODE (inner);
8387 machine_mode inner_mode = GET_MODE (inner);
8388 rtx op = NULL_RTX;
8389
8390 switch (inner_code)
8391 {
8392 case ASHIFT:
8393 if (CONST_INT_P (XEXP (inner, 1))
8394 && (inner_mode == QImode || inner_mode == HImode))
8395 op = XEXP (inner, 0);
8396 break;
8397 case LSHIFTRT:
8398 if (outer_code == ZERO_EXTEND && CONST_INT_P (XEXP (inner, 1))
8399 && (inner_mode == QImode || inner_mode == HImode))
8400 op = XEXP (inner, 0);
8401 break;
8402 case ASHIFTRT:
8403 if (outer_code == SIGN_EXTEND && CONST_INT_P (XEXP (inner, 1))
8404 && (inner_mode == QImode || inner_mode == HImode))
8405 op = XEXP (inner, 0);
8406 break;
8407 default:
8408 break;
8409 }
8410
8411 return op;
8412}
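/* Editor's illustration (not part of the original source): hypothetical
   inputs to the routine above and what it returns.

     (zero_extend:SI (lshiftrt:HI (reg r) (const_int 4)))  -> (reg r)
        (a UBFX of the 12 bits starting at bit 4)
     (sign_extend:DI (ashift:QI (reg r) (const_int 2)))    -> (reg r)
        (an SBFIZ)
     (zero_extend:SI (ashiftrt:HI (reg r) (const_int 4)))  -> NULL_RTX
        (an arithmetic right shift only pairs with SIGN_EXTEND).  */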
8413
8c83f71d
KT
8414/* Return true if the mask and a shift amount from an RTX of the form
8415 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
8416 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
8417
8418bool
77e994c9
RS
8419aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
8420 rtx shft_amnt)
8c83f71d
KT
8421{
8422 return CONST_INT_P (mask) && CONST_INT_P (shft_amnt)
8423 && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode)
8424 && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0
8425 && (INTVAL (mask) & ((1 << INTVAL (shft_amnt)) - 1)) == 0;
8426}
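/* Editor's sketch (not part of the original source): the arithmetic
   behind the predicate above, for a hypothetical SImode case with
   mask = 0x0000ff00 and shift = 8:
     - 8 < 32                            (shift fits in the mode)
     - (0x0000ff00 >> 8) + 1 == 0x100    (mask >> shift is a contiguous
                                          run of low bits)
     - 0x0000ff00 & 0xff == 0            (no mask bits below the shift)
   so (x << 8) & 0x0000ff00 can be emitted as UBFIZ w0, wX, #8, #8.

   The same test restated as stand-alone C (an illustration only, not
   the GCC implementation, and hard-wired to a 32-bit mode):

     #include <stdbool.h>
     #include <stdint.h>

     static bool
     ubfiz_mask_and_shift_ok_example (uint64_t mask, unsigned shift)
     {
       if (shift >= 32)
         return false;
       uint64_t run = (mask >> shift) + 1;
       bool contiguous = run != 0 && (run & (run - 1)) == 0;
       bool low_clear = (mask & ((UINT64_C (1) << shift) - 1)) == 0;
       return contiguous && low_clear;
     }  */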
8427
43e9d192
IB
8428/* Calculate the cost of calculating X, storing it in *COST. Result
8429 is true if the total cost of the operation has now been calculated. */
8430static bool
e548c9df 8431aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
43e9d192
IB
8432 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
8433{
a8eecd00 8434 rtx op0, op1, op2;
73250c4c 8435 const struct cpu_cost_table *extra_cost
b175b679 8436 = aarch64_tune_params.insn_extra_cost;
e548c9df 8437 int code = GET_CODE (x);
b4206259 8438 scalar_int_mode int_mode;
43e9d192 8439
7fc5ef02
JG
8440 /* By default, assume that everything has equivalent cost to the
8441 cheapest instruction. Any additional costs are applied as a delta
8442 above this default. */
8443 *cost = COSTS_N_INSNS (1);
8444
43e9d192
IB
8445 switch (code)
8446 {
8447 case SET:
ba123b0d
JG
8448 /* The cost depends entirely on the operands to SET. */
8449 *cost = 0;
43e9d192
IB
8450 op0 = SET_DEST (x);
8451 op1 = SET_SRC (x);
8452
8453 switch (GET_CODE (op0))
8454 {
8455 case MEM:
8456 if (speed)
2961177e
JG
8457 {
8458 rtx address = XEXP (op0, 0);
b6875aac
KV
8459 if (VECTOR_MODE_P (mode))
8460 *cost += extra_cost->ldst.storev;
8461 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
8462 *cost += extra_cost->ldst.store;
8463 else if (mode == SFmode)
8464 *cost += extra_cost->ldst.storef;
8465 else if (mode == DFmode)
8466 *cost += extra_cost->ldst.stored;
8467
8468 *cost +=
8469 COSTS_N_INSNS (aarch64_address_cost (address, mode,
8470 0, speed));
8471 }
43e9d192 8472
e548c9df 8473 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
8474 return true;
8475
8476 case SUBREG:
8477 if (! REG_P (SUBREG_REG (op0)))
e548c9df 8478 *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);
ba123b0d 8479
43e9d192
IB
8480 /* Fall through. */
8481 case REG:
b6875aac
KV
8482 /* The cost is one per vector-register copied. */
8483 if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
8484 {
fe1447a1
RS
8485 int nregs = aarch64_hard_regno_nregs (V0_REGNUM, GET_MODE (op0));
8486 *cost = COSTS_N_INSNS (nregs);
b6875aac 8487 }
ba123b0d
JG
8488 /* const0_rtx is in general free, but we will use an
8489 instruction to set a register to 0. */
b6875aac
KV
8490 else if (REG_P (op1) || op1 == const0_rtx)
8491 {
8492 /* The cost is 1 per register copied. */
fe1447a1
RS
8493 int nregs = aarch64_hard_regno_nregs (R0_REGNUM, GET_MODE (op0));
8494 *cost = COSTS_N_INSNS (nregs);
b6875aac 8495 }
ba123b0d
JG
8496 else
8497 /* Cost is just the cost of the RHS of the set. */
e548c9df 8498 *cost += rtx_cost (op1, mode, SET, 1, speed);
43e9d192
IB
8499 return true;
8500
ba123b0d 8501 case ZERO_EXTRACT:
43e9d192 8502 case SIGN_EXTRACT:
ba123b0d
JG
8503 /* Bit-field insertion. Strip any redundant widening of
8504 the RHS to meet the width of the target. */
43e9d192
IB
8505 if (GET_CODE (op1) == SUBREG)
8506 op1 = SUBREG_REG (op1);
8507 if ((GET_CODE (op1) == ZERO_EXTEND
8508 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 8509 && CONST_INT_P (XEXP (op0, 1))
77e994c9
RS
8510 && is_a <scalar_int_mode> (GET_MODE (XEXP (op1, 0)), &int_mode)
8511 && GET_MODE_BITSIZE (int_mode) >= INTVAL (XEXP (op0, 1)))
43e9d192 8512 op1 = XEXP (op1, 0);
ba123b0d
JG
8513
8514 if (CONST_INT_P (op1))
8515 {
8516 /* MOV immediate is assumed to always be cheap. */
8517 *cost = COSTS_N_INSNS (1);
8518 }
8519 else
8520 {
8521 /* BFM. */
8522 if (speed)
8523 *cost += extra_cost->alu.bfi;
e548c9df 8524 *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
ba123b0d
JG
8525 }
8526
43e9d192
IB
8527 return true;
8528
8529 default:
ba123b0d
JG
8530 /* We can't make sense of this, assume default cost. */
8531 *cost = COSTS_N_INSNS (1);
61263118 8532 return false;
43e9d192
IB
8533 }
8534 return false;
8535
9dfc162c
JG
8536 case CONST_INT:
8537 /* If an instruction can incorporate a constant within the
8538 instruction, the instruction's expression avoids calling
8539 rtx_cost() on the constant. If rtx_cost() is called on a
8540 constant, then it is usually because the constant must be
8541 moved into a register by one or more instructions.
8542
8543 The exception is constant 0, which can be expressed
8544 as XZR/WZR and is therefore free. The exception to this is
8545 if we have (set (reg) (const0_rtx)) in which case we must cost
8546 the move. However, we can catch that when we cost the SET, so
8547 we don't need to consider that here. */
8548 if (x == const0_rtx)
8549 *cost = 0;
8550 else
8551 {
8552 /* To an approximation, building any other constant is
8553 proportionally expensive to the number of instructions
8554 required to build that constant. This is true whether we
8555 are compiling for SPEED or otherwise. */
77e994c9
RS
8556 if (!is_a <scalar_int_mode> (mode, &int_mode))
8557 int_mode = word_mode;
82614948 8558 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
77e994c9 8559 (NULL_RTX, x, false, int_mode));
9dfc162c
JG
8560 }
8561 return true;
8562
8563 case CONST_DOUBLE:
a2170965
TC
8564
8565 /* First determine number of instructions to do the move
8566 as an integer constant. */
8567 if (!aarch64_float_const_representable_p (x)
8568 && !aarch64_can_const_movi_rtx_p (x, mode)
8569 && aarch64_float_const_rtx_p (x))
8570 {
8571 unsigned HOST_WIDE_INT ival;
8572 bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
8573 gcc_assert (succeed);
8574
77e994c9
RS
8575 scalar_int_mode imode = (mode == HFmode
8576 ? SImode
8577 : int_mode_for_mode (mode).require ());
a2170965
TC
8578 int ncost = aarch64_internal_mov_immediate
8579 (NULL_RTX, gen_int_mode (ival, imode), false, imode);
8580 *cost += COSTS_N_INSNS (ncost);
8581 return true;
8582 }
8583
9dfc162c
JG
8584 if (speed)
8585 {
8586 /* mov[df,sf]_aarch64. */
8587 if (aarch64_float_const_representable_p (x))
8588 /* FMOV (scalar immediate). */
8589 *cost += extra_cost->fp[mode == DFmode].fpconst;
8590 else if (!aarch64_float_const_zero_rtx_p (x))
8591 {
8592 /* This will be a load from memory. */
8593 if (mode == DFmode)
8594 *cost += extra_cost->ldst.loadd;
8595 else
8596 *cost += extra_cost->ldst.loadf;
8597 }
8598 else
8599 /* Otherwise this is +0.0. We get this using MOVI d0, #0
8601 or MOV v0.s[0], wzr - neither of which is modeled by the
8601 cost tables. Just use the default cost. */
8602 {
8603 }
8604 }
8605
8606 return true;
8607
43e9d192
IB
8608 case MEM:
8609 if (speed)
2961177e
JG
8610 {
8611 /* For loads we want the base cost of a load, plus an
8612 approximation for the additional cost of the addressing
8613 mode. */
8614 rtx address = XEXP (x, 0);
b6875aac
KV
8615 if (VECTOR_MODE_P (mode))
8616 *cost += extra_cost->ldst.loadv;
8617 else if (GET_MODE_CLASS (mode) == MODE_INT)
2961177e
JG
8618 *cost += extra_cost->ldst.load;
8619 else if (mode == SFmode)
8620 *cost += extra_cost->ldst.loadf;
8621 else if (mode == DFmode)
8622 *cost += extra_cost->ldst.loadd;
8623
8624 *cost +=
8625 COSTS_N_INSNS (aarch64_address_cost (address, mode,
8626 0, speed));
8627 }
43e9d192
IB
8628
8629 return true;
8630
8631 case NEG:
4745e701
JG
8632 op0 = XEXP (x, 0);
8633
b6875aac
KV
8634 if (VECTOR_MODE_P (mode))
8635 {
8636 if (speed)
8637 {
8638 /* FNEG. */
8639 *cost += extra_cost->vect.alu;
8640 }
8641 return false;
8642 }
8643
e548c9df
AM
8644 if (GET_MODE_CLASS (mode) == MODE_INT)
8645 {
4745e701
JG
8646 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
8647 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
8648 {
8649 /* CSETM. */
e548c9df 8650 *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
4745e701
JG
8651 return true;
8652 }
8653
8654 /* Cost this as SUB wzr, X. */
e548c9df 8655 op0 = CONST0_RTX (mode);
4745e701
JG
8656 op1 = XEXP (x, 0);
8657 goto cost_minus;
8658 }
8659
e548c9df 8660 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4745e701
JG
8661 {
8662 /* Support (neg(fma...)) as a single instruction only if
8663 sign of zeros is unimportant. This matches the decision
8664 making in aarch64.md. */
8665 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
8666 {
8667 /* FNMADD. */
e548c9df 8668 *cost = rtx_cost (op0, mode, NEG, 0, speed);
4745e701
JG
8669 return true;
8670 }
d318517d
SN
8671 if (GET_CODE (op0) == MULT)
8672 {
8673 /* FNMUL. */
8674 *cost = rtx_cost (op0, mode, NEG, 0, speed);
8675 return true;
8676 }
4745e701
JG
8677 if (speed)
8678 /* FNEG. */
8679 *cost += extra_cost->fp[mode == DFmode].neg;
8680 return false;
8681 }
8682
8683 return false;
43e9d192 8684
781aeb73
KT
8685 case CLRSB:
8686 case CLZ:
8687 if (speed)
b6875aac
KV
8688 {
8689 if (VECTOR_MODE_P (mode))
8690 *cost += extra_cost->vect.alu;
8691 else
8692 *cost += extra_cost->alu.clz;
8693 }
781aeb73
KT
8694
8695 return false;
8696
43e9d192
IB
8697 case COMPARE:
8698 op0 = XEXP (x, 0);
8699 op1 = XEXP (x, 1);
8700
8701 if (op1 == const0_rtx
8702 && GET_CODE (op0) == AND)
8703 {
8704 x = op0;
e548c9df 8705 mode = GET_MODE (op0);
43e9d192
IB
8706 goto cost_logic;
8707 }
8708
a8eecd00
JG
8709 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
8710 {
8711 /* TODO: A write to the CC flags possibly costs extra, this
8712 needs encoding in the cost tables. */
8713
e548c9df 8714 mode = GET_MODE (op0);
a8eecd00
JG
8715 /* ANDS. */
8716 if (GET_CODE (op0) == AND)
8717 {
8718 x = op0;
8719 goto cost_logic;
8720 }
8721
8722 if (GET_CODE (op0) == PLUS)
8723 {
8724 /* ADDS (and CMN alias). */
8725 x = op0;
8726 goto cost_plus;
8727 }
8728
8729 if (GET_CODE (op0) == MINUS)
8730 {
8731 /* SUBS. */
8732 x = op0;
8733 goto cost_minus;
8734 }
8735
345854d8
KT
8736 if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
8737 && GET_MODE (x) == CC_NZmode && CONST_INT_P (XEXP (op0, 1))
8738 && CONST_INT_P (XEXP (op0, 2)))
8739 {
8740 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
8741 Handle it here directly rather than going to cost_logic
8742 since we know the immediate generated for the TST is valid
8743 so we can avoid creating an intermediate rtx for it only
8744 for costing purposes. */
8745 if (speed)
8746 *cost += extra_cost->alu.logical;
8747
8748 *cost += rtx_cost (XEXP (op0, 0), GET_MODE (op0),
8749 ZERO_EXTRACT, 0, speed);
8750 return true;
8751 }
8752
a8eecd00
JG
8753 if (GET_CODE (op1) == NEG)
8754 {
8755 /* CMN. */
8756 if (speed)
8757 *cost += extra_cost->alu.arith;
8758
e548c9df
AM
8759 *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
8760 *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
a8eecd00
JG
8761 return true;
8762 }
8763
8764 /* CMP.
8765
8766 Compare can freely swap the order of operands, and
8767 canonicalization puts the more complex operation first.
8768 But the integer MINUS logic expects the shift/extend
8769 operation in op1. */
8770 if (! (REG_P (op0)
8771 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
8772 {
8773 op0 = XEXP (x, 1);
8774 op1 = XEXP (x, 0);
8775 }
8776 goto cost_minus;
8777 }
8778
8779 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8780 {
8781 /* FCMP. */
8782 if (speed)
8783 *cost += extra_cost->fp[mode == DFmode].compare;
8784
8785 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
8786 {
e548c9df 8787 *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
a8eecd00
JG
8788 /* FCMP supports constant 0.0 for no extra cost. */
8789 return true;
8790 }
8791 return false;
8792 }
8793
b6875aac
KV
8794 if (VECTOR_MODE_P (mode))
8795 {
8796 /* Vector compare. */
8797 if (speed)
8798 *cost += extra_cost->vect.alu;
8799
8800 if (aarch64_float_const_zero_rtx_p (op1))
8801 {
8802 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
8803 cost. */
8804 return true;
8805 }
8806 return false;
8807 }
a8eecd00 8808 return false;
43e9d192
IB
8809
8810 case MINUS:
4745e701
JG
8811 {
8812 op0 = XEXP (x, 0);
8813 op1 = XEXP (x, 1);
8814
8815cost_minus:
e548c9df 8816 *cost += rtx_cost (op0, mode, MINUS, 0, speed);
23cb6618 8817
4745e701
JG
8818 /* Detect valid immediates. */
8819 if ((GET_MODE_CLASS (mode) == MODE_INT
8820 || (GET_MODE_CLASS (mode) == MODE_CC
8821 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
8822 && CONST_INT_P (op1)
8823 && aarch64_uimm12_shift (INTVAL (op1)))
8824 {
4745e701
JG
8825 if (speed)
8826 /* SUB(S) (immediate). */
8827 *cost += extra_cost->alu.arith;
8828 return true;
4745e701
JG
8829 }
8830
7cc2145f 8831 /* Look for SUB (extended register). */
77e994c9
RS
8832 if (is_a <scalar_int_mode> (mode, &int_mode)
8833 && aarch64_rtx_arith_op_extract_p (op1, int_mode))
7cc2145f
JG
8834 {
8835 if (speed)
2533c820 8836 *cost += extra_cost->alu.extend_arith;
7cc2145f 8837
b10f1009 8838 op1 = aarch64_strip_extend (op1, true);
e47c4031 8839 *cost += rtx_cost (op1, VOIDmode,
e548c9df 8840 (enum rtx_code) GET_CODE (op1), 0, speed);
7cc2145f
JG
8841 return true;
8842 }
8843
b10f1009 8844 rtx new_op1 = aarch64_strip_extend (op1, false);
4745e701
JG
8845
8846 /* Cost this as an FMA-alike operation. */
8847 if ((GET_CODE (new_op1) == MULT
0a78ebe4 8848 || aarch64_shift_p (GET_CODE (new_op1)))
4745e701
JG
8849 && code != COMPARE)
8850 {
8851 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
8852 (enum rtx_code) code,
8853 speed);
4745e701
JG
8854 return true;
8855 }
43e9d192 8856
e548c9df 8857 *cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);
43e9d192 8858
4745e701
JG
8859 if (speed)
8860 {
b6875aac
KV
8861 if (VECTOR_MODE_P (mode))
8862 {
8863 /* Vector SUB. */
8864 *cost += extra_cost->vect.alu;
8865 }
8866 else if (GET_MODE_CLASS (mode) == MODE_INT)
8867 {
8868 /* SUB(S). */
8869 *cost += extra_cost->alu.arith;
8870 }
4745e701 8871 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
8872 {
8873 /* FSUB. */
8874 *cost += extra_cost->fp[mode == DFmode].addsub;
8875 }
4745e701
JG
8876 }
8877 return true;
8878 }
43e9d192
IB
8879
8880 case PLUS:
4745e701
JG
8881 {
8882 rtx new_op0;
43e9d192 8883
4745e701
JG
8884 op0 = XEXP (x, 0);
8885 op1 = XEXP (x, 1);
43e9d192 8886
a8eecd00 8887cost_plus:
4745e701
JG
8888 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
8889 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
8890 {
8891 /* CSINC. */
e548c9df
AM
8892 *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
8893 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
4745e701
JG
8894 return true;
8895 }
43e9d192 8896
4745e701 8897 if (GET_MODE_CLASS (mode) == MODE_INT
43cacb12
RS
8898 && ((CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
8899 || aarch64_sve_addvl_addpl_immediate (op1, mode)))
4745e701 8900 {
e548c9df 8901 *cost += rtx_cost (op0, mode, PLUS, 0, speed);
43e9d192 8902
4745e701
JG
8903 if (speed)
8904 /* ADD (immediate). */
8905 *cost += extra_cost->alu.arith;
8906 return true;
8907 }
8908
e548c9df 8909 *cost += rtx_cost (op1, mode, PLUS, 1, speed);
23cb6618 8910
7cc2145f 8911 /* Look for ADD (extended register). */
77e994c9
RS
8912 if (is_a <scalar_int_mode> (mode, &int_mode)
8913 && aarch64_rtx_arith_op_extract_p (op0, int_mode))
7cc2145f
JG
8914 {
8915 if (speed)
2533c820 8916 *cost += extra_cost->alu.extend_arith;
7cc2145f 8917
b10f1009 8918 op0 = aarch64_strip_extend (op0, true);
e47c4031 8919 *cost += rtx_cost (op0, VOIDmode,
e548c9df 8920 (enum rtx_code) GET_CODE (op0), 0, speed);
7cc2145f
JG
8921 return true;
8922 }
8923
4745e701
JG
8924 /* Strip any extend, leave shifts behind as we will
8925 cost them through mult_cost. */
b10f1009 8926 new_op0 = aarch64_strip_extend (op0, false);
4745e701
JG
8927
8928 if (GET_CODE (new_op0) == MULT
0a78ebe4 8929 || aarch64_shift_p (GET_CODE (new_op0)))
4745e701
JG
8930 {
8931 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
8932 speed);
4745e701
JG
8933 return true;
8934 }
8935
e548c9df 8936 *cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);
4745e701
JG
8937
8938 if (speed)
8939 {
b6875aac
KV
8940 if (VECTOR_MODE_P (mode))
8941 {
8942 /* Vector ADD. */
8943 *cost += extra_cost->vect.alu;
8944 }
8945 else if (GET_MODE_CLASS (mode) == MODE_INT)
8946 {
8947 /* ADD. */
8948 *cost += extra_cost->alu.arith;
8949 }
4745e701 8950 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b6875aac
KV
8951 {
8952 /* FADD. */
8953 *cost += extra_cost->fp[mode == DFmode].addsub;
8954 }
4745e701
JG
8955 }
8956 return true;
8957 }
43e9d192 8958
18b42b2a
KT
8959 case BSWAP:
8960 *cost = COSTS_N_INSNS (1);
8961
8962 if (speed)
b6875aac
KV
8963 {
8964 if (VECTOR_MODE_P (mode))
8965 *cost += extra_cost->vect.alu;
8966 else
8967 *cost += extra_cost->alu.rev;
8968 }
18b42b2a
KT
8969 return false;
8970
43e9d192 8971 case IOR:
f7d5cf8d
KT
8972 if (aarch_rev16_p (x))
8973 {
8974 *cost = COSTS_N_INSNS (1);
8975
b6875aac
KV
8976 if (speed)
8977 {
8978 if (VECTOR_MODE_P (mode))
8979 *cost += extra_cost->vect.alu;
8980 else
8981 *cost += extra_cost->alu.rev;
8982 }
8983 return true;
f7d5cf8d 8984 }
fb0cb7fa
KT
8985
8986 if (aarch64_extr_rtx_p (x, &op0, &op1))
8987 {
e548c9df
AM
8988 *cost += rtx_cost (op0, mode, IOR, 0, speed);
8989 *cost += rtx_cost (op1, mode, IOR, 1, speed);
fb0cb7fa
KT
8990 if (speed)
8991 *cost += extra_cost->alu.shift;
8992
8993 return true;
8994 }
f7d5cf8d 8995 /* Fall through. */
43e9d192
IB
8996 case XOR:
8997 case AND:
8998 cost_logic:
8999 op0 = XEXP (x, 0);
9000 op1 = XEXP (x, 1);
9001
b6875aac
KV
9002 if (VECTOR_MODE_P (mode))
9003 {
9004 if (speed)
9005 *cost += extra_cost->vect.alu;
9006 return true;
9007 }
9008
268c3b47
JG
9009 if (code == AND
9010 && GET_CODE (op0) == MULT
9011 && CONST_INT_P (XEXP (op0, 1))
9012 && CONST_INT_P (op1)
9013 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
9014 INTVAL (op1)) != 0)
9015 {
9016 /* This is a UBFM/SBFM. */
e548c9df 9017 *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
268c3b47
JG
9018 if (speed)
9019 *cost += extra_cost->alu.bfx;
9020 return true;
9021 }
9022
b4206259 9023 if (is_int_mode (mode, &int_mode))
43e9d192 9024 {
8c83f71d 9025 if (CONST_INT_P (op1))
43e9d192 9026 {
8c83f71d
KT
9027 /* We have a mask + shift version of a UBFIZ
9028 i.e. the *andim_ashift<mode>_bfiz pattern. */
9029 if (GET_CODE (op0) == ASHIFT
b4206259
RS
9030 && aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
9031 XEXP (op0, 1)))
8c83f71d 9032 {
b4206259 9033 *cost += rtx_cost (XEXP (op0, 0), int_mode,
8c83f71d
KT
9034 (enum rtx_code) code, 0, speed);
9035 if (speed)
9036 *cost += extra_cost->alu.bfx;
268c3b47 9037
8c83f71d
KT
9038 return true;
9039 }
b4206259 9040 else if (aarch64_bitmask_imm (INTVAL (op1), int_mode))
8c83f71d
KT
9041 {
9042 /* We possibly get the immediate for free, this is not
9043 modelled. */
b4206259
RS
9044 *cost += rtx_cost (op0, int_mode,
9045 (enum rtx_code) code, 0, speed);
8c83f71d
KT
9046 if (speed)
9047 *cost += extra_cost->alu.logical;
268c3b47 9048
8c83f71d
KT
9049 return true;
9050 }
43e9d192
IB
9051 }
9052 else
9053 {
268c3b47
JG
9054 rtx new_op0 = op0;
9055
9056 /* Handle ORN, EON, or BIC. */
43e9d192
IB
9057 if (GET_CODE (op0) == NOT)
9058 op0 = XEXP (op0, 0);
268c3b47
JG
9059
9060 new_op0 = aarch64_strip_shift (op0);
9061
9062 /* If we had a shift on op0 then this is a logical-shift-
9063 by-register/immediate operation. Otherwise, this is just
9064 a logical operation. */
9065 if (speed)
9066 {
9067 if (new_op0 != op0)
9068 {
9069 /* Shift by immediate. */
9070 if (CONST_INT_P (XEXP (op0, 1)))
9071 *cost += extra_cost->alu.log_shift;
9072 else
9073 *cost += extra_cost->alu.log_shift_reg;
9074 }
9075 else
9076 *cost += extra_cost->alu.logical;
9077 }
9078
9079 /* In both cases we want to cost both operands. */
b4206259
RS
9080 *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
9081 0, speed);
9082 *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
9083 1, speed);
268c3b47
JG
9084
9085 return true;
43e9d192 9086 }
43e9d192
IB
9087 }
9088 return false;
9089
268c3b47 9090 case NOT:
6365da9e
KT
9091 x = XEXP (x, 0);
9092 op0 = aarch64_strip_shift (x);
9093
b6875aac
KV
9094 if (VECTOR_MODE_P (mode))
9095 {
9096 /* Vector NOT. */
9097 *cost += extra_cost->vect.alu;
9098 return false;
9099 }
9100
6365da9e
KT
9101 /* MVN-shifted-reg. */
9102 if (op0 != x)
9103 {
e548c9df 9104 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
6365da9e
KT
9105
9106 if (speed)
9107 *cost += extra_cost->alu.log_shift;
9108
9109 return true;
9110 }
9111 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
9112 Handle the second form here taking care that 'a' in the above can
9113 be a shift. */
9114 else if (GET_CODE (op0) == XOR)
9115 {
9116 rtx newop0 = XEXP (op0, 0);
9117 rtx newop1 = XEXP (op0, 1);
9118 rtx op0_stripped = aarch64_strip_shift (newop0);
9119
e548c9df
AM
9120 *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
9121 *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
6365da9e
KT
9122
9123 if (speed)
9124 {
9125 if (op0_stripped != newop0)
9126 *cost += extra_cost->alu.log_shift;
9127 else
9128 *cost += extra_cost->alu.logical;
9129 }
9130
9131 return true;
9132 }
268c3b47
JG
9133 /* MVN. */
9134 if (speed)
9135 *cost += extra_cost->alu.logical;
9136
268c3b47
JG
9137 return false;
9138
43e9d192 9139 case ZERO_EXTEND:
b1685e62
JG
9140
9141 op0 = XEXP (x, 0);
9142 /* If a value is written in SI mode, then zero extended to DI
9143 mode, the operation will in general be free as a write to
9144 a 'w' register implicitly zeroes the upper bits of an 'x'
9145 register. However, if this is
9146
9147 (set (reg) (zero_extend (reg)))
9148
9149 we must cost the explicit register move. */
9150 if (mode == DImode
9151 && GET_MODE (op0) == SImode
9152 && outer == SET)
9153 {
e548c9df 9154 int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);
b1685e62 9155
dde23f43
KM
9156 /* If OP_COST is non-zero, then the cost of the zero extend
9157 is effectively the cost of the inner operation. Otherwise
9158 we have a MOV instruction and we take the cost from the MOV
9159 itself. This is true independently of whether we are
9160 optimizing for space or time. */
9161 if (op_cost)
b1685e62
JG
9162 *cost = op_cost;
9163
9164 return true;
9165 }
e548c9df 9166 else if (MEM_P (op0))
43e9d192 9167 {
b1685e62 9168 /* All loads can zero extend to any size for free. */
e548c9df 9169 *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
43e9d192
IB
9170 return true;
9171 }
b1685e62 9172
283b6c85
KT
9173 op0 = aarch64_extend_bitfield_pattern_p (x);
9174 if (op0)
9175 {
9176 *cost += rtx_cost (op0, mode, ZERO_EXTEND, 0, speed);
9177 if (speed)
9178 *cost += extra_cost->alu.bfx;
9179 return true;
9180 }
9181
b1685e62 9182 if (speed)
b6875aac
KV
9183 {
9184 if (VECTOR_MODE_P (mode))
9185 {
9186 /* UMOV. */
9187 *cost += extra_cost->vect.alu;
9188 }
9189 else
9190 {
63715e5e
WD
9191 /* We generate an AND instead of UXTB/UXTH. */
9192 *cost += extra_cost->alu.logical;
b6875aac
KV
9193 }
9194 }
43e9d192
IB
9195 return false;
9196
9197 case SIGN_EXTEND:
b1685e62 9198 if (MEM_P (XEXP (x, 0)))
43e9d192 9199 {
b1685e62
JG
9200 /* LDRSH. */
9201 if (speed)
9202 {
9203 rtx address = XEXP (XEXP (x, 0), 0);
9204 *cost += extra_cost->ldst.load_sign_extend;
9205
9206 *cost +=
9207 COSTS_N_INSNS (aarch64_address_cost (address, mode,
9208 0, speed));
9209 }
43e9d192
IB
9210 return true;
9211 }
b1685e62 9212
283b6c85
KT
9213 op0 = aarch64_extend_bitfield_pattern_p (x);
9214 if (op0)
9215 {
9216 *cost += rtx_cost (op0, mode, SIGN_EXTEND, 0, speed);
9217 if (speed)
9218 *cost += extra_cost->alu.bfx;
9219 return true;
9220 }
9221
b1685e62 9222 if (speed)
b6875aac
KV
9223 {
9224 if (VECTOR_MODE_P (mode))
9225 *cost += extra_cost->vect.alu;
9226 else
9227 *cost += extra_cost->alu.extend;
9228 }
43e9d192
IB
9229 return false;
9230
ba0cfa17
JG
9231 case ASHIFT:
9232 op0 = XEXP (x, 0);
9233 op1 = XEXP (x, 1);
9234
9235 if (CONST_INT_P (op1))
9236 {
ba0cfa17 9237 if (speed)
b6875aac
KV
9238 {
9239 if (VECTOR_MODE_P (mode))
9240 {
9241 /* Vector shift (immediate). */
9242 *cost += extra_cost->vect.alu;
9243 }
9244 else
9245 {
9246 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
9247 aliases. */
9248 *cost += extra_cost->alu.shift;
9249 }
9250 }
ba0cfa17
JG
9251
9252 /* We can incorporate zero/sign extend for free. */
9253 if (GET_CODE (op0) == ZERO_EXTEND
9254 || GET_CODE (op0) == SIGN_EXTEND)
9255 op0 = XEXP (op0, 0);
9256
e548c9df 9257 *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
ba0cfa17
JG
9258 return true;
9259 }
9260 else
9261 {
7813b280 9262 if (VECTOR_MODE_P (mode))
b6875aac 9263 {
7813b280
KT
9264 if (speed)
9265 /* Vector shift (register). */
9266 *cost += extra_cost->vect.alu;
9267 }
9268 else
9269 {
9270 if (speed)
9271 /* LSLV. */
9272 *cost += extra_cost->alu.shift_reg;
9273
9274 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
9275 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
9276 && known_eq (INTVAL (XEXP (op1, 1)),
9277 GET_MODE_BITSIZE (mode) - 1))
b6875aac 9278 {
7813b280
KT
9279 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
9280 /* We already demanded XEXP (op1, 0) to be REG_P, so
9281 don't recurse into it. */
9282 return true;
b6875aac
KV
9283 }
9284 }
ba0cfa17
JG
9285 return false; /* All arguments need to be in registers. */
9286 }
9287
43e9d192 9288 case ROTATE:
43e9d192
IB
9289 case ROTATERT:
9290 case LSHIFTRT:
43e9d192 9291 case ASHIFTRT:
ba0cfa17
JG
9292 op0 = XEXP (x, 0);
9293 op1 = XEXP (x, 1);
43e9d192 9294
ba0cfa17
JG
9295 if (CONST_INT_P (op1))
9296 {
9297 /* ASR (immediate) and friends. */
9298 if (speed)
b6875aac
KV
9299 {
9300 if (VECTOR_MODE_P (mode))
9301 *cost += extra_cost->vect.alu;
9302 else
9303 *cost += extra_cost->alu.shift;
9304 }
43e9d192 9305
e548c9df 9306 *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
ba0cfa17
JG
9307 return true;
9308 }
9309 else
9310 {
7813b280 9311 if (VECTOR_MODE_P (mode))
b6875aac 9312 {
7813b280
KT
9313 if (speed)
9314 /* Vector shift (register). */
b6875aac 9315 *cost += extra_cost->vect.alu;
7813b280
KT
9316 }
9317 else
9318 {
9319 if (speed)
9320 /* ASR (register) and friends. */
b6875aac 9321 *cost += extra_cost->alu.shift_reg;
7813b280
KT
9322
9323 if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
9324 && CONST_INT_P (XEXP (op1, 1))
6a70badb
RS
9325 && known_eq (INTVAL (XEXP (op1, 1)),
9326 GET_MODE_BITSIZE (mode) - 1))
7813b280
KT
9327 {
9328 *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
9329 /* We already demanded XEXP (op1, 0) to be REG_P, so
9330 don't recurse into it. */
9331 return true;
9332 }
b6875aac 9333 }
ba0cfa17
JG
9334 return false; /* All arguments need to be in registers. */
9335 }
43e9d192 9336
909734be
JG
9337 case SYMBOL_REF:
9338
1b1e81f8
JW
9339 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
9340 || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
909734be
JG
9341 {
9342 /* LDR. */
9343 if (speed)
9344 *cost += extra_cost->ldst.load;
9345 }
9346 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
9347 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
9348 {
9349 /* ADRP, followed by ADD. */
9350 *cost += COSTS_N_INSNS (1);
9351 if (speed)
9352 *cost += 2 * extra_cost->alu.arith;
9353 }
9354 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
9355 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
9356 {
9357 /* ADR. */
9358 if (speed)
9359 *cost += extra_cost->alu.arith;
9360 }
9361
9362 if (flag_pic)
9363 {
9364 /* One extra load instruction, after accessing the GOT. */
9365 *cost += COSTS_N_INSNS (1);
9366 if (speed)
9367 *cost += extra_cost->ldst.load;
9368 }
43e9d192
IB
9369 return true;
9370
909734be 9371 case HIGH:
43e9d192 9372 case LO_SUM:
909734be
JG
9373 /* ADRP/ADD (immediate). */
9374 if (speed)
9375 *cost += extra_cost->alu.arith;
43e9d192
IB
9376 return true;
9377
9378 case ZERO_EXTRACT:
9379 case SIGN_EXTRACT:
7cc2145f
JG
9380 /* UBFX/SBFX. */
9381 if (speed)
b6875aac
KV
9382 {
9383 if (VECTOR_MODE_P (mode))
9384 *cost += extra_cost->vect.alu;
9385 else
9386 *cost += extra_cost->alu.bfx;
9387 }
7cc2145f
JG
9388
9389 /* We can trust that the immediates used will be correct (there
9390 are no by-register forms), so we need only cost op0. */
e548c9df 9391 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
43e9d192
IB
9392 return true;
9393
9394 case MULT:
4745e701
JG
9395 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
9396 /* aarch64_rtx_mult_cost always handles recursion to its
9397 operands. */
9398 return true;
43e9d192
IB
9399
9400 case MOD:
4f58fe36
KT
9401 /* We can expand signed mod by power of 2 using a NEGS, two parallel
9402 ANDs and a CSNEG. Assume here that CSNEG is the same as the cost of
9403 an unconditional negate. This case should only ever be reached through
9404 the set_smod_pow2_cheap check in expmed.c. */
9405 if (CONST_INT_P (XEXP (x, 1))
9406 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9407 && (mode == SImode || mode == DImode))
9408 {
9409 /* We expand to 4 instructions. Reset the baseline. */
9410 *cost = COSTS_N_INSNS (4);
9411
9412 if (speed)
9413 *cost += 2 * extra_cost->alu.logical
9414 + 2 * extra_cost->alu.arith;
9415
9416 return true;
9417 }
9418
9419 /* Fall-through. */
43e9d192 9420 case UMOD:
43e9d192
IB
9421 if (speed)
9422 {
cb9ac430 9423 /* Slightly prefer UMOD over SMOD. */
b6875aac
KV
9424 if (VECTOR_MODE_P (mode))
9425 *cost += extra_cost->vect.alu;
e548c9df
AM
9426 else if (GET_MODE_CLASS (mode) == MODE_INT)
9427 *cost += (extra_cost->mult[mode == DImode].add
cb9ac430
TC
9428 + extra_cost->mult[mode == DImode].idiv
9429 + (code == MOD ? 1 : 0));
43e9d192
IB
9430 }
9431 return false; /* All arguments need to be in registers. */
9432
9433 case DIV:
9434 case UDIV:
4105fe38 9435 case SQRT:
43e9d192
IB
9436 if (speed)
9437 {
b6875aac
KV
9438 if (VECTOR_MODE_P (mode))
9439 *cost += extra_cost->vect.alu;
9440 else if (GET_MODE_CLASS (mode) == MODE_INT)
4105fe38
JG
9441 /* There is no integer SQRT, so only DIV and UDIV can get
9442 here. */
cb9ac430
TC
9443 *cost += (extra_cost->mult[mode == DImode].idiv
9444 /* Slightly prefer UDIV over SDIV. */
9445 + (code == DIV ? 1 : 0));
4105fe38
JG
9446 else
9447 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
9448 }
9449 return false; /* All arguments need to be in registers. */
9450
a8eecd00 9451 case IF_THEN_ELSE:
2d5ffe46
AP
9452 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
9453 XEXP (x, 2), cost, speed);
a8eecd00
JG
9454
9455 case EQ:
9456 case NE:
9457 case GT:
9458 case GTU:
9459 case LT:
9460 case LTU:
9461 case GE:
9462 case GEU:
9463 case LE:
9464 case LEU:
9465
9466 return false; /* All arguments must be in registers. */
9467
b292109f
JG
9468 case FMA:
9469 op0 = XEXP (x, 0);
9470 op1 = XEXP (x, 1);
9471 op2 = XEXP (x, 2);
9472
9473 if (speed)
b6875aac
KV
9474 {
9475 if (VECTOR_MODE_P (mode))
9476 *cost += extra_cost->vect.alu;
9477 else
9478 *cost += extra_cost->fp[mode == DFmode].fma;
9479 }
b292109f
JG
9480
9481 /* FMSUB, FNMADD, and FNMSUB are free. */
9482 if (GET_CODE (op0) == NEG)
9483 op0 = XEXP (op0, 0);
9484
9485 if (GET_CODE (op2) == NEG)
9486 op2 = XEXP (op2, 0);
9487
9488 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
9489 and the by-element operand as operand 0. */
9490 if (GET_CODE (op1) == NEG)
9491 op1 = XEXP (op1, 0);
9492
9493 /* Catch vector-by-element operations. The by-element operand can
9494 either be (vec_duplicate (vec_select (x))) or just
9495 (vec_select (x)), depending on whether we are multiplying by
9496 a vector or a scalar.
9497
9498 Canonicalization is not very good in these cases, FMA4 will put the
9499 by-element operand as operand 0, FNMA4 will have it as operand 1. */
9500 if (GET_CODE (op0) == VEC_DUPLICATE)
9501 op0 = XEXP (op0, 0);
9502 else if (GET_CODE (op1) == VEC_DUPLICATE)
9503 op1 = XEXP (op1, 0);
9504
9505 if (GET_CODE (op0) == VEC_SELECT)
9506 op0 = XEXP (op0, 0);
9507 else if (GET_CODE (op1) == VEC_SELECT)
9508 op1 = XEXP (op1, 0);
9509
9510 /* If the remaining parameters are not registers,
9511 get the cost to put them into registers. */
e548c9df
AM
9512 *cost += rtx_cost (op0, mode, FMA, 0, speed);
9513 *cost += rtx_cost (op1, mode, FMA, 1, speed);
9514 *cost += rtx_cost (op2, mode, FMA, 2, speed);
b292109f
JG
9515 return true;
9516
5e2a765b
KT
9517 case FLOAT:
9518 case UNSIGNED_FLOAT:
9519 if (speed)
9520 *cost += extra_cost->fp[mode == DFmode].fromint;
9521 return false;
9522
b292109f
JG
9523 case FLOAT_EXTEND:
9524 if (speed)
b6875aac
KV
9525 {
9526 if (VECTOR_MODE_P (mode))
9527 {
9528 /* Vector widening conversion. */
9529 *cost += extra_cost->vect.alu;
9530 }
9531 else
9532 *cost += extra_cost->fp[mode == DFmode].widen;
9533 }
b292109f
JG
9534 return false;
9535
9536 case FLOAT_TRUNCATE:
9537 if (speed)
b6875aac
KV
9538 {
9539 if (VECTOR_MODE_P (mode))
9540 {
9541 /* Vector narrowing conversion. */
9542 *cost += extra_cost->vect.alu;
9543 }
9544 else
9545 *cost += extra_cost->fp[mode == DFmode].narrow;
9546 }
b292109f
JG
9547 return false;
9548
61263118
KT
9549 case FIX:
9550 case UNSIGNED_FIX:
9551 x = XEXP (x, 0);
9552 /* Strip the rounding part. They will all be implemented
9553 by the fcvt* family of instructions anyway. */
9554 if (GET_CODE (x) == UNSPEC)
9555 {
9556 unsigned int uns_code = XINT (x, 1);
9557
9558 if (uns_code == UNSPEC_FRINTA
9559 || uns_code == UNSPEC_FRINTM
9560 || uns_code == UNSPEC_FRINTN
9561 || uns_code == UNSPEC_FRINTP
9562 || uns_code == UNSPEC_FRINTZ)
9563 x = XVECEXP (x, 0, 0);
9564 }
9565
9566 if (speed)
b6875aac
KV
9567 {
9568 if (VECTOR_MODE_P (mode))
9569 *cost += extra_cost->vect.alu;
9570 else
9571 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
9572 }
39252973
KT
9573
9574 /* We can combine an fmul by a power of 2 followed by an fcvt into a single
9575 fixed-point fcvt. */
9576 if (GET_CODE (x) == MULT
9577 && ((VECTOR_MODE_P (mode)
9578 && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
9579 || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
9580 {
9581 *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
9582 0, speed);
9583 return true;
9584 }
9585
e548c9df 9586 *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
61263118
KT
9587 return true;
9588
b292109f 9589 case ABS:
b6875aac
KV
9590 if (VECTOR_MODE_P (mode))
9591 {
9592 /* ABS (vector). */
9593 if (speed)
9594 *cost += extra_cost->vect.alu;
9595 }
9596 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
b292109f 9597 {
19261b99
KT
9598 op0 = XEXP (x, 0);
9599
9600 /* FABD, which is analogous to FADD. */
9601 if (GET_CODE (op0) == MINUS)
9602 {
e548c9df
AM
9603 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
9604 *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
19261b99
KT
9605 if (speed)
9606 *cost += extra_cost->fp[mode == DFmode].addsub;
9607
9608 return true;
9609 }
9610 /* Simple FABS is analogous to FNEG. */
b292109f
JG
9611 if (speed)
9612 *cost += extra_cost->fp[mode == DFmode].neg;
9613 }
9614 else
9615 {
9616 /* Integer ABS will either be split to
9617 two arithmetic instructions, or will be an ABS
9618 (scalar), which we don't model. */
9619 *cost = COSTS_N_INSNS (2);
9620 if (speed)
9621 *cost += 2 * extra_cost->alu.arith;
9622 }
9623 return false;
9624
9625 case SMAX:
9626 case SMIN:
9627 if (speed)
9628 {
b6875aac
KV
9629 if (VECTOR_MODE_P (mode))
9630 *cost += extra_cost->vect.alu;
9631 else
9632 {
9633 /* FMAXNM/FMINNM/FMAX/FMIN.
9634 TODO: This may not be accurate for all implementations, but
9635 we do not model this in the cost tables. */
9636 *cost += extra_cost->fp[mode == DFmode].addsub;
9637 }
b292109f
JG
9638 }
9639 return false;
9640
61263118
KT
9641 case UNSPEC:
9642 /* The floating point round to integer frint* instructions. */
9643 if (aarch64_frint_unspec_p (XINT (x, 1)))
9644 {
9645 if (speed)
9646 *cost += extra_cost->fp[mode == DFmode].roundint;
9647
9648 return false;
9649 }
781aeb73
KT
9650
9651 if (XINT (x, 1) == UNSPEC_RBIT)
9652 {
9653 if (speed)
9654 *cost += extra_cost->alu.rev;
9655
9656 return false;
9657 }
61263118
KT
9658 break;
9659
fb620c4a
JG
9660 case TRUNCATE:
9661
9662 /* Decompose <su>muldi3_highpart. */
9663 if (/* (truncate:DI */
9664 mode == DImode
9665 /* (lshiftrt:TI */
9666 && GET_MODE (XEXP (x, 0)) == TImode
9667 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
9668 /* (mult:TI */
9669 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9670 /* (ANY_EXTEND:TI (reg:DI))
9671 (ANY_EXTEND:TI (reg:DI))) */
9672 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
9673 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
9674 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
9675 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
9676 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
9677 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
9678 /* (const_int 64) */
9679 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9680 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
9681 {
9682 /* UMULH/SMULH. */
9683 if (speed)
9684 *cost += extra_cost->mult[mode == DImode].extend;
e548c9df
AM
9685 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
9686 mode, MULT, 0, speed);
9687 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
9688 mode, MULT, 1, speed);
fb620c4a
JG
9689 return true;
9690 }
9691
9692 /* Fall through. */
43e9d192 9693 default:
61263118 9694 break;
43e9d192 9695 }
61263118 9696
c10e3d7f
AP
9697 if (dump_file
9698 && flag_aarch64_verbose_cost)
61263118
KT
9699 fprintf (dump_file,
9700 "\nFailed to cost RTX. Assuming default cost.\n");
9701
9702 return true;
43e9d192
IB
9703}
9704
0ee859b5
JG
9705/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
9706 calculated for X. This cost is stored in *COST. Returns true
9707 if the total cost of X was calculated. */
9708static bool
e548c9df 9709aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
0ee859b5
JG
9710 int param, int *cost, bool speed)
9711{
e548c9df 9712 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
0ee859b5 9713
c10e3d7f
AP
9714 if (dump_file
9715 && flag_aarch64_verbose_cost)
0ee859b5
JG
9716 {
9717 print_rtl_single (dump_file, x);
9718 fprintf (dump_file, "\n%s cost: %d (%s)\n",
9719 speed ? "Hot" : "Cold",
9720 *cost, result ? "final" : "partial");
9721 }
9722
9723 return result;
9724}
9725
43e9d192 9726static int
ef4bddc2 9727aarch64_register_move_cost (machine_mode mode,
8a3a7e67 9728 reg_class_t from_i, reg_class_t to_i)
43e9d192 9729{
8a3a7e67
RH
9730 enum reg_class from = (enum reg_class) from_i;
9731 enum reg_class to = (enum reg_class) to_i;
43e9d192 9732 const struct cpu_regmove_cost *regmove_cost
b175b679 9733 = aarch64_tune_params.regmove_cost;
43e9d192 9734
3be07662 9735 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
d677263e 9736 if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS)
3be07662
WD
9737 to = GENERAL_REGS;
9738
d677263e 9739 if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS)
3be07662
WD
9740 from = GENERAL_REGS;
9741
6ee70f81
AP
9742 /* Moving between GPR and stack cost is the same as GP2GP. */
9743 if ((from == GENERAL_REGS && to == STACK_REG)
9744 || (to == GENERAL_REGS && from == STACK_REG))
9745 return regmove_cost->GP2GP;
9746
9747 /* To/From the stack register, we move via the gprs. */
9748 if (to == STACK_REG || from == STACK_REG)
9749 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
9750 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
9751
6a70badb 9752 if (known_eq (GET_MODE_SIZE (mode), 16))
8919453c
WD
9753 {
9754 /* 128-bit operations on general registers require 2 instructions. */
9755 if (from == GENERAL_REGS && to == GENERAL_REGS)
9756 return regmove_cost->GP2GP * 2;
9757 else if (from == GENERAL_REGS)
9758 return regmove_cost->GP2FP * 2;
9759 else if (to == GENERAL_REGS)
9760 return regmove_cost->FP2GP * 2;
9761
9762 /* When AdvSIMD instructions are disabled it is not possible to move
9763 a 128-bit value directly between Q registers. This is handled in
9764 secondary reload. A general register is used as a scratch to move
9765 the upper DI value and the lower DI value is moved directly,
9766 hence the cost is the sum of three moves. */
9767 if (! TARGET_SIMD)
9768 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
9769
9770 return regmove_cost->FP2FP;
9771 }
9772
43e9d192
IB
9773 if (from == GENERAL_REGS && to == GENERAL_REGS)
9774 return regmove_cost->GP2GP;
9775 else if (from == GENERAL_REGS)
9776 return regmove_cost->GP2FP;
9777 else if (to == GENERAL_REGS)
9778 return regmove_cost->FP2GP;
9779
43e9d192
IB
9780 return regmove_cost->FP2FP;
9781}
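/* Editor's illustration (not part of the original source): with a
   hypothetical regmove cost table of GP2GP=1, GP2FP=5, FP2GP=5, FP2FP=2,
   a 16-byte (e.g. TImode) move costs
     GENERAL_REGS -> GENERAL_REGS             1 * 2 = 2
     GENERAL_REGS -> FP_REGS                  5 * 2 = 10
     FP_REGS -> FP_REGS without TARGET_SIMD   5 + 5 + 2 = 12
   the last because the value is bounced through a general register.  */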
9782
9783static int
ef4bddc2 9784aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
43e9d192
IB
9785 reg_class_t rclass ATTRIBUTE_UNUSED,
9786 bool in ATTRIBUTE_UNUSED)
9787{
b175b679 9788 return aarch64_tune_params.memmov_cost;
43e9d192
IB
9789}
9790
0c30e0f3
EM
9791/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
9792 to optimize 1.0/sqrt. */
ee62a5a6
RS
9793
9794static bool
9acc9cbe 9795use_rsqrt_p (machine_mode mode)
ee62a5a6
RS
9796{
9797 return (!flag_trapping_math
9798 && flag_unsafe_math_optimizations
9acc9cbe
EM
9799 && ((aarch64_tune_params.approx_modes->recip_sqrt
9800 & AARCH64_APPROX_MODE (mode))
1a33079e 9801 || flag_mrecip_low_precision_sqrt));
ee62a5a6
RS
9802}
9803
0c30e0f3
EM
9804/* Function to decide when to use the approximate reciprocal square root
9805 builtin. */
a6fc00da
BH
9806
9807static tree
ee62a5a6 9808aarch64_builtin_reciprocal (tree fndecl)
a6fc00da 9809{
9acc9cbe
EM
9810 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
9811
9812 if (!use_rsqrt_p (mode))
a6fc00da 9813 return NULL_TREE;
ee62a5a6 9814 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
a6fc00da
BH
9815}
9816
9817typedef rtx (*rsqrte_type) (rtx, rtx);
9818
98daafa0
EM
9819/* Select reciprocal square root initial estimate insn depending on machine
9820 mode. */
a6fc00da 9821
98daafa0 9822static rsqrte_type
a6fc00da
BH
9823get_rsqrte_type (machine_mode mode)
9824{
9825 switch (mode)
9826 {
4e10a5a7
RS
9827 case E_DFmode: return gen_aarch64_rsqrtedf;
9828 case E_SFmode: return gen_aarch64_rsqrtesf;
9829 case E_V2DFmode: return gen_aarch64_rsqrtev2df;
9830 case E_V2SFmode: return gen_aarch64_rsqrtev2sf;
9831 case E_V4SFmode: return gen_aarch64_rsqrtev4sf;
a6fc00da
BH
9832 default: gcc_unreachable ();
9833 }
9834}
9835
9836typedef rtx (*rsqrts_type) (rtx, rtx, rtx);
9837
98daafa0 9838/* Select reciprocal square root series step insn depending on machine mode. */
a6fc00da 9839
98daafa0 9840static rsqrts_type
a6fc00da
BH
9841get_rsqrts_type (machine_mode mode)
9842{
9843 switch (mode)
9844 {
4e10a5a7
RS
9845 case E_DFmode: return gen_aarch64_rsqrtsdf;
9846 case E_SFmode: return gen_aarch64_rsqrtssf;
9847 case E_V2DFmode: return gen_aarch64_rsqrtsv2df;
9848 case E_V2SFmode: return gen_aarch64_rsqrtsv2sf;
9849 case E_V4SFmode: return gen_aarch64_rsqrtsv4sf;
a6fc00da
BH
9850 default: gcc_unreachable ();
9851 }
9852}
9853
98daafa0
EM
9854/* Emit instruction sequence to compute either the approximate square root
9855 or its approximate reciprocal, depending on the flag RECP, and return
9856 whether the sequence was emitted or not. */
a6fc00da 9857
98daafa0
EM
9858bool
9859aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
a6fc00da 9860{
98daafa0 9861 machine_mode mode = GET_MODE (dst);
daef0a8c
JW
9862
9863 if (GET_MODE_INNER (mode) == HFmode)
2e19adc8
RE
9864 {
9865 gcc_assert (!recp);
9866 return false;
9867 }
9868
2e19adc8
RE
9869 if (!recp)
9870 {
9871 if (!(flag_mlow_precision_sqrt
9872 || (aarch64_tune_params.approx_modes->sqrt
9873 & AARCH64_APPROX_MODE (mode))))
9874 return false;
9875
9876 if (flag_finite_math_only
9877 || flag_trapping_math
9878 || !flag_unsafe_math_optimizations
9879 || optimize_function_for_size_p (cfun))
9880 return false;
9881 }
9882 else
9883 /* Caller assumes we cannot fail. */
9884 gcc_assert (use_rsqrt_p (mode));
daef0a8c 9885
ddc203a7 9886 machine_mode mmsk = mode_for_int_vector (mode).require ();
98daafa0
EM
9887 rtx xmsk = gen_reg_rtx (mmsk);
9888 if (!recp)
2e19adc8
RE
9889 /* When calculating the approximate square root, compare the
9890 argument with 0.0 and create a mask. */
9891 emit_insn (gen_rtx_SET (xmsk,
9892 gen_rtx_NEG (mmsk,
9893 gen_rtx_EQ (mmsk, src,
9894 CONST0_RTX (mode)))));
a6fc00da 9895
98daafa0
EM
9896 /* Estimate the approximate reciprocal square root. */
9897 rtx xdst = gen_reg_rtx (mode);
9898 emit_insn ((*get_rsqrte_type (mode)) (xdst, src));
a6fc00da 9899
98daafa0
EM
9900 /* Iterate over the series twice for SF and thrice for DF. */
9901 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
a6fc00da 9902
98daafa0
EM
9903 /* Optionally run one fewer iteration of the series, trading some
9904 accuracy for faster performance. */
9905 if ((recp && flag_mrecip_low_precision_sqrt)
9906 || (!recp && flag_mlow_precision_sqrt))
a6fc00da
BH
9907 iterations--;
9908
98daafa0
EM
9909 /* Iterate over the series to calculate the approximate reciprocal square
9910 root. */
9911 rtx x1 = gen_reg_rtx (mode);
9912 while (iterations--)
a6fc00da 9913 {
a6fc00da 9914 rtx x2 = gen_reg_rtx (mode);
98daafa0
EM
9915 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
9916
9917 emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2));
a6fc00da 9918
98daafa0
EM
9919 if (iterations > 0)
9920 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
9921 }
9922
9923 if (!recp)
9924 {
9925 /* Qualify the approximate reciprocal square root when the argument is
9926 0.0 by squashing the intermediate result to 0.0. */
9927 rtx xtmp = gen_reg_rtx (mmsk);
9928 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
9929 gen_rtx_SUBREG (mmsk, xdst, 0)));
9930 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
a6fc00da 9931
98daafa0
EM
9932 /* Calculate the approximate square root. */
9933 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
a6fc00da
BH
9934 }
9935
98daafa0
EM
9936 /* Finalize the approximation. */
9937 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
9938
9939 return true;
a6fc00da
BH
9940}
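/* Editor's sketch (not part of the original source): the scalar math
   implemented by the sequence above, using 1.0/sqrt only as a stand-in
   for the low-precision FRSQRTE table lookup (on real hardware the loop
   refines a rough estimate rather than an already exact value):

     #include <math.h>

     static double
     approx_rsqrt_example (double d, int iterations)  // 3 for DF, 2 for SF
     {
       double x = 1.0 / sqrt (d);          // stand-in for FRSQRTE
       while (iterations-- > 0)
         x = x * (3.0 - d * x * x) / 2.0;  // the FRSQRTS step
       return x;                           // ~ 1/sqrt (d)
     }

   The square root itself is then obtained as src * rsqrt (src), with the
   last FRSQRTS factor folded into the final multiply.  */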
9941
79a2bc2d
EM
9942typedef rtx (*recpe_type) (rtx, rtx);
9943
9944/* Select reciprocal initial estimate insn depending on machine mode. */
9945
9946static recpe_type
9947get_recpe_type (machine_mode mode)
9948{
9949 switch (mode)
9950 {
4e10a5a7
RS
9951 case E_SFmode: return (gen_aarch64_frecpesf);
9952 case E_V2SFmode: return (gen_aarch64_frecpev2sf);
9953 case E_V4SFmode: return (gen_aarch64_frecpev4sf);
9954 case E_DFmode: return (gen_aarch64_frecpedf);
9955 case E_V2DFmode: return (gen_aarch64_frecpev2df);
9956 default: gcc_unreachable ();
79a2bc2d
EM
9957 }
9958}
9959
9960typedef rtx (*recps_type) (rtx, rtx, rtx);
9961
9962/* Select reciprocal series step insn depending on machine mode. */
9963
9964static recps_type
9965get_recps_type (machine_mode mode)
9966{
9967 switch (mode)
9968 {
4e10a5a7
RS
9969 case E_SFmode: return (gen_aarch64_frecpssf);
9970 case E_V2SFmode: return (gen_aarch64_frecpsv2sf);
9971 case E_V4SFmode: return (gen_aarch64_frecpsv4sf);
9972 case E_DFmode: return (gen_aarch64_frecpsdf);
9973 case E_V2DFmode: return (gen_aarch64_frecpsv2df);
9974 default: gcc_unreachable ();
79a2bc2d
EM
9975 }
9976}
9977
9978/* Emit the instruction sequence to compute the approximation for the division
9979 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
9980
9981bool
9982aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
9983{
9984 machine_mode mode = GET_MODE (quo);
33d72b63
JW
9985
9986 if (GET_MODE_INNER (mode) == HFmode)
9987 return false;
9988
79a2bc2d
EM
9989 bool use_approx_division_p = (flag_mlow_precision_div
9990 || (aarch64_tune_params.approx_modes->division
9991 & AARCH64_APPROX_MODE (mode)));
9992
9993 if (!flag_finite_math_only
9994 || flag_trapping_math
9995 || !flag_unsafe_math_optimizations
9996 || optimize_function_for_size_p (cfun)
9997 || !use_approx_division_p)
9998 return false;
9999
1be49a38
RR
10000 if (!TARGET_SIMD && VECTOR_MODE_P (mode))
10001 return false;
10002
79a2bc2d
EM
10003 /* Estimate the approximate reciprocal. */
10004 rtx xrcp = gen_reg_rtx (mode);
10005 emit_insn ((*get_recpe_type (mode)) (xrcp, den));
10006
10007 /* Iterate over the series twice for SF and thrice for DF. */
10008 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
10009
10010 /* Optionally run one fewer iteration of the series, trading some
10011 accuracy for faster performance. */
10012 if (flag_mlow_precision_div)
10013 iterations--;
10014
10015 /* Iterate over the series to calculate the approximate reciprocal. */
10016 rtx xtmp = gen_reg_rtx (mode);
10017 while (iterations--)
10018 {
10019 emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den));
10020
10021 if (iterations > 0)
10022 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
10023 }
10024
10025 if (num != CONST1_RTX (mode))
10026 {
10027 /* As the approximate reciprocal of DEN is already calculated, only
10028 calculate the approximate division when NUM is not 1.0. */
10029 rtx xnum = force_reg (mode, num);
10030 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
10031 }
10032
10033 /* Finalize the approximation. */
10034 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
10035 return true;
10036}
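/* Editor's note (not part of the original source): the step emitted in
   the loop above is the FRECPS recurrence
       x_{n+1} = x_n * (2 - den * x_n),
   run twice for SF and three times for DF; the quotient is then formed
   as (num * x_n) * (2 - den * x_n), i.e. the last correction factor is
   folded into the final multiply.  */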
10037
d126a4ae
AP
10038/* Return the number of instructions that can be issued per cycle. */
10039static int
10040aarch64_sched_issue_rate (void)
10041{
b175b679 10042 return aarch64_tune_params.issue_rate;
d126a4ae
AP
10043}
10044
d03f7e44
MK
10045static int
10046aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
10047{
10048 int issue_rate = aarch64_sched_issue_rate ();
10049
10050 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
10051}
10052
2d6bc7fa
KT
10053
10054/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
10055 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
10056 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
10057
10058static int
10059aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
10060 int ready_index)
10061{
10062 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
10063}
10064
10065
8990e73a
TB
10066/* Vectorizer cost model target hooks. */
10067
10068/* Implement targetm.vectorize.builtin_vectorization_cost. */
10069static int
10070aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
10071 tree vectype,
10072 int misalign ATTRIBUTE_UNUSED)
10073{
10074 unsigned elements;
cd8ae5ed
AP
10075 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
10076 bool fp = false;
10077
10078 if (vectype != NULL)
10079 fp = FLOAT_TYPE_P (vectype);
8990e73a
TB
10080
10081 switch (type_of_cost)
10082 {
10083 case scalar_stmt:
cd8ae5ed 10084 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8990e73a
TB
10085
10086 case scalar_load:
cd8ae5ed 10087 return costs->scalar_load_cost;
8990e73a
TB
10088
10089 case scalar_store:
cd8ae5ed 10090 return costs->scalar_store_cost;
8990e73a
TB
10091
10092 case vector_stmt:
cd8ae5ed 10093 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
10094
10095 case vector_load:
cd8ae5ed 10096 return costs->vec_align_load_cost;
8990e73a
TB
10097
10098 case vector_store:
cd8ae5ed 10099 return costs->vec_store_cost;
8990e73a
TB
10100
10101 case vec_to_scalar:
cd8ae5ed 10102 return costs->vec_to_scalar_cost;
8990e73a
TB
10103
10104 case scalar_to_vec:
cd8ae5ed 10105 return costs->scalar_to_vec_cost;
8990e73a
TB
10106
10107 case unaligned_load:
cc9fe6bb 10108 case vector_gather_load:
cd8ae5ed 10109 return costs->vec_unalign_load_cost;
8990e73a
TB
10110
10111 case unaligned_store:
cc9fe6bb 10112 case vector_scatter_store:
cd8ae5ed 10113 return costs->vec_unalign_store_cost;
8990e73a
TB
10114
10115 case cond_branch_taken:
cd8ae5ed 10116 return costs->cond_taken_branch_cost;
8990e73a
TB
10117
10118 case cond_branch_not_taken:
cd8ae5ed 10119 return costs->cond_not_taken_branch_cost;
8990e73a
TB
10120
10121 case vec_perm:
cd8ae5ed 10122 return costs->vec_permute_cost;
c428f91c 10123
8990e73a 10124 case vec_promote_demote:
cd8ae5ed 10125 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8990e73a
TB
10126
10127 case vec_construct:
6a70badb 10128 elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
8990e73a
TB
10129 return elements / 2 + 1;
10130
10131 default:
10132 gcc_unreachable ();
10133 }
10134}
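/* Editor's illustration (not part of the original source): for a
   vec_construct of a V4SF value, TYPE_VECTOR_SUBPARTS is 4, so the
   function above returns 4 / 2 + 1 = 3; every other cost kind is read
   directly from the tuning structure's cpu_vector_cost table.  */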
10135
10136/* Implement targetm.vectorize.add_stmt_cost. */
10137static unsigned
10138aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
10139 struct _stmt_vec_info *stmt_info, int misalign,
10140 enum vect_cost_model_location where)
10141{
10142 unsigned *cost = (unsigned *) data;
10143 unsigned retval = 0;
10144
10145 if (flag_vect_cost_model)
10146 {
10147 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
10148 int stmt_cost =
10149 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
10150
10151 /* Statements in an inner loop relative to the loop being
10152 vectorized are weighted more heavily. The value here is
058e4c71 10153 arbitrary and could potentially be improved with analysis. */
8990e73a 10154 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
058e4c71 10155 count *= 50; /* FIXME */
8990e73a
TB
10156
10157 retval = (unsigned) (count * stmt_cost);
10158 cost[where] += retval;
10159 }
10160
10161 return retval;
10162}
10163
0cfff2a1 10164static void initialize_aarch64_code_model (struct gcc_options *);
43e9d192 10165
0cfff2a1
KT
10166/* Parse the TO_PARSE string and put the architecture struct that it
10167 selects into RES and the architectural features into ISA_FLAGS.
10168 Return an aarch64_parse_opt_result describing the parse result.
10169 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
43e9d192 10170
0cfff2a1
KT
10171static enum aarch64_parse_opt_result
10172aarch64_parse_arch (const char *to_parse, const struct processor **res,
10173 unsigned long *isa_flags)
43e9d192
IB
10174{
10175 char *ext;
10176 const struct processor *arch;
0cfff2a1 10177 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
10178 size_t len;
10179
0cfff2a1 10180 strcpy (str, to_parse);
43e9d192
IB
10181
10182 ext = strchr (str, '+');
10183
10184 if (ext != NULL)
10185 len = ext - str;
10186 else
10187 len = strlen (str);
10188
10189 if (len == 0)
0cfff2a1
KT
10190 return AARCH64_PARSE_MISSING_ARG;
10191
43e9d192 10192
0cfff2a1 10193 /* Loop through the list of supported ARCHes to find a match. */
43e9d192
IB
10194 for (arch = all_architectures; arch->name != NULL; arch++)
10195 {
10196 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
10197 {
0cfff2a1 10198 unsigned long isa_temp = arch->flags;
43e9d192
IB
10199
10200 if (ext != NULL)
10201 {
0cfff2a1
KT
10202 /* TO_PARSE string contains at least one extension. */
10203 enum aarch64_parse_opt_result ext_res
10204 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 10205
0cfff2a1
KT
10206 if (ext_res != AARCH64_PARSE_OK)
10207 return ext_res;
ffee7aa9 10208 }
0cfff2a1
KT
10209 /* Extension parsing was successful. Confirm the result
10210 arch and ISA flags. */
10211 *res = arch;
10212 *isa_flags = isa_temp;
10213 return AARCH64_PARSE_OK;
43e9d192
IB
10214 }
10215 }
10216
10217 /* ARCH name not found in list. */
0cfff2a1 10218 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10219}
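/* As a sketch of the flow above (assuming "armv8.2-a" and "fp16" are
   entries in the architecture and extension tables): for
   "-march=armv8.2-a+fp16" the '+' makes LEN cover "armv8.2-a", that
   prefix is matched against all_architectures by length and name, and
   the "+fp16" tail is handed to aarch64_parse_extension to adjust the
   ISA flags.  An empty name returns AARCH64_PARSE_MISSING_ARG and an
   unrecognised one returns AARCH64_PARSE_INVALID_ARG.  */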
10220
0cfff2a1
KT
10221/* Parse the TO_PARSE string and put the CPU that it selects into RES and
10222   its architectural feature flags into ISA_FLAGS.  Return an aarch64_parse_opt_result
10223 describing the parse result. If there is an error parsing, RES and
10224 ISA_FLAGS are left unchanged. */
43e9d192 10225
0cfff2a1
KT
10226static enum aarch64_parse_opt_result
10227aarch64_parse_cpu (const char *to_parse, const struct processor **res,
10228 unsigned long *isa_flags)
43e9d192
IB
10229{
10230 char *ext;
10231 const struct processor *cpu;
0cfff2a1 10232 char *str = (char *) alloca (strlen (to_parse) + 1);
43e9d192
IB
10233 size_t len;
10234
0cfff2a1 10235 strcpy (str, to_parse);
43e9d192
IB
10236
10237 ext = strchr (str, '+');
10238
10239 if (ext != NULL)
10240 len = ext - str;
10241 else
10242 len = strlen (str);
10243
10244 if (len == 0)
0cfff2a1
KT
10245 return AARCH64_PARSE_MISSING_ARG;
10246
43e9d192
IB
10247
10248 /* Loop through the list of supported CPUs to find a match. */
10249 for (cpu = all_cores; cpu->name != NULL; cpu++)
10250 {
10251 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
10252 {
0cfff2a1
KT
10253 unsigned long isa_temp = cpu->flags;
10254
43e9d192
IB
10255
10256 if (ext != NULL)
10257 {
0cfff2a1
KT
10258 /* TO_PARSE string contains at least one extension. */
10259 enum aarch64_parse_opt_result ext_res
10260 = aarch64_parse_extension (ext, &isa_temp);
43e9d192 10261
0cfff2a1
KT
10262 if (ext_res != AARCH64_PARSE_OK)
10263 return ext_res;
10264 }
10265 /* Extension parsing was successfull. Confirm the result
10266 cpu and ISA flags. */
10267 *res = cpu;
10268 *isa_flags = isa_temp;
10269 return AARCH64_PARSE_OK;
43e9d192
IB
10270 }
10271 }
10272
10273 /* CPU name not found in list. */
0cfff2a1 10274 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10275}
10276
0cfff2a1
KT
10277/* Parse the TO_PARSE string and put the cpu it selects into RES.
10278 Return an aarch64_parse_opt_result describing the parse result.
10279 If the parsing fails the RES does not change. */
43e9d192 10280
0cfff2a1
KT
10281static enum aarch64_parse_opt_result
10282aarch64_parse_tune (const char *to_parse, const struct processor **res)
43e9d192
IB
10283{
10284 const struct processor *cpu;
0cfff2a1
KT
10285 char *str = (char *) alloca (strlen (to_parse) + 1);
10286
10287 strcpy (str, to_parse);
43e9d192
IB
10288
10289 /* Loop through the list of supported CPUs to find a match. */
10290 for (cpu = all_cores; cpu->name != NULL; cpu++)
10291 {
10292 if (strcmp (cpu->name, str) == 0)
10293 {
0cfff2a1
KT
10294 *res = cpu;
10295 return AARCH64_PARSE_OK;
43e9d192
IB
10296 }
10297 }
10298
10299 /* CPU name not found in list. */
0cfff2a1 10300 return AARCH64_PARSE_INVALID_ARG;
43e9d192
IB
10301}
10302
8dec06f2
JG
10303/* Parse TOKEN, which has length LENGTH, to see if it is an option
10304 described in FLAG. If it is, return the index bit for that fusion type.
10305 If not, error (printing OPTION_NAME) and return zero. */
10306
10307static unsigned int
10308aarch64_parse_one_option_token (const char *token,
10309 size_t length,
10310 const struct aarch64_flag_desc *flag,
10311 const char *option_name)
10312{
10313 for (; flag->name != NULL; flag++)
10314 {
10315 if (length == strlen (flag->name)
10316 && !strncmp (flag->name, token, length))
10317 return flag->flag;
10318 }
10319
10320 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
10321 return 0;
10322}
10323
10324/* Parse OPTION, which is a '.'-separated list of flags to enable.
10325 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
10326 default state we inherit from the CPU tuning structures. OPTION_NAME
10327 gives the top-level option we are parsing in the -moverride string,
10328 for use in error messages. */
10329
10330static unsigned int
10331aarch64_parse_boolean_options (const char *option,
10332 const struct aarch64_flag_desc *flags,
10333 unsigned int initial_state,
10334 const char *option_name)
10335{
10336 const char separator = '.';
10337 const char* specs = option;
10338 const char* ntoken = option;
10339 unsigned int found_flags = initial_state;
10340
10341 while ((ntoken = strchr (specs, separator)))
10342 {
10343 size_t token_length = ntoken - specs;
10344 unsigned token_ops = aarch64_parse_one_option_token (specs,
10345 token_length,
10346 flags,
10347 option_name);
10348 /* If we find "none" (or, for simplicity's sake, an error) anywhere
10349 in the token stream, reset the supported operations. So:
10350
10351 adrp+add.cmp+branch.none.adrp+add
10352
10353 would have the result of turning on only adrp+add fusion. */
10354 if (!token_ops)
10355 found_flags = 0;
10356
10357 found_flags |= token_ops;
10358 specs = ++ntoken;
10359 }
10360
10361  /* We ended with a trailing separator; the string is ill-formed, so report it.  */
10362 if (!(*specs))
10363 {
10364 error ("%s string ill-formed\n", option_name);
10365 return 0;
10366 }
10367
10368 /* We still have one more token to parse. */
10369 size_t token_length = strlen (specs);
10370 unsigned token_ops = aarch64_parse_one_option_token (specs,
10371 token_length,
10372 flags,
10373 option_name);
10374 if (!token_ops)
10375 found_flags = 0;
10376
10377 found_flags |= token_ops;
10378 return found_flags;
10379}
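/* Worked example using the string quoted in the comment above,
   "adrp+add.cmp+branch.none.adrp+add": the loop peels off "adrp+add"
   and "cmp+branch" and ORs their bits into FOUND_FLAGS, "none" maps to
   zero and so clears FOUND_FLAGS, and the trailing "adrp+add" token is
   handled after the loop, leaving only that fusion bit set in the
   returned value.  */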
10380
10381/* Support for overriding instruction fusion. */
10382
10383static void
10384aarch64_parse_fuse_string (const char *fuse_string,
10385 struct tune_params *tune)
10386{
10387 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
10388 aarch64_fusible_pairs,
10389 tune->fusible_ops,
10390 "fuse=");
10391}
10392
10393/* Support for overriding other tuning flags. */
10394
10395static void
10396aarch64_parse_tune_string (const char *tune_string,
10397 struct tune_params *tune)
10398{
10399 tune->extra_tuning_flags
10400 = aarch64_parse_boolean_options (tune_string,
10401 aarch64_tuning_flags,
10402 tune->extra_tuning_flags,
10403 "tune=");
10404}
10405
10406/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
10407   we understand.  If it is, extract the option string and hand it off to
10408 the appropriate function. */
10409
10410void
10411aarch64_parse_one_override_token (const char* token,
10412 size_t length,
10413 struct tune_params *tune)
10414{
10415 const struct aarch64_tuning_override_function *fn
10416 = aarch64_tuning_override_functions;
10417
10418 const char *option_part = strchr (token, '=');
10419 if (!option_part)
10420 {
10421 error ("tuning string missing in option (%s)", token);
10422 return;
10423 }
10424
10425 /* Get the length of the option name. */
10426 length = option_part - token;
10427 /* Skip the '=' to get to the option string. */
10428 option_part++;
10429
10430 for (; fn->name != NULL; fn++)
10431 {
10432 if (!strncmp (fn->name, token, length))
10433 {
10434 fn->parse_override (option_part, tune);
10435 return;
10436 }
10437 }
10438
10439  error ("unknown tuning option (%s)", token);
10440 return;
10441}
10442
5eee3c34
JW
10443/* Validate the requested TLS size and clamp it to what the code model allows.  */
10444
10445static void
10446initialize_aarch64_tls_size (struct gcc_options *opts)
10447{
10448 if (aarch64_tls_size == 0)
10449 aarch64_tls_size = 24;
10450
10451 switch (opts->x_aarch64_cmodel_var)
10452 {
10453 case AARCH64_CMODEL_TINY:
10454      /* Both the default and the maximum TLS size allowed under tiny are 1M,
10455	 which needs two instructions to address, so we clamp the size to 24.  */
10456 if (aarch64_tls_size > 24)
10457 aarch64_tls_size = 24;
10458 break;
10459 case AARCH64_CMODEL_SMALL:
10460 /* The maximum TLS size allowed under small is 4G. */
10461 if (aarch64_tls_size > 32)
10462 aarch64_tls_size = 32;
10463 break;
10464 case AARCH64_CMODEL_LARGE:
10465 /* The maximum TLS size allowed under large is 16E.
10466	 FIXME: 16E needs a 64-bit offset, but we only support a 48-bit offset for now.  */
10467 if (aarch64_tls_size > 48)
10468 aarch64_tls_size = 48;
10469 break;
10470 default:
10471 gcc_unreachable ();
10472 }
10473
10474 return;
10475}
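/* For example, under -mcmodel=tiny an explicit -mtls-size=32 request is
   clamped back to 24, because the 1M TLS area is all that two
   instructions can address, while under the small model the same
   request is left alone and only values above 32 are clamped.  */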
10476
8dec06f2
JG
10477/* Parse STRING looking for options in the format:
10478 string :: option:string
10479 option :: name=substring
10480 name :: {a-z}
10481 substring :: defined by option. */
10482
10483static void
10484aarch64_parse_override_string (const char* input_string,
10485 struct tune_params* tune)
10486{
10487 const char separator = ':';
10488 size_t string_length = strlen (input_string) + 1;
10489 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
10490 char *string = string_root;
10491 strncpy (string, input_string, string_length);
10492 string[string_length - 1] = '\0';
10493
10494 char* ntoken = string;
10495
10496 while ((ntoken = strchr (string, separator)))
10497 {
10498 size_t token_length = ntoken - string;
10499 /* Make this substring look like a string. */
10500 *ntoken = '\0';
10501 aarch64_parse_one_override_token (string, token_length, tune);
10502 string = ++ntoken;
10503 }
10504
10505 /* One last option to parse. */
10506 aarch64_parse_one_override_token (string, strlen (string), tune);
10507 free (string_root);
10508}
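/* A sketch of how this is used (the option names are the ones registered
   above): an override string such as "fuse=adrp+add.cmp+branch:tune=..."
   is copied, split at each ':' into "fuse=adrp+add.cmp+branch" and
   "tune=...", and each token goes to aarch64_parse_one_override_token,
   which dispatches on the name before '=' to aarch64_parse_fuse_string
   or aarch64_parse_tune_string.  */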
43e9d192 10509
43e9d192
IB
10510
10511static void
0cfff2a1 10512aarch64_override_options_after_change_1 (struct gcc_options *opts)
43e9d192 10513{
acea40ac
WD
10514 /* PR 70044: We have to be careful about being called multiple times for the
10515 same function. This means all changes should be repeatable. */
10516
d6cb6d6a
WD
10517 /* Set aarch64_use_frame_pointer based on -fno-omit-frame-pointer.
10518 Disable the frame pointer flag so the mid-end will not use a frame
10519 pointer in leaf functions in order to support -fomit-leaf-frame-pointer.
10520 Set x_flag_omit_frame_pointer to the special value 2 to differentiate
10521 between -fomit-frame-pointer (1) and -fno-omit-frame-pointer (2). */
10522 aarch64_use_frame_pointer = opts->x_flag_omit_frame_pointer != 1;
acea40ac 10523 if (opts->x_flag_omit_frame_pointer == 0)
a3dc8760 10524 opts->x_flag_omit_frame_pointer = 2;
43e9d192 10525
1be34295 10526 /* If not optimizing for size, set the default
0cfff2a1
KT
10527 alignment to what the target wants. */
10528 if (!opts->x_optimize_size)
43e9d192 10529 {
0cfff2a1
KT
10530 if (opts->x_align_loops <= 0)
10531 opts->x_align_loops = aarch64_tune_params.loop_align;
10532 if (opts->x_align_jumps <= 0)
10533 opts->x_align_jumps = aarch64_tune_params.jump_align;
10534 if (opts->x_align_functions <= 0)
10535 opts->x_align_functions = aarch64_tune_params.function_align;
43e9d192 10536 }
b4f50fd4 10537
9ee6540a
WD
10538 /* We default to no pc-relative literal loads. */
10539
10540 aarch64_pcrelative_literal_loads = false;
10541
10542 /* If -mpc-relative-literal-loads is set on the command line, this
b4f50fd4 10543 implies that the user asked for PC relative literal loads. */
9ee6540a
WD
10544 if (opts->x_pcrelative_literal_loads == 1)
10545 aarch64_pcrelative_literal_loads = true;
b4f50fd4 10546
9ee6540a
WD
10547 /* In the tiny memory model it makes no sense to disallow PC relative
10548 literal pool loads. */
10549 if (aarch64_cmodel == AARCH64_CMODEL_TINY
10550 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
10551 aarch64_pcrelative_literal_loads = true;
98daafa0
EM
10552
10553 /* When enabling the lower precision Newton series for the square root, also
10554 enable it for the reciprocal square root, since the latter is an
10555 intermediary step for the former. */
10556 if (flag_mlow_precision_sqrt)
10557 flag_mrecip_low_precision_sqrt = true;
0cfff2a1 10558}
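/* Summarising the frame pointer handling above: only an explicit
   -fomit-frame-pointer (value 1) clears aarch64_use_frame_pointer; a
   value of 0 sets it and is then rewritten to the sentinel 2, so the
   mid-end no longer forces a frame pointer in leaf functions and
   -momit-leaf-frame-pointer can take effect.  */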
43e9d192 10559
0cfff2a1
KT
10560/* 'Unpack' the internal tuning structs and update the options
10561 in OPTS. The caller must have set up selected_tune and selected_arch
10562 as all the other target-specific codegen decisions are
10563 derived from them. */
10564
e4ea20c8 10565void
0cfff2a1
KT
10566aarch64_override_options_internal (struct gcc_options *opts)
10567{
10568 aarch64_tune_flags = selected_tune->flags;
10569 aarch64_tune = selected_tune->sched_core;
10570 /* Make a copy of the tuning parameters attached to the core, which
10571 we may later overwrite. */
10572 aarch64_tune_params = *(selected_tune->tune);
10573 aarch64_architecture_version = selected_arch->architecture_version;
10574
10575 if (opts->x_aarch64_override_tune_string)
10576 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
10577 &aarch64_tune_params);
10578
10579 /* This target defaults to strict volatile bitfields. */
10580 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
10581 opts->x_flag_strict_volatile_bitfields = 1;
10582
0cfff2a1 10583 initialize_aarch64_code_model (opts);
5eee3c34 10584 initialize_aarch64_tls_size (opts);
63892fa2 10585
2d6bc7fa
KT
10586 int queue_depth = 0;
10587 switch (aarch64_tune_params.autoprefetcher_model)
10588 {
10589 case tune_params::AUTOPREFETCHER_OFF:
10590 queue_depth = -1;
10591 break;
10592 case tune_params::AUTOPREFETCHER_WEAK:
10593 queue_depth = 0;
10594 break;
10595 case tune_params::AUTOPREFETCHER_STRONG:
10596 queue_depth = max_insn_queue_index + 1;
10597 break;
10598 default:
10599 gcc_unreachable ();
10600 }
10601
10602 /* We don't mind passing in global_options_set here as we don't use
10603 the *options_set structs anyway. */
10604 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
10605 queue_depth,
10606 opts->x_param_values,
10607 global_options_set.x_param_values);
10608
9d2c6e2e
MK
10609  /* Set up the parameters to be used in the prefetching algorithm.  Do not
10610 override the defaults unless we are tuning for a core we have
10611 researched values for. */
10612 if (aarch64_tune_params.prefetch->num_slots > 0)
10613 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
10614 aarch64_tune_params.prefetch->num_slots,
10615 opts->x_param_values,
10616 global_options_set.x_param_values);
10617 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
10618 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
10619 aarch64_tune_params.prefetch->l1_cache_size,
10620 opts->x_param_values,
10621 global_options_set.x_param_values);
10622 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
50487d79 10623 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
9d2c6e2e
MK
10624 aarch64_tune_params.prefetch->l1_cache_line_size,
10625 opts->x_param_values,
10626 global_options_set.x_param_values);
10627 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
10628 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
10629 aarch64_tune_params.prefetch->l2_cache_size,
50487d79
EM
10630 opts->x_param_values,
10631 global_options_set.x_param_values);
10632
13494fcb
WD
10633 /* Use the alternative scheduling-pressure algorithm by default. */
10634 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
10635 opts->x_param_values,
10636 global_options_set.x_param_values);
10637
16b2cafd
MK
10638  /* Enable software prefetching at the specified optimization level for
10639     CPUs that have prefetch.  Lower the optimization level threshold by 1
10640 when profiling is enabled. */
10641 if (opts->x_flag_prefetch_loop_arrays < 0
10642 && !opts->x_optimize_size
10643 && aarch64_tune_params.prefetch->default_opt_level >= 0
10644 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
10645 opts->x_flag_prefetch_loop_arrays = 1;
10646
0cfff2a1
KT
10647 aarch64_override_options_after_change_1 (opts);
10648}
43e9d192 10649
01f44038
KT
10650/* Print a hint with a suggestion for a core or architecture name that
10651 most closely resembles what the user passed in STR. ARCH is true if
10652 the user is asking for an architecture name. ARCH is false if the user
10653 is asking for a core name. */
10654
10655static void
10656aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
10657{
10658 auto_vec<const char *> candidates;
10659 const struct processor *entry = arch ? all_architectures : all_cores;
10660 for (; entry->name != NULL; entry++)
10661 candidates.safe_push (entry->name);
a08b5429
ML
10662
10663#ifdef HAVE_LOCAL_CPU_DETECT
10664 /* Add also "native" as possible value. */
10665 if (arch)
10666 candidates.safe_push ("native");
10667#endif
10668
01f44038
KT
10669 char *s;
10670 const char *hint = candidates_list_and_hint (str, s, candidates);
10671 if (hint)
10672 inform (input_location, "valid arguments are: %s;"
10673 " did you mean %qs?", s, hint);
6285e915
ML
10674 else
10675 inform (input_location, "valid arguments are: %s", s);
10676
01f44038
KT
10677 XDELETEVEC (s);
10678}
10679
10680/* Print a hint with a suggestion for a core name that most closely resembles
10681 what the user passed in STR. */
10682
10683inline static void
10684aarch64_print_hint_for_core (const char *str)
10685{
10686 aarch64_print_hint_for_core_or_arch (str, false);
10687}
10688
10689/* Print a hint with a suggestion for an architecture name that most closely
10690 resembles what the user passed in STR. */
10691
10692inline static void
10693aarch64_print_hint_for_arch (const char *str)
10694{
10695 aarch64_print_hint_for_core_or_arch (str, true);
10696}
10697
0cfff2a1
KT
10698/* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
10699   specified in STR and throw errors if appropriate.  Put the results, if
361fb3ee
KT
10700   they are valid, in RES and ISA_FLAGS.  Return whether the option is
10701 valid. */
43e9d192 10702
361fb3ee 10703static bool
0cfff2a1
KT
10704aarch64_validate_mcpu (const char *str, const struct processor **res,
10705 unsigned long *isa_flags)
10706{
10707 enum aarch64_parse_opt_result parse_res
10708 = aarch64_parse_cpu (str, res, isa_flags);
10709
10710 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 10711 return true;
0cfff2a1
KT
10712
10713 switch (parse_res)
10714 {
10715 case AARCH64_PARSE_MISSING_ARG:
fb241da2 10716 error ("missing cpu name in %<-mcpu=%s%>", str);
0cfff2a1
KT
10717 break;
10718 case AARCH64_PARSE_INVALID_ARG:
10719 error ("unknown value %qs for -mcpu", str);
01f44038 10720 aarch64_print_hint_for_core (str);
0cfff2a1
KT
10721 break;
10722 case AARCH64_PARSE_INVALID_FEATURE:
fb241da2 10723 error ("invalid feature modifier in %<-mcpu=%s%>", str);
0cfff2a1
KT
10724 break;
10725 default:
10726 gcc_unreachable ();
10727 }
361fb3ee
KT
10728
10729 return false;
0cfff2a1
KT
10730}
10731
10732/* Validate a command-line -march option. Parse the arch and extensions
10733 (if any) specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
10734 results, if they are valid, in RES and ISA_FLAGS. Return whether the
10735 option is valid. */
0cfff2a1 10736
361fb3ee 10737static bool
0cfff2a1 10738aarch64_validate_march (const char *str, const struct processor **res,
01f44038 10739 unsigned long *isa_flags)
0cfff2a1
KT
10740{
10741 enum aarch64_parse_opt_result parse_res
10742 = aarch64_parse_arch (str, res, isa_flags);
10743
10744 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 10745 return true;
0cfff2a1
KT
10746
10747 switch (parse_res)
10748 {
10749 case AARCH64_PARSE_MISSING_ARG:
fb241da2 10750 error ("missing arch name in %<-march=%s%>", str);
0cfff2a1
KT
10751 break;
10752 case AARCH64_PARSE_INVALID_ARG:
10753 error ("unknown value %qs for -march", str);
01f44038 10754 aarch64_print_hint_for_arch (str);
0cfff2a1
KT
10755 break;
10756 case AARCH64_PARSE_INVALID_FEATURE:
fb241da2 10757 error ("invalid feature modifier in %<-march=%s%>", str);
0cfff2a1
KT
10758 break;
10759 default:
10760 gcc_unreachable ();
10761 }
361fb3ee
KT
10762
10763 return false;
0cfff2a1
KT
10764}
10765
10766/* Validate a command-line -mtune option. Parse the cpu
10767 specified in STR and throw errors if appropriate. Put the
361fb3ee
KT
10768 result, if it is valid, in RES. Return whether the option is
10769 valid. */
0cfff2a1 10770
361fb3ee 10771static bool
0cfff2a1
KT
10772aarch64_validate_mtune (const char *str, const struct processor **res)
10773{
10774 enum aarch64_parse_opt_result parse_res
10775 = aarch64_parse_tune (str, res);
10776
10777 if (parse_res == AARCH64_PARSE_OK)
361fb3ee 10778 return true;
0cfff2a1
KT
10779
10780 switch (parse_res)
10781 {
10782 case AARCH64_PARSE_MISSING_ARG:
fb241da2 10783 error ("missing cpu name in %<-mtune=%s%>", str);
0cfff2a1
KT
10784 break;
10785 case AARCH64_PARSE_INVALID_ARG:
10786 error ("unknown value %qs for -mtune", str);
01f44038 10787 aarch64_print_hint_for_core (str);
0cfff2a1
KT
10788 break;
10789 default:
10790 gcc_unreachable ();
10791 }
361fb3ee
KT
10792 return false;
10793}
10794
10795/* Return the CPU corresponding to the enum CPU.
10796 If it doesn't specify a cpu, return the default. */
10797
10798static const struct processor *
10799aarch64_get_tune_cpu (enum aarch64_processor cpu)
10800{
10801 if (cpu != aarch64_none)
10802 return &all_cores[cpu];
10803
10804 /* The & 0x3f is to extract the bottom 6 bits that encode the
10805 default cpu as selected by the --with-cpu GCC configure option
10806 in config.gcc.
10807 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
10808 flags mechanism should be reworked to make it more sane. */
10809 return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
10810}
10811
10812/* Return the architecture corresponding to the enum ARCH.
10813 If it doesn't specify a valid architecture, return the default. */
10814
10815static const struct processor *
10816aarch64_get_arch (enum aarch64_arch arch)
10817{
10818 if (arch != aarch64_no_arch)
10819 return &all_architectures[arch];
10820
10821 const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
10822
10823 return &all_architectures[cpu->arch];
0cfff2a1
KT
10824}
10825
43cacb12
RS
10826/* Return the VG value associated with -msve-vector-bits= value VALUE. */
10827
10828static poly_uint16
10829aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
10830{
10831 /* For now generate vector-length agnostic code for -msve-vector-bits=128.
10832 This ensures we can clearly distinguish SVE and Advanced SIMD modes when
10833 deciding which .md file patterns to use and when deciding whether
10834 something is a legitimate address or constant. */
10835 if (value == SVE_SCALABLE || value == SVE_128)
10836 return poly_uint16 (2, 2);
10837 else
10838 return (int) value / 64;
10839}
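/* For example, -msve-vector-bits=256 maps to 256 / 64 = 4 64-bit
   granules (VG), whereas both the scalable setting and the 128-bit
   setting map to the indeterminate poly_uint16 (2, 2), so the rest of
   the compiler treats the vector length as unknown at compile time.  */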
10840
0cfff2a1
KT
10841/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
10842   and is used to parse the -m{cpu,tune,arch} strings and set up the initial
10843 tuning structs. In particular it must set selected_tune and
10844 aarch64_isa_flags that define the available ISA features and tuning
10845 decisions. It must also set selected_arch as this will be used to
10846 output the .arch asm tags for each function. */
10847
10848static void
10849aarch64_override_options (void)
10850{
10851 unsigned long cpu_isa = 0;
10852 unsigned long arch_isa = 0;
10853 aarch64_isa_flags = 0;
10854
361fb3ee
KT
10855 bool valid_cpu = true;
10856 bool valid_tune = true;
10857 bool valid_arch = true;
10858
0cfff2a1
KT
10859 selected_cpu = NULL;
10860 selected_arch = NULL;
10861 selected_tune = NULL;
10862
10863 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
10864 If either of -march or -mtune is given, they override their
10865 respective component of -mcpu. */
10866 if (aarch64_cpu_string)
361fb3ee
KT
10867 valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
10868 &cpu_isa);
0cfff2a1
KT
10869
10870 if (aarch64_arch_string)
361fb3ee
KT
10871 valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
10872 &arch_isa);
0cfff2a1
KT
10873
10874 if (aarch64_tune_string)
361fb3ee 10875 valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
43e9d192
IB
10876
10877 /* If the user did not specify a processor, choose the default
10878 one for them. This will be the CPU set during configuration using
a3cd0246 10879 --with-cpu, otherwise it is "generic". */
43e9d192
IB
10880 if (!selected_cpu)
10881 {
0cfff2a1
KT
10882 if (selected_arch)
10883 {
10884 selected_cpu = &all_cores[selected_arch->ident];
10885 aarch64_isa_flags = arch_isa;
361fb3ee 10886 explicit_arch = selected_arch->arch;
0cfff2a1
KT
10887 }
10888 else
10889 {
361fb3ee
KT
10890 /* Get default configure-time CPU. */
10891 selected_cpu = aarch64_get_tune_cpu (aarch64_none);
0cfff2a1
KT
10892 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
10893 }
361fb3ee
KT
10894
10895 if (selected_tune)
10896 explicit_tune_core = selected_tune->ident;
0cfff2a1
KT
10897 }
10898  /* If both -mcpu and -march are specified, check that they are architecturally
10899     compatible, warn if they are not, and prefer the -march ISA flags.  */
10900 else if (selected_arch)
10901 {
10902 if (selected_arch->arch != selected_cpu->arch)
10903 {
10904 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
10905 all_architectures[selected_cpu->arch].name,
10906 selected_arch->name);
10907 }
10908 aarch64_isa_flags = arch_isa;
361fb3ee
KT
10909 explicit_arch = selected_arch->arch;
10910 explicit_tune_core = selected_tune ? selected_tune->ident
10911 : selected_cpu->ident;
0cfff2a1
KT
10912 }
10913 else
10914 {
10915 /* -mcpu but no -march. */
10916 aarch64_isa_flags = cpu_isa;
361fb3ee
KT
10917 explicit_tune_core = selected_tune ? selected_tune->ident
10918 : selected_cpu->ident;
10919 gcc_assert (selected_cpu);
10920 selected_arch = &all_architectures[selected_cpu->arch];
10921 explicit_arch = selected_arch->arch;
43e9d192
IB
10922 }
10923
0cfff2a1
KT
10924  /* Set the arch as well, as we will need it when outputting
10925 the .arch directive in assembly. */
10926 if (!selected_arch)
10927 {
10928 gcc_assert (selected_cpu);
10929 selected_arch = &all_architectures[selected_cpu->arch];
10930 }
43e9d192 10931
43e9d192 10932 if (!selected_tune)
3edaf26d 10933 selected_tune = selected_cpu;
43e9d192 10934
0cfff2a1
KT
10935#ifndef HAVE_AS_MABI_OPTION
10936 /* The compiler may have been configured with 2.23.* binutils, which does
10937 not have support for ILP32. */
10938 if (TARGET_ILP32)
ee61f880 10939 error ("assembler does not support -mabi=ilp32");
0cfff2a1 10940#endif
43e9d192 10941
43cacb12
RS
10942 /* Convert -msve-vector-bits to a VG count. */
10943 aarch64_sve_vg = aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits);
10944
db58fd89 10945 if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
ee61f880 10946 sorry ("return address signing is only supported for -mabi=lp64");
db58fd89 10947
361fb3ee
KT
10948 /* Make sure we properly set up the explicit options. */
10949 if ((aarch64_cpu_string && valid_cpu)
10950 || (aarch64_tune_string && valid_tune))
10951 gcc_assert (explicit_tune_core != aarch64_none);
10952
10953 if ((aarch64_cpu_string && valid_cpu)
10954 || (aarch64_arch_string && valid_arch))
10955 gcc_assert (explicit_arch != aarch64_no_arch);
10956
0cfff2a1
KT
10957 aarch64_override_options_internal (&global_options);
10958
10959 /* Save these options as the default ones in case we push and pop them later
10960 while processing functions with potential target attributes. */
10961 target_option_default_node = target_option_current_node
10962 = build_target_option_node (&global_options);
43e9d192
IB
10963}
10964
10965/* Implement targetm.override_options_after_change. */
10966
10967static void
10968aarch64_override_options_after_change (void)
10969{
0cfff2a1 10970 aarch64_override_options_after_change_1 (&global_options);
43e9d192
IB
10971}
10972
10973static struct machine_function *
10974aarch64_init_machine_status (void)
10975{
10976 struct machine_function *machine;
766090c2 10977 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
10978 return machine;
10979}
10980
10981void
10982aarch64_init_expanders (void)
10983{
10984 init_machine_status = aarch64_init_machine_status;
10985}
10986
10987/* Work out the effective code model, taking PIC options into account.  */
10988static void
0cfff2a1 10989initialize_aarch64_code_model (struct gcc_options *opts)
43e9d192 10990{
0cfff2a1 10991 if (opts->x_flag_pic)
43e9d192 10992 {
0cfff2a1 10993 switch (opts->x_aarch64_cmodel_var)
43e9d192
IB
10994 {
10995 case AARCH64_CMODEL_TINY:
10996 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
10997 break;
10998 case AARCH64_CMODEL_SMALL:
34ecdb0f 10999#ifdef HAVE_AS_SMALL_PIC_RELOCS
1b1e81f8
JW
11000 aarch64_cmodel = (flag_pic == 2
11001 ? AARCH64_CMODEL_SMALL_PIC
11002 : AARCH64_CMODEL_SMALL_SPIC);
34ecdb0f
JW
11003#else
11004 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
11005#endif
43e9d192
IB
11006 break;
11007 case AARCH64_CMODEL_LARGE:
11008 sorry ("code model %qs with -f%s", "large",
0cfff2a1 11009 opts->x_flag_pic > 1 ? "PIC" : "pic");
1c652781 11010 break;
43e9d192
IB
11011 default:
11012 gcc_unreachable ();
11013 }
11014 }
11015 else
0cfff2a1 11016 aarch64_cmodel = opts->x_aarch64_cmodel_var;
43e9d192
IB
11017}
11018
361fb3ee
KT
11019/* Implement TARGET_OPTION_SAVE. */
11020
11021static void
11022aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
11023{
11024 ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
11025}
11026
11027/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
11028 using the information saved in PTR. */
11029
11030static void
11031aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
11032{
11033 opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
11034 selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
11035 opts->x_explicit_arch = ptr->x_explicit_arch;
11036 selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
11037 opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
11038
11039 aarch64_override_options_internal (opts);
11040}
11041
11042/* Implement TARGET_OPTION_PRINT. */
11043
11044static void
11045aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
11046{
11047 const struct processor *cpu
11048 = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
11049 unsigned long isa_flags = ptr->x_aarch64_isa_flags;
11050 const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
054b4005 11051 std::string extension
04a99ebe 11052 = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
361fb3ee
KT
11053
11054 fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
054b4005
JG
11055 fprintf (file, "%*sselected arch = %s%s\n", indent, "",
11056 arch->name, extension.c_str ());
361fb3ee
KT
11057}
11058
d78006d9
KT
11059static GTY(()) tree aarch64_previous_fndecl;
11060
e4ea20c8
KT
11061void
11062aarch64_reset_previous_fndecl (void)
11063{
11064 aarch64_previous_fndecl = NULL;
11065}
11066
acfc1ac1
KT
11067/* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
11068 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
11069 make sure optab availability predicates are recomputed when necessary. */
11070
11071void
11072aarch64_save_restore_target_globals (tree new_tree)
11073{
11074 if (TREE_TARGET_GLOBALS (new_tree))
11075 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
11076 else if (new_tree == target_option_default_node)
11077 restore_target_globals (&default_target_globals);
11078 else
11079 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
11080}
11081
d78006d9
KT
11082/* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
11083 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
11084 of the function, if such exists. This function may be called multiple
11085 times on a single function so use aarch64_previous_fndecl to avoid
11086 setting up identical state. */
11087
11088static void
11089aarch64_set_current_function (tree fndecl)
11090{
acfc1ac1
KT
11091 if (!fndecl || fndecl == aarch64_previous_fndecl)
11092 return;
11093
d78006d9
KT
11094 tree old_tree = (aarch64_previous_fndecl
11095 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
11096 : NULL_TREE);
11097
acfc1ac1 11098 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
d78006d9 11099
acfc1ac1
KT
11100 /* If current function has no attributes but the previous one did,
11101 use the default node. */
11102 if (!new_tree && old_tree)
11103 new_tree = target_option_default_node;
d78006d9 11104
acfc1ac1
KT
11105 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
11106 the default have been handled by aarch64_save_restore_target_globals from
11107 aarch64_pragma_target_parse. */
11108 if (old_tree == new_tree)
11109 return;
d78006d9 11110
acfc1ac1 11111 aarch64_previous_fndecl = fndecl;
6e17a23b 11112
acfc1ac1
KT
11113 /* First set the target options. */
11114 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
6e17a23b 11115
acfc1ac1 11116 aarch64_save_restore_target_globals (new_tree);
d78006d9 11117}
361fb3ee 11118
5a2c8331
KT
11119/* Enum describing the various ways we can handle attributes.
11120 In many cases we can reuse the generic option handling machinery. */
11121
11122enum aarch64_attr_opt_type
11123{
11124 aarch64_attr_mask, /* Attribute should set a bit in target_flags. */
11125 aarch64_attr_bool, /* Attribute sets or unsets a boolean variable. */
11126 aarch64_attr_enum, /* Attribute sets an enum variable. */
11127 aarch64_attr_custom /* Attribute requires a custom handling function. */
11128};
11129
11130/* All the information needed to handle a target attribute.
11131 NAME is the name of the attribute.
9c582551 11132 ATTR_TYPE specifies the type of behavior of the attribute as described
5a2c8331
KT
11133 in the definition of enum aarch64_attr_opt_type.
11134 ALLOW_NEG is true if the attribute supports a "no-" form.
ab93e9b7
SE
11135   HANDLER is the function that takes the attribute string as an argument.
11136 It is needed only when the ATTR_TYPE is aarch64_attr_custom.
5a2c8331 11137 OPT_NUM is the enum specifying the option that the attribute modifies.
9c582551 11138 This is needed for attributes that mirror the behavior of a command-line
5a2c8331
KT
11139   option, that is, it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
11140 aarch64_attr_enum. */
11141
11142struct aarch64_attribute_info
11143{
11144 const char *name;
11145 enum aarch64_attr_opt_type attr_type;
11146 bool allow_neg;
ab93e9b7 11147 bool (*handler) (const char *);
5a2c8331
KT
11148 enum opt_code opt_num;
11149};
11150
ab93e9b7 11151/* Handle the ARCH_STR argument to the arch= target attribute. */
5a2c8331
KT
11152
11153static bool
ab93e9b7 11154aarch64_handle_attr_arch (const char *str)
5a2c8331
KT
11155{
11156 const struct processor *tmp_arch = NULL;
11157 enum aarch64_parse_opt_result parse_res
11158 = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);
11159
11160 if (parse_res == AARCH64_PARSE_OK)
11161 {
11162 gcc_assert (tmp_arch);
11163 selected_arch = tmp_arch;
11164 explicit_arch = selected_arch->arch;
11165 return true;
11166 }
11167
11168 switch (parse_res)
11169 {
11170 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11171 error ("missing name in %<target(\"arch=\")%> pragma or attribute");
5a2c8331
KT
11172 break;
11173 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11174 error ("invalid name (\"%s\") in %<target(\"arch=\")%> pragma or attribute", str);
01f44038 11175 aarch64_print_hint_for_arch (str);
5a2c8331
KT
11176 break;
11177 case AARCH64_PARSE_INVALID_FEATURE:
ab93e9b7 11178 error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str);
5a2c8331
KT
11179 break;
11180 default:
11181 gcc_unreachable ();
11182 }
11183
11184 return false;
11185}
11186
ab93e9b7 11187/* Handle the argument CPU_STR to the cpu= target attribute. */
5a2c8331
KT
11188
11189static bool
ab93e9b7 11190aarch64_handle_attr_cpu (const char *str)
5a2c8331
KT
11191{
11192 const struct processor *tmp_cpu = NULL;
11193 enum aarch64_parse_opt_result parse_res
11194 = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);
11195
11196 if (parse_res == AARCH64_PARSE_OK)
11197 {
11198 gcc_assert (tmp_cpu);
11199 selected_tune = tmp_cpu;
11200 explicit_tune_core = selected_tune->ident;
11201
11202 selected_arch = &all_architectures[tmp_cpu->arch];
11203 explicit_arch = selected_arch->arch;
11204 return true;
11205 }
11206
11207 switch (parse_res)
11208 {
11209 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11210 error ("missing name in %<target(\"cpu=\")%> pragma or attribute");
5a2c8331
KT
11211 break;
11212 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11213 error ("invalid name (\"%s\") in %<target(\"cpu=\")%> pragma or attribute", str);
01f44038 11214 aarch64_print_hint_for_core (str);
5a2c8331
KT
11215 break;
11216 case AARCH64_PARSE_INVALID_FEATURE:
ab93e9b7 11217 error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str);
5a2c8331
KT
11218 break;
11219 default:
11220 gcc_unreachable ();
11221 }
11222
11223 return false;
11224}
11225
ab93e9b7 11226/* Handle the argument STR to the tune= target attribute. */
5a2c8331
KT
11227
11228static bool
ab93e9b7 11229aarch64_handle_attr_tune (const char *str)
5a2c8331
KT
11230{
11231 const struct processor *tmp_tune = NULL;
11232 enum aarch64_parse_opt_result parse_res
11233 = aarch64_parse_tune (str, &tmp_tune);
11234
11235 if (parse_res == AARCH64_PARSE_OK)
11236 {
11237 gcc_assert (tmp_tune);
11238 selected_tune = tmp_tune;
11239 explicit_tune_core = selected_tune->ident;
11240 return true;
11241 }
11242
11243 switch (parse_res)
11244 {
11245 case AARCH64_PARSE_INVALID_ARG:
ab93e9b7 11246 error ("invalid name (\"%s\") in %<target(\"tune=\")%> pragma or attribute", str);
01f44038 11247 aarch64_print_hint_for_core (str);
5a2c8331
KT
11248 break;
11249 default:
11250 gcc_unreachable ();
11251 }
11252
11253 return false;
11254}
11255
11256/* Parse an architecture extensions target attribute string specified in STR.
11257 For example "+fp+nosimd". Show any errors if needed. Return TRUE
11258 if successful. Update aarch64_isa_flags to reflect the ISA features
ab93e9b7 11259 modified. */
5a2c8331
KT
11260
11261static bool
ab93e9b7 11262aarch64_handle_attr_isa_flags (char *str)
5a2c8331
KT
11263{
11264 enum aarch64_parse_opt_result parse_res;
11265 unsigned long isa_flags = aarch64_isa_flags;
11266
e4ea20c8
KT
11267 /* We allow "+nothing" in the beginning to clear out all architectural
11268 features if the user wants to handpick specific features. */
11269 if (strncmp ("+nothing", str, 8) == 0)
11270 {
11271 isa_flags = 0;
11272 str += 8;
11273 }
11274
5a2c8331
KT
11275 parse_res = aarch64_parse_extension (str, &isa_flags);
11276
11277 if (parse_res == AARCH64_PARSE_OK)
11278 {
11279 aarch64_isa_flags = isa_flags;
11280 return true;
11281 }
11282
11283 switch (parse_res)
11284 {
11285 case AARCH64_PARSE_MISSING_ARG:
ab93e9b7 11286 error ("missing value in %<target()%> pragma or attribute");
5a2c8331
KT
11287 break;
11288
11289 case AARCH64_PARSE_INVALID_FEATURE:
ab93e9b7 11290 error ("invalid value (\"%s\") in %<target()%> pragma or attribute", str);
5a2c8331
KT
11291 break;
11292
11293 default:
11294 gcc_unreachable ();
11295 }
11296
11297 return false;
11298}
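/* For instance, __attribute__ ((target ("+nothing+fp"))) zeroes the
   local copy of the ISA flags because of the leading "+nothing" and
   then lets aarch64_parse_extension switch the FP feature back on,
   whereas a plain __attribute__ ((target ("+crc"))) adds CRC on top of
   whatever aarch64_isa_flags currently holds.  */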
11299
11300/* The target attributes that we support. On top of these we also support just
11301 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
11302 handled explicitly in aarch64_process_one_target_attr. */
11303
11304static const struct aarch64_attribute_info aarch64_attributes[] =
11305{
11306 { "general-regs-only", aarch64_attr_mask, false, NULL,
11307 OPT_mgeneral_regs_only },
11308 { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
11309 OPT_mfix_cortex_a53_835769 },
48bb1a55
CL
11310 { "fix-cortex-a53-843419", aarch64_attr_bool, true, NULL,
11311 OPT_mfix_cortex_a53_843419 },
5a2c8331 11312 { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
675d044c 11313 { "strict-align", aarch64_attr_mask, true, NULL, OPT_mstrict_align },
5a2c8331
KT
11314 { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
11315 OPT_momit_leaf_frame_pointer },
11316 { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
11317 { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
11318 OPT_march_ },
11319 { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
11320 { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
11321 OPT_mtune_ },
db58fd89
JW
11322 { "sign-return-address", aarch64_attr_enum, false, NULL,
11323 OPT_msign_return_address_ },
5a2c8331
KT
11324 { NULL, aarch64_attr_custom, false, NULL, OPT____ }
11325};
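/* To illustrate how the table is consulted (foo is a placeholder
   declaration and "armv8.1-a" is assumed to be a known architecture
   name):

     __attribute__ ((target ("arch=armv8.1-a,no-fix-cortex-a53-835769")))
     void foo (void);

   The first token goes through the custom "arch" handler; the second is
   recognised in its negated form because "fix-cortex-a53-835769" has
   ALLOW_NEG set, so the corresponding boolean option is turned off.  */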
11326
11327/* Parse ARG_STR which contains the definition of one target attribute.
ab93e9b7 11328 Show appropriate errors if any or return true if the attribute is valid. */
5a2c8331
KT
11329
11330static bool
ab93e9b7 11331aarch64_process_one_target_attr (char *arg_str)
5a2c8331
KT
11332{
11333 bool invert = false;
11334
11335 size_t len = strlen (arg_str);
11336
11337 if (len == 0)
11338 {
ab93e9b7 11339 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
11340 return false;
11341 }
11342
11343 char *str_to_check = (char *) alloca (len + 1);
11344 strcpy (str_to_check, arg_str);
11345
11346 /* Skip leading whitespace. */
11347 while (*str_to_check == ' ' || *str_to_check == '\t')
11348 str_to_check++;
11349
11350 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
11351 It is easier to detect and handle it explicitly here rather than going
11352 through the machinery for the rest of the target attributes in this
11353 function. */
11354 if (*str_to_check == '+')
ab93e9b7 11355 return aarch64_handle_attr_isa_flags (str_to_check);
5a2c8331
KT
11356
11357 if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
11358 {
11359 invert = true;
11360 str_to_check += 3;
11361 }
11362 char *arg = strchr (str_to_check, '=');
11363
11364 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
11365 and point ARG to "foo". */
11366 if (arg)
11367 {
11368 *arg = '\0';
11369 arg++;
11370 }
11371 const struct aarch64_attribute_info *p_attr;
16d12992 11372 bool found = false;
5a2c8331
KT
11373 for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
11374 {
11375 /* If the names don't match up, or the user has given an argument
11376 to an attribute that doesn't accept one, or didn't give an argument
11377 to an attribute that expects one, fail to match. */
11378 if (strcmp (str_to_check, p_attr->name) != 0)
11379 continue;
11380
16d12992 11381 found = true;
5a2c8331
KT
11382 bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
11383 || p_attr->attr_type == aarch64_attr_enum;
11384
11385 if (attr_need_arg_p ^ (arg != NULL))
11386 {
ab93e9b7 11387 error ("pragma or attribute %<target(\"%s\")%> does not accept an argument", str_to_check);
5a2c8331
KT
11388 return false;
11389 }
11390
11391 /* If the name matches but the attribute does not allow "no-" versions
11392 then we can't match. */
11393 if (invert && !p_attr->allow_neg)
11394 {
ab93e9b7 11395 error ("pragma or attribute %<target(\"%s\")%> does not allow a negated form", str_to_check);
5a2c8331
KT
11396 return false;
11397 }
11398
11399 switch (p_attr->attr_type)
11400 {
11401 /* Has a custom handler registered.
11402 For example, cpu=, arch=, tune=. */
11403 case aarch64_attr_custom:
11404 gcc_assert (p_attr->handler);
ab93e9b7 11405 if (!p_attr->handler (arg))
5a2c8331
KT
11406 return false;
11407 break;
11408
11409 /* Either set or unset a boolean option. */
11410 case aarch64_attr_bool:
11411 {
11412 struct cl_decoded_option decoded;
11413
11414 generate_option (p_attr->opt_num, NULL, !invert,
11415 CL_TARGET, &decoded);
11416 aarch64_handle_option (&global_options, &global_options_set,
11417 &decoded, input_location);
11418 break;
11419 }
11420 /* Set or unset a bit in the target_flags. aarch64_handle_option
11421 should know what mask to apply given the option number. */
11422 case aarch64_attr_mask:
11423 {
11424 struct cl_decoded_option decoded;
11425 /* We only need to specify the option number.
11426 aarch64_handle_option will know which mask to apply. */
11427 decoded.opt_index = p_attr->opt_num;
11428 decoded.value = !invert;
11429 aarch64_handle_option (&global_options, &global_options_set,
11430 &decoded, input_location);
11431 break;
11432 }
11433 /* Use the option setting machinery to set an option to an enum. */
11434 case aarch64_attr_enum:
11435 {
11436 gcc_assert (arg);
11437 bool valid;
11438 int value;
11439 valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
11440 &value, CL_TARGET);
11441 if (valid)
11442 {
11443 set_option (&global_options, NULL, p_attr->opt_num, value,
11444 NULL, DK_UNSPECIFIED, input_location,
11445 global_dc);
11446 }
11447 else
11448 {
ab93e9b7 11449 error ("pragma or attribute %<target(\"%s=%s\")%> is not valid", str_to_check, arg);
5a2c8331
KT
11450 }
11451 break;
11452 }
11453 default:
11454 gcc_unreachable ();
11455 }
11456 }
11457
16d12992
KT
11458  /* If we reached here, we either found an attribute and validated
11459     it or didn't match any.  If we matched an attribute but its arguments
11460     were malformed, we will have returned false already.  */
11461 return found;
5a2c8331
KT
11462}
11463
11464/* Count how many times the character C appears in
11465 NULL-terminated string STR. */
11466
11467static unsigned int
11468num_occurences_in_str (char c, char *str)
11469{
11470 unsigned int res = 0;
11471 while (*str != '\0')
11472 {
11473 if (*str == c)
11474 res++;
11475
11476 str++;
11477 }
11478
11479 return res;
11480}
11481
11482/* Parse the tree in ARGS that contains the target attribute information
ab93e9b7 11483 and update the global target options space. */
5a2c8331
KT
11484
11485bool
ab93e9b7 11486aarch64_process_target_attr (tree args)
5a2c8331
KT
11487{
11488 if (TREE_CODE (args) == TREE_LIST)
11489 {
11490 do
11491 {
11492 tree head = TREE_VALUE (args);
11493 if (head)
11494 {
ab93e9b7 11495 if (!aarch64_process_target_attr (head))
5a2c8331
KT
11496 return false;
11497 }
11498 args = TREE_CHAIN (args);
11499 } while (args);
11500
11501 return true;
11502 }
3b6cb9e3
ML
11503
11504 if (TREE_CODE (args) != STRING_CST)
11505 {
11506 error ("attribute %<target%> argument not a string");
11507 return false;
11508 }
5a2c8331
KT
11509
11510 size_t len = strlen (TREE_STRING_POINTER (args));
11511 char *str_to_check = (char *) alloca (len + 1);
11512 strcpy (str_to_check, TREE_STRING_POINTER (args));
11513
11514 if (len == 0)
11515 {
ab93e9b7 11516 error ("malformed %<target()%> pragma or attribute");
5a2c8331
KT
11517 return false;
11518 }
11519
11520  /* Used to catch empty strings between commas, i.e.
11521 attribute ((target ("attr1,,attr2"))). */
11522 unsigned int num_commas = num_occurences_in_str (',', str_to_check);
11523
11524 /* Handle multiple target attributes separated by ','. */
11525 char *token = strtok (str_to_check, ",");
11526
11527 unsigned int num_attrs = 0;
11528 while (token)
11529 {
11530 num_attrs++;
ab93e9b7 11531 if (!aarch64_process_one_target_attr (token))
5a2c8331 11532 {
ab93e9b7 11533 error ("pragma or attribute %<target(\"%s\")%> is not valid", token);
5a2c8331
KT
11534 return false;
11535 }
11536
11537 token = strtok (NULL, ",");
11538 }
11539
11540 if (num_attrs != num_commas + 1)
11541 {
ab93e9b7 11542 error ("malformed %<target(\"%s\")%> pragma or attribute", TREE_STRING_POINTER (args));
5a2c8331
KT
11543 return false;
11544 }
11545
11546 return true;
11547}
11548
11549/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
11550 process attribute ((target ("..."))). */
11551
11552static bool
11553aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
11554{
11555 struct cl_target_option cur_target;
11556 bool ret;
11557 tree old_optimize;
11558 tree new_target, new_optimize;
11559 tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
91d0e8de
KT
11560
11561 /* If what we're processing is the current pragma string then the
11562 target option node is already stored in target_option_current_node
11563 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
11564 having to re-parse the string. This is especially useful to keep
11565 arm_neon.h compile times down since that header contains a lot
11566 of intrinsics enclosed in pragmas. */
11567 if (!existing_target && args == current_target_pragma)
11568 {
11569 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
11570 return true;
11571 }
5a2c8331
KT
11572 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
11573
11574 old_optimize = build_optimization_node (&global_options);
11575 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
11576
11577 /* If the function changed the optimization levels as well as setting
11578 target options, start with the optimizations specified. */
11579 if (func_optimize && func_optimize != old_optimize)
11580 cl_optimization_restore (&global_options,
11581 TREE_OPTIMIZATION (func_optimize));
11582
11583 /* Save the current target options to restore at the end. */
11584 cl_target_option_save (&cur_target, &global_options);
11585
11586 /* If fndecl already has some target attributes applied to it, unpack
11587 them so that we add this attribute on top of them, rather than
11588 overwriting them. */
11589 if (existing_target)
11590 {
11591 struct cl_target_option *existing_options
11592 = TREE_TARGET_OPTION (existing_target);
11593
11594 if (existing_options)
11595 cl_target_option_restore (&global_options, existing_options);
11596 }
11597 else
11598 cl_target_option_restore (&global_options,
11599 TREE_TARGET_OPTION (target_option_current_node));
11600
ab93e9b7 11601 ret = aarch64_process_target_attr (args);
5a2c8331
KT
11602
11603 /* Set up any additional state. */
11604 if (ret)
11605 {
11606 aarch64_override_options_internal (&global_options);
e95a988a
KT
11607 /* Initialize SIMD builtins if we haven't already.
11608 Set current_target_pragma to NULL for the duration so that
11609 the builtin initialization code doesn't try to tag the functions
11610 being built with the attributes specified by any current pragma, thus
11611 going into an infinite recursion. */
11612 if (TARGET_SIMD)
11613 {
11614 tree saved_current_target_pragma = current_target_pragma;
11615 current_target_pragma = NULL;
11616 aarch64_init_simd_builtins ();
11617 current_target_pragma = saved_current_target_pragma;
11618 }
5a2c8331
KT
11619 new_target = build_target_option_node (&global_options);
11620 }
11621 else
11622 new_target = NULL;
11623
11624 new_optimize = build_optimization_node (&global_options);
11625
11626 if (fndecl && ret)
11627 {
11628 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
11629
11630 if (old_optimize != new_optimize)
11631 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
11632 }
11633
11634 cl_target_option_restore (&global_options, &cur_target);
11635
11636 if (old_optimize != new_optimize)
11637 cl_optimization_restore (&global_options,
11638 TREE_OPTIMIZATION (old_optimize));
11639 return ret;
11640}
11641
1fd8d40c
KT
11642/* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are
11643 tri-bool options (yes, no, don't care) and the default value is
11644 DEF, determine whether to reject inlining. */
11645
11646static bool
11647aarch64_tribools_ok_for_inlining_p (int caller, int callee,
11648 int dont_care, int def)
11649{
11650 /* If the callee doesn't care, always allow inlining. */
11651 if (callee == dont_care)
11652 return true;
11653
11654 /* If the caller doesn't care, always allow inlining. */
11655 if (caller == dont_care)
11656 return true;
11657
11658 /* Otherwise, allow inlining if either the callee and caller values
11659 agree, or if the callee is using the default value. */
11660 return (callee == caller || callee == def);
11661}
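/* For example, the -momit-leaf-frame-pointer check below calls this with
   DONT_CARE == 2 and DEF == 1: a callee left at "don't care" or at the
   default 1 can always be inlined, while a callee explicitly built with
   the value 0 is inlinable only into a caller that also uses 0 or that
   itself does not care.  */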
11662
11663/* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
11664 to inline CALLEE into CALLER based on target-specific info.
11665 Make sure that the caller and callee have compatible architectural
11666 features. Then go through the other possible target attributes
11667 and see if they can block inlining. Try not to reject always_inline
11668 callees unless they are incompatible architecturally. */
11669
11670static bool
11671aarch64_can_inline_p (tree caller, tree callee)
11672{
11673 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
11674 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
11675
1fd8d40c
KT
11676 struct cl_target_option *caller_opts
11677 = TREE_TARGET_OPTION (caller_tree ? caller_tree
11678 : target_option_default_node);
11679
675d044c
SD
11680 struct cl_target_option *callee_opts
11681 = TREE_TARGET_OPTION (callee_tree ? callee_tree
11682 : target_option_default_node);
1fd8d40c
KT
11683
11684 /* Callee's ISA flags should be a subset of the caller's. */
11685 if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
11686 != callee_opts->x_aarch64_isa_flags)
11687 return false;
11688
11689 /* Allow non-strict aligned functions inlining into strict
11690 aligned ones. */
11691 if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
11692 != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
11693 && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
11694 && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
11695 return false;
11696
11697 bool always_inline = lookup_attribute ("always_inline",
11698 DECL_ATTRIBUTES (callee));
11699
11700 /* If the architectural features match up and the callee is always_inline
11701 then the other attributes don't matter. */
11702 if (always_inline)
11703 return true;
11704
11705 if (caller_opts->x_aarch64_cmodel_var
11706 != callee_opts->x_aarch64_cmodel_var)
11707 return false;
11708
11709 if (caller_opts->x_aarch64_tls_dialect
11710 != callee_opts->x_aarch64_tls_dialect)
11711 return false;
11712
11713 /* Honour explicit requests to workaround errata. */
11714 if (!aarch64_tribools_ok_for_inlining_p (
11715 caller_opts->x_aarch64_fix_a53_err835769,
11716 callee_opts->x_aarch64_fix_a53_err835769,
11717 2, TARGET_FIX_ERR_A53_835769_DEFAULT))
11718 return false;
11719
48bb1a55
CL
11720 if (!aarch64_tribools_ok_for_inlining_p (
11721 caller_opts->x_aarch64_fix_a53_err843419,
11722 callee_opts->x_aarch64_fix_a53_err843419,
11723 2, TARGET_FIX_ERR_A53_843419))
11724 return false;
11725
1fd8d40c
KT
11726 /* If the user explicitly specified -momit-leaf-frame-pointer for the
11727     caller and callee and they don't match up, reject inlining.  */
11728 if (!aarch64_tribools_ok_for_inlining_p (
11729 caller_opts->x_flag_omit_leaf_frame_pointer,
11730 callee_opts->x_flag_omit_leaf_frame_pointer,
11731 2, 1))
11732 return false;
11733
11734 /* If the callee has specific tuning overrides, respect them. */
11735 if (callee_opts->x_aarch64_override_tune_string != NULL
11736 && caller_opts->x_aarch64_override_tune_string == NULL)
11737 return false;
11738
11739 /* If the user specified tuning override strings for the
11740 caller and callee and they don't match up, reject inlining.
11741 We just do a string compare here, we don't analyze the meaning
11742 of the string, as it would be too costly for little gain. */
11743 if (callee_opts->x_aarch64_override_tune_string
11744 && caller_opts->x_aarch64_override_tune_string
11745 && (strcmp (callee_opts->x_aarch64_override_tune_string,
11746 caller_opts->x_aarch64_override_tune_string) != 0))
11747 return false;
11748
11749 return true;
11750}
11751
43e9d192
IB
11752/* Return true if SYMBOL_REF X binds locally. */
11753
11754static bool
11755aarch64_symbol_binds_local_p (const_rtx x)
11756{
11757 return (SYMBOL_REF_DECL (x)
11758 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
11759 : SYMBOL_REF_LOCAL_P (x));
11760}
11761
11762/* Return true if SYMBOL_REF X is thread local */
11763static bool
11764aarch64_tls_symbol_p (rtx x)
11765{
11766 if (! TARGET_HAVE_TLS)
11767 return false;
11768
11769 if (GET_CODE (x) != SYMBOL_REF)
11770 return false;
11771
11772 return SYMBOL_REF_TLS_MODEL (x) != 0;
11773}
11774
11775/* Classify a TLS symbol into one of the TLS kinds. */
11776enum aarch64_symbol_type
11777aarch64_classify_tls_symbol (rtx x)
11778{
11779 enum tls_model tls_kind = tls_symbolic_operand_type (x);
11780
11781 switch (tls_kind)
11782 {
11783 case TLS_MODEL_GLOBAL_DYNAMIC:
11784 case TLS_MODEL_LOCAL_DYNAMIC:
11785 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
11786
11787 case TLS_MODEL_INITIAL_EXEC:
5ae7caad
JW
11788 switch (aarch64_cmodel)
11789 {
11790 case AARCH64_CMODEL_TINY:
11791 case AARCH64_CMODEL_TINY_PIC:
11792 return SYMBOL_TINY_TLSIE;
11793 default:
79496620 11794 return SYMBOL_SMALL_TLSIE;
5ae7caad 11795 }
43e9d192
IB
11796
11797 case TLS_MODEL_LOCAL_EXEC:
cbf5629e
JW
11798 if (aarch64_tls_size == 12)
11799 return SYMBOL_TLSLE12;
11800 else if (aarch64_tls_size == 24)
11801 return SYMBOL_TLSLE24;
11802 else if (aarch64_tls_size == 32)
11803 return SYMBOL_TLSLE32;
11804 else if (aarch64_tls_size == 48)
11805 return SYMBOL_TLSLE48;
11806 else
11807 gcc_unreachable ();
43e9d192
IB
11808
11809 case TLS_MODEL_EMULATED:
11810 case TLS_MODEL_NONE:
11811 return SYMBOL_FORCE_TO_MEM;
11812
11813 default:
11814 gcc_unreachable ();
11815 }
11816}
11817
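/* Illustrative summary of the mapping above (a sketch, not normative):
   global- and local-dynamic accesses use SYMBOL_SMALL_TLSDESC when TLS
   descriptors are enabled and SYMBOL_SMALL_TLSGD otherwise, while
   local-exec accesses pick SYMBOL_TLSLE12/24/32/48 according to
   -mtls-size.  For example, a locally defined "__thread int counter"
   compiled into an executable with -mtls-size=24 would normally
   classify as SYMBOL_TLSLE24.  */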
43cacb12
RS
11818/* Return the correct method for accessing X + OFFSET, where X is either
11819 a SYMBOL_REF or LABEL_REF. */
17f4d4bf 11820
43e9d192 11821enum aarch64_symbol_type
43cacb12 11822aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
43e9d192
IB
11823{
11824 if (GET_CODE (x) == LABEL_REF)
11825 {
11826 switch (aarch64_cmodel)
11827 {
11828 case AARCH64_CMODEL_LARGE:
11829 return SYMBOL_FORCE_TO_MEM;
11830
11831 case AARCH64_CMODEL_TINY_PIC:
11832 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
11833 return SYMBOL_TINY_ABSOLUTE;
11834
1b1e81f8 11835 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
11836 case AARCH64_CMODEL_SMALL_PIC:
11837 case AARCH64_CMODEL_SMALL:
11838 return SYMBOL_SMALL_ABSOLUTE;
11839
11840 default:
11841 gcc_unreachable ();
11842 }
11843 }
11844
17f4d4bf 11845 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 11846 {
43e9d192
IB
11847 if (aarch64_tls_symbol_p (x))
11848 return aarch64_classify_tls_symbol (x);
11849
17f4d4bf
CSS
11850 switch (aarch64_cmodel)
11851 {
11852 case AARCH64_CMODEL_TINY:
15f6e0da 11853 /* When we retrieve symbol + offset address, we have to make sure
f8b756b7
TB
11854 the offset does not cause overflow of the final address. But
11855 we have no way of knowing the address of symbol at compile time
11856 so we can't accurately say if the distance between the PC and
11857 symbol + offset is outside the addressable range of +/-1M in the
11858 TINY code model. So we rely on images not being greater than
11859 1M and cap the offset at 1M and anything beyond 1M will have to
15f6e0da
RR
11860 be loaded using an alternative mechanism. Furthermore if the
11861 symbol is a weak reference to something that isn't known to
11862 resolve to a symbol in this module, then force to memory. */
11863 if ((SYMBOL_REF_WEAK (x)
11864 && !aarch64_symbol_binds_local_p (x))
43cacb12 11865 || !IN_RANGE (offset, -1048575, 1048575))
a5350ddc
CSS
11866 return SYMBOL_FORCE_TO_MEM;
11867 return SYMBOL_TINY_ABSOLUTE;
11868
17f4d4bf 11869 case AARCH64_CMODEL_SMALL:
f8b756b7
TB
11870 /* Same reasoning as the tiny code model, but the offset cap here is
11871 4G. */
15f6e0da
RR
11872 if ((SYMBOL_REF_WEAK (x)
11873 && !aarch64_symbol_binds_local_p (x))
43cacb12 11874 || !IN_RANGE (offset, HOST_WIDE_INT_C (-4294967263),
3ff5d1f0 11875 HOST_WIDE_INT_C (4294967264)))
17f4d4bf
CSS
11876 return SYMBOL_FORCE_TO_MEM;
11877 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 11878
17f4d4bf 11879 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 11880 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 11881 return SYMBOL_TINY_GOT;
38e6c9a6
MS
11882 return SYMBOL_TINY_ABSOLUTE;
11883
1b1e81f8 11884 case AARCH64_CMODEL_SMALL_SPIC:
17f4d4bf
CSS
11885 case AARCH64_CMODEL_SMALL_PIC:
11886 if (!aarch64_symbol_binds_local_p (x))
1b1e81f8
JW
11887 return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
11888 ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
17f4d4bf 11889 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 11890
9ee6540a
WD
11891 case AARCH64_CMODEL_LARGE:
11892 /* This is alright even in PIC code as the constant
11893 pool reference is always PC relative and within
11894 the same translation unit. */
d47d34bb 11895 if (!aarch64_pcrelative_literal_loads && CONSTANT_POOL_ADDRESS_P (x))
9ee6540a
WD
11896 return SYMBOL_SMALL_ABSOLUTE;
11897 else
11898 return SYMBOL_FORCE_TO_MEM;
11899
17f4d4bf
CSS
11900 default:
11901 gcc_unreachable ();
11902 }
43e9d192 11903 }
17f4d4bf 11904
43e9d192
IB
11905 /* By default push everything into the constant pool. */
11906 return SYMBOL_FORCE_TO_MEM;
11907}
11908
43e9d192
IB
11909bool
11910aarch64_constant_address_p (rtx x)
11911{
11912 return (CONSTANT_P (x) && memory_address_p (DImode, x));
11913}
11914
11915bool
11916aarch64_legitimate_pic_operand_p (rtx x)
11917{
11918 if (GET_CODE (x) == SYMBOL_REF
11919 || (GET_CODE (x) == CONST
11920 && GET_CODE (XEXP (x, 0)) == PLUS
11921 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
11922 return false;
11923
11924 return true;
11925}
11926
26895c21
WD
11927/* Implement TARGET_LEGITIMATE_CONSTANT_P hook. Return true for constants
11928 that should be rematerialized rather than spilled. */
3520f7cc 11929
43e9d192 11930static bool
ef4bddc2 11931aarch64_legitimate_constant_p (machine_mode mode, rtx x)
43e9d192 11932{
26895c21 11933 /* Support CSE and rematerialization of common constants. */
c0bb5bc5 11934 if (CONST_INT_P (x)
9f7b87ca 11935 || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)
c0bb5bc5 11936 || GET_CODE (x) == CONST_VECTOR)
26895c21
WD
11937 return true;
11938
43cacb12
RS
11939 /* Do not allow vector struct mode constants for Advanced SIMD.
11940 We could support 0 and -1 easily, but they need support in
11941 aarch64-simd.md. */
11942 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
11943 if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
43e9d192
IB
11944 return false;
11945
43cacb12
RS
11946 /* Only accept variable-length vector constants if they can be
11947 handled directly.
11948
11949 ??? It would be possible to handle rematerialization of other
11950 constants via secondary reloads. */
11951 if (vec_flags & VEC_ANY_SVE)
11952 return aarch64_simd_valid_immediate (x, NULL);
11953
509bb9b6
RS
11954 if (GET_CODE (x) == HIGH)
11955 x = XEXP (x, 0);
11956
43cacb12
RS
11957 /* Accept polynomial constants that can be calculated by using the
11958 destination of a move as the sole temporary. Constants that
11959 require a second temporary cannot be rematerialized (they can't be
11960 forced to memory and also aren't legitimate constants). */
11961 poly_int64 offset;
11962 if (poly_int_rtx_p (x, &offset))
11963 return aarch64_offset_temporaries (false, offset) <= 1;
11964
11965 /* If an offset is being added to something else, we need to allow the
11966 base to be moved into the destination register, meaning that there
11967 are no free temporaries for the offset. */
11968 x = strip_offset (x, &offset);
11969 if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0)
11970 return false;
26895c21 11971
43cacb12
RS
11972 /* Do not allow const (plus (anchor_symbol, const_int)). */
11973 if (maybe_ne (offset, 0) && SYMBOL_REF_P (x) && SYMBOL_REF_ANCHOR_P (x))
11974 return false;
26895c21 11975
f28e54bd
WD
11976 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
11977 so spilling them is better than rematerialization. */
11978 if (SYMBOL_REF_P (x) && !SYMBOL_REF_TLS_MODEL (x))
11979 return true;
11980
26895c21
WD
11981 /* Label references are always constant. */
11982 if (GET_CODE (x) == LABEL_REF)
11983 return true;
11984
11985 return false;
43e9d192
IB
11986}
11987
a5bc806c 11988rtx
43e9d192
IB
11989aarch64_load_tp (rtx target)
11990{
11991 if (!target
11992 || GET_MODE (target) != Pmode
11993 || !register_operand (target, Pmode))
11994 target = gen_reg_rtx (Pmode);
11995
11996 /* Can return in any reg. */
11997 emit_insn (gen_aarch64_load_tp_hard (target));
11998 return target;
11999}
12000
43e9d192
IB
12001/* On AAPCS systems, this is the "struct __va_list". */
12002static GTY(()) tree va_list_type;
12003
12004/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
12005 Return the type to use as __builtin_va_list.
12006
12007 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
12008
12009 struct __va_list
12010 {
12011 void *__stack;
12012 void *__gr_top;
12013 void *__vr_top;
12014 int __gr_offs;
12015 int __vr_offs;
12016 }; */
12017
12018static tree
12019aarch64_build_builtin_va_list (void)
12020{
12021 tree va_list_name;
12022 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12023
12024 /* Create the type. */
12025 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
12026 /* Give it the required name. */
12027 va_list_name = build_decl (BUILTINS_LOCATION,
12028 TYPE_DECL,
12029 get_identifier ("__va_list"),
12030 va_list_type);
12031 DECL_ARTIFICIAL (va_list_name) = 1;
12032 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 12033 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
12034
12035 /* Create the fields. */
12036 f_stack = build_decl (BUILTINS_LOCATION,
12037 FIELD_DECL, get_identifier ("__stack"),
12038 ptr_type_node);
12039 f_grtop = build_decl (BUILTINS_LOCATION,
12040 FIELD_DECL, get_identifier ("__gr_top"),
12041 ptr_type_node);
12042 f_vrtop = build_decl (BUILTINS_LOCATION,
12043 FIELD_DECL, get_identifier ("__vr_top"),
12044 ptr_type_node);
12045 f_groff = build_decl (BUILTINS_LOCATION,
12046 FIELD_DECL, get_identifier ("__gr_offs"),
12047 integer_type_node);
12048 f_vroff = build_decl (BUILTINS_LOCATION,
12049 FIELD_DECL, get_identifier ("__vr_offs"),
12050 integer_type_node);
12051
88e3bdd1 12052 /* Tell tree-stdarg pass about our internal offset fields.
3fd6b9cc
JW
12053 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparison
12054 purposes, to identify whether the code is updating va_list internal
12055 offset fields in an irregular way. */
12056 va_list_gpr_counter_field = f_groff;
12057 va_list_fpr_counter_field = f_vroff;
12058
43e9d192
IB
12059 DECL_ARTIFICIAL (f_stack) = 1;
12060 DECL_ARTIFICIAL (f_grtop) = 1;
12061 DECL_ARTIFICIAL (f_vrtop) = 1;
12062 DECL_ARTIFICIAL (f_groff) = 1;
12063 DECL_ARTIFICIAL (f_vroff) = 1;
12064
12065 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
12066 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
12067 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
12068 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
12069 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
12070
12071 TYPE_FIELDS (va_list_type) = f_stack;
12072 DECL_CHAIN (f_stack) = f_grtop;
12073 DECL_CHAIN (f_grtop) = f_vrtop;
12074 DECL_CHAIN (f_vrtop) = f_groff;
12075 DECL_CHAIN (f_groff) = f_vroff;
12076
12077 /* Compute its layout. */
12078 layout_type (va_list_type);
12079
12080 return va_list_type;
12081}
12082
12083/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
12084static void
12085aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12086{
12087 const CUMULATIVE_ARGS *cum;
12088 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12089 tree stack, grtop, vrtop, groff, vroff;
12090 tree t;
88e3bdd1
JW
12091 int gr_save_area_size = cfun->va_list_gpr_size;
12092 int vr_save_area_size = cfun->va_list_fpr_size;
43e9d192
IB
12093 int vr_offset;
12094
12095 cum = &crtl->args.info;
88e3bdd1
JW
12096 if (cfun->va_list_gpr_size)
12097 gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD,
12098 cfun->va_list_gpr_size);
12099 if (cfun->va_list_fpr_size)
12100 vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn)
12101 * UNITS_PER_VREG, cfun->va_list_fpr_size);
43e9d192 12102
d5726973 12103 if (!TARGET_FLOAT)
43e9d192 12104 {
261fb553 12105 gcc_assert (cum->aapcs_nvrn == 0);
43e9d192
IB
12106 vr_save_area_size = 0;
12107 }
12108
12109 f_stack = TYPE_FIELDS (va_list_type_node);
12110 f_grtop = DECL_CHAIN (f_stack);
12111 f_vrtop = DECL_CHAIN (f_grtop);
12112 f_groff = DECL_CHAIN (f_vrtop);
12113 f_vroff = DECL_CHAIN (f_groff);
12114
12115 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
12116 NULL_TREE);
12117 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
12118 NULL_TREE);
12119 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
12120 NULL_TREE);
12121 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
12122 NULL_TREE);
12123 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
12124 NULL_TREE);
12125
12126 /* Emit code to initialize STACK, which points to the next varargs stack
12127 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
12128 by named arguments. STACK is 8-byte aligned. */
12129 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
12130 if (cum->aapcs_stack_size > 0)
12131 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
12132 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
12133 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12134
12135 /* Emit code to initialize GRTOP, the top of the GR save area.
12136 virtual_incoming_args_rtx should have been 16 byte aligned. */
12137 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
12138 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
12139 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12140
12141 /* Emit code to initialize VRTOP, the top of the VR save area.
12142 This address is gr_save_area_bytes below GRTOP, rounded
12143 down to the next 16-byte boundary. */
12144 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
4f59f9f2
UB
12145 vr_offset = ROUND_UP (gr_save_area_size,
12146 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
12147
12148 if (vr_offset)
12149 t = fold_build_pointer_plus_hwi (t, -vr_offset);
12150 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
12151 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12152
12153 /* Emit code to initialize GROFF, the offset from GRTOP of the
12154 next GPR argument. */
12155 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
12156 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
12157 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12158
12159 /* Likewise emit code to initialize VROFF, the offset from FTOP
12160 of the next VR argument. */
12161 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
12162 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
12163 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12164}
12165
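/* As a rough C-level sketch (illustrative only; the names below are
   shorthand rather than real identifiers in this file), the
   initialization emitted above behaves like:

     ap.__stack   = incoming_args + named_stack_bytes;
     ap.__gr_top  = incoming_args;
     ap.__vr_top  = ap.__gr_top - round_up (gr_save_bytes, 16);
     ap.__gr_offs = -gr_save_bytes;
     ap.__vr_offs = -vr_save_bytes;

   where incoming_args stands for virtual_incoming_args_rtx and the
   save-area sizes honour the tree-stdarg bounds computed above.  */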
12166/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
12167
12168static tree
12169aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
12170 gimple_seq *post_p ATTRIBUTE_UNUSED)
12171{
12172 tree addr;
12173 bool indirect_p;
12174 bool is_ha; /* is HFA or HVA. */
12175 bool dw_align; /* double-word align. */
ef4bddc2 12176 machine_mode ag_mode = VOIDmode;
43e9d192 12177 int nregs;
ef4bddc2 12178 machine_mode mode;
43e9d192
IB
12179
12180 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
12181 tree stack, f_top, f_off, off, arg, roundup, on_stack;
12182 HOST_WIDE_INT size, rsize, adjust, align;
12183 tree t, u, cond1, cond2;
12184
12185 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
12186 if (indirect_p)
12187 type = build_pointer_type (type);
12188
12189 mode = TYPE_MODE (type);
12190
12191 f_stack = TYPE_FIELDS (va_list_type_node);
12192 f_grtop = DECL_CHAIN (f_stack);
12193 f_vrtop = DECL_CHAIN (f_grtop);
12194 f_groff = DECL_CHAIN (f_vrtop);
12195 f_vroff = DECL_CHAIN (f_groff);
12196
12197 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
12198 f_stack, NULL_TREE);
12199 size = int_size_in_bytes (type);
985b8393 12200 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
43e9d192
IB
12201
12202 dw_align = false;
12203 adjust = 0;
12204 if (aarch64_vfp_is_call_or_return_candidate (mode,
12205 type,
12206 &ag_mode,
12207 &nregs,
12208 &is_ha))
12209 {
6a70badb
RS
12210 /* No frontends can create types with variable-sized modes, so we
12211 shouldn't be asked to pass or return them. */
12212 unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant ();
12213
43e9d192 12214 /* TYPE passed in fp/simd registers. */
d5726973 12215 if (!TARGET_FLOAT)
261fb553 12216 aarch64_err_no_fpadvsimd (mode, "varargs");
43e9d192
IB
12217
12218 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
12219 unshare_expr (valist), f_vrtop, NULL_TREE);
12220 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
12221 unshare_expr (valist), f_vroff, NULL_TREE);
12222
12223 rsize = nregs * UNITS_PER_VREG;
12224
12225 if (is_ha)
12226 {
6a70badb
RS
12227 if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG)
12228 adjust = UNITS_PER_VREG - ag_size;
43e9d192 12229 }
76b0cbf8 12230 else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12231 && size < UNITS_PER_VREG)
12232 {
12233 adjust = UNITS_PER_VREG - size;
12234 }
12235 }
12236 else
12237 {
12238 /* TYPE passed in general registers. */
12239 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
12240 unshare_expr (valist), f_grtop, NULL_TREE);
12241 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
12242 unshare_expr (valist), f_groff, NULL_TREE);
4f59f9f2 12243 rsize = ROUND_UP (size, UNITS_PER_WORD);
43e9d192
IB
12244 nregs = rsize / UNITS_PER_WORD;
12245
12246 if (align > 8)
12247 dw_align = true;
12248
76b0cbf8 12249 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12250 && size < UNITS_PER_WORD)
12251 {
12252 adjust = UNITS_PER_WORD - size;
12253 }
12254 }
12255
12256 /* Get a local temporary for the field value. */
12257 off = get_initialized_tmp_var (f_off, pre_p, NULL);
12258
12259 /* Emit code to branch if off >= 0. */
12260 t = build2 (GE_EXPR, boolean_type_node, off,
12261 build_int_cst (TREE_TYPE (off), 0));
12262 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
12263
12264 if (dw_align)
12265 {
12266 /* Emit: offs = (offs + 15) & -16. */
12267 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
12268 build_int_cst (TREE_TYPE (off), 15));
12269 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
12270 build_int_cst (TREE_TYPE (off), -16));
12271 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
12272 }
12273 else
12274 roundup = NULL;
12275
12276 /* Update ap.__[g|v]r_offs */
12277 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
12278 build_int_cst (TREE_TYPE (off), rsize));
12279 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
12280
12281 /* String up. */
12282 if (roundup)
12283 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
12284
12285 /* [cond2] if (ap.__[g|v]r_offs > 0) */
12286 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
12287 build_int_cst (TREE_TYPE (f_off), 0));
12288 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
12289
12290 /* String up: make sure the assignment happens before the use. */
12291 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
12292 COND_EXPR_ELSE (cond1) = t;
12293
12294 /* Prepare the trees handling the argument that is passed on the stack;
12295 the top level node will store in ON_STACK. */
12296 arg = get_initialized_tmp_var (stack, pre_p, NULL);
12297 if (align > 8)
12298 {
12299 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
4bdc2738 12300 t = fold_build_pointer_plus_hwi (arg, 15);
43e9d192
IB
12301 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12302 build_int_cst (TREE_TYPE (t), -16));
43e9d192
IB
12303 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
12304 }
12305 else
12306 roundup = NULL;
12307 /* Advance ap.__stack */
4bdc2738 12308 t = fold_build_pointer_plus_hwi (arg, size + 7);
43e9d192
IB
12309 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
12310 build_int_cst (TREE_TYPE (t), -8));
43e9d192
IB
12311 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
12312 /* String up roundup and advance. */
12313 if (roundup)
12314 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
12315 /* String up with arg */
12316 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
12317 /* Big-endianness related address adjustment. */
76b0cbf8 12318 if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD
43e9d192
IB
12319 && size < UNITS_PER_WORD)
12320 {
12321 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
12322 size_int (UNITS_PER_WORD - size));
12323 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
12324 }
12325
12326 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
12327 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
12328
12329 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
12330 t = off;
12331 if (adjust)
12332 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
12333 build_int_cst (TREE_TYPE (off), adjust));
12334
12335 t = fold_convert (sizetype, t);
12336 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
12337
12338 if (is_ha)
12339 {
12340 /* type ha; // treat as "struct {ftype field[n];}"
12341 ... [computing offs]
12342 for (i = 0; i <nregs; ++i, offs += 16)
12343 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
12344 return ha; */
12345 int i;
12346 tree tmp_ha, field_t, field_ptr_t;
12347
12348 /* Declare a local variable. */
12349 tmp_ha = create_tmp_var_raw (type, "ha");
12350 gimple_add_tmp_var (tmp_ha);
12351
12352 /* Establish the base type. */
12353 switch (ag_mode)
12354 {
4e10a5a7 12355 case E_SFmode:
43e9d192
IB
12356 field_t = float_type_node;
12357 field_ptr_t = float_ptr_type_node;
12358 break;
4e10a5a7 12359 case E_DFmode:
43e9d192
IB
12360 field_t = double_type_node;
12361 field_ptr_t = double_ptr_type_node;
12362 break;
4e10a5a7 12363 case E_TFmode:
43e9d192
IB
12364 field_t = long_double_type_node;
12365 field_ptr_t = long_double_ptr_type_node;
12366 break;
4e10a5a7 12367 case E_HFmode:
1b62ed4f
JG
12368 field_t = aarch64_fp16_type_node;
12369 field_ptr_t = aarch64_fp16_ptr_type_node;
43e9d192 12370 break;
4e10a5a7
RS
12371 case E_V2SImode:
12372 case E_V4SImode:
43e9d192
IB
12373 {
12374 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
12375 field_t = build_vector_type_for_mode (innertype, ag_mode);
12376 field_ptr_t = build_pointer_type (field_t);
12377 }
12378 break;
12379 default:
12380 gcc_assert (0);
12381 }
12382
12383 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area).  */
12384 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
12385 addr = t;
12386 t = fold_convert (field_ptr_t, addr);
12387 t = build2 (MODIFY_EXPR, field_t,
12388 build1 (INDIRECT_REF, field_t, tmp_ha),
12389 build1 (INDIRECT_REF, field_t, t));
12390
12391 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
12392 for (i = 1; i < nregs; ++i)
12393 {
12394 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
12395 u = fold_convert (field_ptr_t, addr);
12396 u = build2 (MODIFY_EXPR, field_t,
12397 build2 (MEM_REF, field_t, tmp_ha,
12398 build_int_cst (field_ptr_t,
12399 (i *
12400 int_size_in_bytes (field_t)))),
12401 build1 (INDIRECT_REF, field_t, u));
12402 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
12403 }
12404
12405 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
12406 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
12407 }
12408
12409 COND_EXPR_ELSE (cond2) = t;
12410 addr = fold_convert (build_pointer_type (type), cond1);
12411 addr = build_va_arg_indirect_ref (addr);
12412
12413 if (indirect_p)
12414 addr = build_va_arg_indirect_ref (addr);
12415
12416 return addr;
12417}
12418
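/* A condensed sketch of the selection logic built above (illustrative
   pseudo-C, not the generated GIMPLE; "on_stack" stands for the
   stack-argument sequence):

     offs = ap.__gr_offs;              (or __vr_offs for FP/SIMD arguments)
     if (offs >= 0)
       addr = on_stack;                (register save area already exhausted)
     else
       {
         ap.__gr_offs = offs + rsize;  (after any 16-byte over-alignment rounding)
         if (ap.__gr_offs > 0)
           addr = on_stack;            (argument straddles; use the stack copy)
         else
           addr = ap.__gr_top + offs;  (plus any big-endian adjustment)
       }
   */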
12419/* Implement TARGET_SETUP_INCOMING_VARARGS. */
12420
12421static void
ef4bddc2 12422aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
43e9d192
IB
12423 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12424 int no_rtl)
12425{
12426 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12427 CUMULATIVE_ARGS local_cum;
88e3bdd1
JW
12428 int gr_saved = cfun->va_list_gpr_size;
12429 int vr_saved = cfun->va_list_fpr_size;
43e9d192
IB
12430
12431 /* The caller has advanced CUM up to, but not beyond, the last named
12432 argument. Advance a local copy of CUM past the last "real" named
12433 argument, to find out how many registers are left over. */
12434 local_cum = *cum;
12435 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
12436
88e3bdd1
JW
12437 /* Find out how many registers we need to save.
12438 Honor tree-stdarg analysis results. */
12439 if (cfun->va_list_gpr_size)
12440 gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn,
12441 cfun->va_list_gpr_size / UNITS_PER_WORD);
12442 if (cfun->va_list_fpr_size)
12443 vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn,
12444 cfun->va_list_fpr_size / UNITS_PER_VREG);
43e9d192 12445
d5726973 12446 if (!TARGET_FLOAT)
43e9d192 12447 {
261fb553 12448 gcc_assert (local_cum.aapcs_nvrn == 0);
43e9d192
IB
12449 vr_saved = 0;
12450 }
12451
12452 if (!no_rtl)
12453 {
12454 if (gr_saved > 0)
12455 {
12456 rtx ptr, mem;
12457
12458 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
12459 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
12460 - gr_saved * UNITS_PER_WORD);
12461 mem = gen_frame_mem (BLKmode, ptr);
12462 set_mem_alias_set (mem, get_varargs_alias_set ());
12463
12464 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
12465 mem, gr_saved);
12466 }
12467 if (vr_saved > 0)
12468 {
12469 /* We can't use move_block_from_reg, because it will use
12470 the wrong mode, storing D regs only. */
ef4bddc2 12471 machine_mode mode = TImode;
88e3bdd1 12472 int off, i, vr_start;
43e9d192
IB
12473
12474 /* Set OFF to the offset from virtual_incoming_args_rtx of
12475 the first vector register. The VR save area lies below
12476 the GR one, and is aligned to 16 bytes. */
4f59f9f2
UB
12477 off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
12478 STACK_BOUNDARY / BITS_PER_UNIT);
43e9d192
IB
12479 off -= vr_saved * UNITS_PER_VREG;
12480
88e3bdd1
JW
12481 vr_start = V0_REGNUM + local_cum.aapcs_nvrn;
12482 for (i = 0; i < vr_saved; ++i)
43e9d192
IB
12483 {
12484 rtx ptr, mem;
12485
12486 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
12487 mem = gen_frame_mem (mode, ptr);
12488 set_mem_alias_set (mem, get_varargs_alias_set ());
88e3bdd1 12489 aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i));
43e9d192
IB
12490 off += UNITS_PER_VREG;
12491 }
12492 }
12493 }
12494
12495 /* We don't save the size into *PRETEND_SIZE because we want to avoid
12496 any complication of having crtl->args.pretend_args_size changed. */
8799637a 12497 cfun->machine->frame.saved_varargs_size
4f59f9f2
UB
12498 = (ROUND_UP (gr_saved * UNITS_PER_WORD,
12499 STACK_BOUNDARY / BITS_PER_UNIT)
43e9d192
IB
12500 + vr_saved * UNITS_PER_VREG);
12501}
12502
12503static void
12504aarch64_conditional_register_usage (void)
12505{
12506 int i;
12507 if (!TARGET_FLOAT)
12508 {
12509 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
12510 {
12511 fixed_regs[i] = 1;
12512 call_used_regs[i] = 1;
12513 }
12514 }
43cacb12
RS
12515 if (!TARGET_SVE)
12516 for (i = P0_REGNUM; i <= P15_REGNUM; i++)
12517 {
12518 fixed_regs[i] = 1;
12519 call_used_regs[i] = 1;
12520 }
43e9d192
IB
12521}
12522
12523/* Walk down the type tree of TYPE counting consecutive base elements.
12524 If *MODEP is VOIDmode, then set it to the first valid floating point
12525 type. If a non-floating point type is found, or if a floating point
12526 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
12527 otherwise return the count in the sub-tree. */
12528static int
ef4bddc2 12529aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
43e9d192 12530{
ef4bddc2 12531 machine_mode mode;
43e9d192
IB
12532 HOST_WIDE_INT size;
12533
12534 switch (TREE_CODE (type))
12535 {
12536 case REAL_TYPE:
12537 mode = TYPE_MODE (type);
1b62ed4f
JG
12538 if (mode != DFmode && mode != SFmode
12539 && mode != TFmode && mode != HFmode)
43e9d192
IB
12540 return -1;
12541
12542 if (*modep == VOIDmode)
12543 *modep = mode;
12544
12545 if (*modep == mode)
12546 return 1;
12547
12548 break;
12549
12550 case COMPLEX_TYPE:
12551 mode = TYPE_MODE (TREE_TYPE (type));
1b62ed4f
JG
12552 if (mode != DFmode && mode != SFmode
12553 && mode != TFmode && mode != HFmode)
43e9d192
IB
12554 return -1;
12555
12556 if (*modep == VOIDmode)
12557 *modep = mode;
12558
12559 if (*modep == mode)
12560 return 2;
12561
12562 break;
12563
12564 case VECTOR_TYPE:
12565 /* Use V2SImode and V4SImode as representatives of all 64-bit
12566 and 128-bit vector types. */
12567 size = int_size_in_bytes (type);
12568 switch (size)
12569 {
12570 case 8:
12571 mode = V2SImode;
12572 break;
12573 case 16:
12574 mode = V4SImode;
12575 break;
12576 default:
12577 return -1;
12578 }
12579
12580 if (*modep == VOIDmode)
12581 *modep = mode;
12582
12583 /* Vector modes are considered to be opaque: two vectors are
12584 equivalent for the purposes of being homogeneous aggregates
12585 if they are the same size. */
12586 if (*modep == mode)
12587 return 1;
12588
12589 break;
12590
12591 case ARRAY_TYPE:
12592 {
12593 int count;
12594 tree index = TYPE_DOMAIN (type);
12595
807e902e
KZ
12596 /* Can't handle incomplete types nor sizes that are not
12597 fixed. */
12598 if (!COMPLETE_TYPE_P (type)
12599 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
12600 return -1;
12601
12602 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
12603 if (count == -1
12604 || !index
12605 || !TYPE_MAX_VALUE (index)
cc269bb6 12606 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 12607 || !TYPE_MIN_VALUE (index)
cc269bb6 12608 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
12609 || count < 0)
12610 return -1;
12611
ae7e9ddd
RS
12612 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
12613 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
12614
12615 /* There must be no padding. */
6a70badb
RS
12616 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
12617 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
12618 return -1;
12619
12620 return count;
12621 }
12622
12623 case RECORD_TYPE:
12624 {
12625 int count = 0;
12626 int sub_count;
12627 tree field;
12628
807e902e
KZ
12629 /* Can't handle incomplete types nor sizes that are not
12630 fixed. */
12631 if (!COMPLETE_TYPE_P (type)
12632 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
12633 return -1;
12634
12635 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
12636 {
12637 if (TREE_CODE (field) != FIELD_DECL)
12638 continue;
12639
12640 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
12641 if (sub_count < 0)
12642 return -1;
12643 count += sub_count;
12644 }
12645
12646 /* There must be no padding. */
6a70badb
RS
12647 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
12648 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
12649 return -1;
12650
12651 return count;
12652 }
12653
12654 case UNION_TYPE:
12655 case QUAL_UNION_TYPE:
12656 {
12657 /* These aren't very interesting except in a degenerate case. */
12658 int count = 0;
12659 int sub_count;
12660 tree field;
12661
807e902e
KZ
12662 /* Can't handle incomplete types nor sizes that are not
12663 fixed. */
12664 if (!COMPLETE_TYPE_P (type)
12665 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
12666 return -1;
12667
12668 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
12669 {
12670 if (TREE_CODE (field) != FIELD_DECL)
12671 continue;
12672
12673 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
12674 if (sub_count < 0)
12675 return -1;
12676 count = count > sub_count ? count : sub_count;
12677 }
12678
12679 /* There must be no padding. */
6a70badb
RS
12680 if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)),
12681 count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
12682 return -1;
12683
12684 return count;
12685 }
12686
12687 default:
12688 break;
12689 }
12690
12691 return -1;
12692}
12693
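/* A worked example (sketch): for

     struct point { double x; double y; double z; };

   the walk above returns 3 with *MODEP set to DFmode, making the struct
   a homogeneous floating-point aggregate that can be passed in three
   consecutive FP/SIMD registers.  Mixing element types, e.g. a float
   field next to a double field, makes the walk return -1.  */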
b6ec6215
KT
12694/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
12695 type as described in AAPCS64 \S 4.1.2.
12696
12697 See the comment above aarch64_composite_type_p for the notes on MODE. */
12698
12699static bool
12700aarch64_short_vector_p (const_tree type,
12701 machine_mode mode)
12702{
6a70badb 12703 poly_int64 size = -1;
b6ec6215
KT
12704
12705 if (type && TREE_CODE (type) == VECTOR_TYPE)
12706 size = int_size_in_bytes (type);
12707 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12708 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12709 size = GET_MODE_SIZE (mode);
12710
6a70badb 12711 return known_eq (size, 8) || known_eq (size, 16);
b6ec6215
KT
12712}
12713
43e9d192
IB
12714/* Return TRUE if the type, as described by TYPE and MODE, is a composite
12715 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
12716 array types. The C99 floating-point complex types are also considered
12717 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
12718 types, which are GCC extensions and out of the scope of AAPCS64, are
12719 treated as composite types here as well.
12720
12721 Note that MODE itself is not sufficient in determining whether a type
12722 is such a composite type or not. This is because
12723 stor-layout.c:compute_record_mode may have already changed the MODE
12724 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
12725 structure with only one field may have its MODE set to the mode of the
12726 field. Also an integer mode whose size matches the size of the
12727 RECORD_TYPE type may be used to substitute the original mode
12728 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
12729 solely relied on. */
12730
12731static bool
12732aarch64_composite_type_p (const_tree type,
ef4bddc2 12733 machine_mode mode)
43e9d192 12734{
b6ec6215
KT
12735 if (aarch64_short_vector_p (type, mode))
12736 return false;
12737
43e9d192
IB
12738 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
12739 return true;
12740
12741 if (mode == BLKmode
12742 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
12743 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
12744 return true;
12745
12746 return false;
12747}
12748
43e9d192
IB
12749/* Return TRUE if an argument, whose type is described by TYPE and MODE,
12750 shall be passed or returned in simd/fp register(s) (providing these
12751 parameter passing registers are available).
12752
12753 Upon successful return, *COUNT returns the number of needed registers,
12754 *BASE_MODE returns the mode of the individual register and when IS_HA
12755 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
12756 floating-point aggregate or a homogeneous short-vector aggregate. */
12757
12758static bool
ef4bddc2 12759aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
43e9d192 12760 const_tree type,
ef4bddc2 12761 machine_mode *base_mode,
43e9d192
IB
12762 int *count,
12763 bool *is_ha)
12764{
ef4bddc2 12765 machine_mode new_mode = VOIDmode;
43e9d192
IB
12766 bool composite_p = aarch64_composite_type_p (type, mode);
12767
12768 if (is_ha != NULL) *is_ha = false;
12769
12770 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
12771 || aarch64_short_vector_p (type, mode))
12772 {
12773 *count = 1;
12774 new_mode = mode;
12775 }
12776 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
12777 {
12778 if (is_ha != NULL) *is_ha = true;
12779 *count = 2;
12780 new_mode = GET_MODE_INNER (mode);
12781 }
12782 else if (type && composite_p)
12783 {
12784 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
12785
12786 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
12787 {
12788 if (is_ha != NULL) *is_ha = true;
12789 *count = ag_count;
12790 }
12791 else
12792 return false;
12793 }
12794 else
12795 return false;
12796
12797 *base_mode = new_mode;
12798 return true;
12799}
12800
12801/* Implement TARGET_STRUCT_VALUE_RTX. */
12802
12803static rtx
12804aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
12805 int incoming ATTRIBUTE_UNUSED)
12806{
12807 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
12808}
12809
12810/* Implements target hook vector_mode_supported_p. */
12811static bool
ef4bddc2 12812aarch64_vector_mode_supported_p (machine_mode mode)
43e9d192 12813{
43cacb12
RS
12814 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
12815 return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0;
43e9d192
IB
12816}
12817
b7342d25
IB
12818/* Return appropriate SIMD container
12819 for MODE within a vector of WIDTH bits. */
ef4bddc2 12820static machine_mode
43cacb12 12821aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
43e9d192 12822{
43cacb12
RS
12823 if (TARGET_SVE && known_eq (width, BITS_PER_SVE_VECTOR))
12824 switch (mode)
12825 {
12826 case E_DFmode:
12827 return VNx2DFmode;
12828 case E_SFmode:
12829 return VNx4SFmode;
12830 case E_HFmode:
12831 return VNx8HFmode;
12832 case E_DImode:
12833 return VNx2DImode;
12834 case E_SImode:
12835 return VNx4SImode;
12836 case E_HImode:
12837 return VNx8HImode;
12838 case E_QImode:
12839 return VNx16QImode;
12840 default:
12841 return word_mode;
12842 }
12843
12844 gcc_assert (known_eq (width, 64) || known_eq (width, 128));
43e9d192 12845 if (TARGET_SIMD)
b7342d25 12846 {
43cacb12 12847 if (known_eq (width, 128))
b7342d25
IB
12848 switch (mode)
12849 {
4e10a5a7 12850 case E_DFmode:
b7342d25 12851 return V2DFmode;
4e10a5a7 12852 case E_SFmode:
b7342d25 12853 return V4SFmode;
4e10a5a7 12854 case E_HFmode:
b719f884 12855 return V8HFmode;
4e10a5a7 12856 case E_SImode:
b7342d25 12857 return V4SImode;
4e10a5a7 12858 case E_HImode:
b7342d25 12859 return V8HImode;
4e10a5a7 12860 case E_QImode:
b7342d25 12861 return V16QImode;
4e10a5a7 12862 case E_DImode:
b7342d25
IB
12863 return V2DImode;
12864 default:
12865 break;
12866 }
12867 else
12868 switch (mode)
12869 {
4e10a5a7 12870 case E_SFmode:
b7342d25 12871 return V2SFmode;
4e10a5a7 12872 case E_HFmode:
b719f884 12873 return V4HFmode;
4e10a5a7 12874 case E_SImode:
b7342d25 12875 return V2SImode;
4e10a5a7 12876 case E_HImode:
b7342d25 12877 return V4HImode;
4e10a5a7 12878 case E_QImode:
b7342d25
IB
12879 return V8QImode;
12880 default:
12881 break;
12882 }
12883 }
43e9d192
IB
12884 return word_mode;
12885}
12886
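/* Examples (sketch): with SVE enabled and WIDTH equal to the SVE vector
   width, SFmode maps to VNx4SFmode.  Under Advanced SIMD, SFmode maps to
   V4SFmode for WIDTH == 128 and V2SFmode for WIDTH == 64; anything with
   no suitable container falls back to word_mode.  */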
b7342d25 12887/* Return 128-bit container as the preferred SIMD mode for MODE. */
ef4bddc2 12888static machine_mode
005ba29c 12889aarch64_preferred_simd_mode (scalar_mode mode)
b7342d25 12890{
43cacb12
RS
12891 poly_int64 bits = TARGET_SVE ? BITS_PER_SVE_VECTOR : 128;
12892 return aarch64_simd_container_mode (mode, bits);
b7342d25
IB
12893}
12894
86e36728 12895/* Return a list of possible vector sizes for the vectorizer
3b357264 12896 to iterate over. */
86e36728
RS
12897static void
12898aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
3b357264 12899{
43cacb12
RS
12900 if (TARGET_SVE)
12901 sizes->safe_push (BYTES_PER_SVE_VECTOR);
86e36728
RS
12902 sizes->safe_push (16);
12903 sizes->safe_push (8);
3b357264
JG
12904}
12905
ac2b960f
YZ
12906/* Implement TARGET_MANGLE_TYPE. */
12907
6f549691 12908static const char *
ac2b960f
YZ
12909aarch64_mangle_type (const_tree type)
12910{
12911 /* The AArch64 ABI documents say that "__va_list" has to be
12912 mangled as if it is in the "std" namespace. */
12913 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
12914 return "St9__va_list";
12915
c2ec330c
AL
12916 /* Half-precision float. */
12917 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
12918 return "Dh";
12919
f9d53c27
TB
12920 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
12921 builtin types. */
12922 if (TYPE_NAME (type) != NULL)
12923 return aarch64_mangle_builtin_type (type);
c6fc9e43 12924
ac2b960f
YZ
12925 /* Use the default mangling. */
12926 return NULL;
12927}
12928
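/* Examples of the rules above (sketch): the AArch64 "__va_list" type
   mangles as "St9__va_list" and __fp16 (a 16-bit REAL_TYPE) mangles as
   "Dh"; other types fall back to the builtin-type or default mangling.  */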
75cf1494
KT
12929/* Find the first rtx_insn before insn that will generate an assembly
12930 instruction. */
12931
12932static rtx_insn *
12933aarch64_prev_real_insn (rtx_insn *insn)
12934{
12935 if (!insn)
12936 return NULL;
12937
12938 do
12939 {
12940 insn = prev_real_insn (insn);
12941 }
12942 while (insn && recog_memoized (insn) < 0);
12943
12944 return insn;
12945}
12946
12947static bool
12948is_madd_op (enum attr_type t1)
12949{
12950 unsigned int i;
12951 /* A number of these may be AArch32 only. */
12952 enum attr_type mlatypes[] = {
12953 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
12954 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
12955 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
12956 };
12957
12958 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
12959 {
12960 if (t1 == mlatypes[i])
12961 return true;
12962 }
12963
12964 return false;
12965}
12966
12967/* Check if there is a register dependency between a load and the insn
12968 for which we hold recog_data. */
12969
12970static bool
12971dep_between_memop_and_curr (rtx memop)
12972{
12973 rtx load_reg;
12974 int opno;
12975
8baff86e 12976 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
12977
12978 if (!REG_P (SET_DEST (memop)))
12979 return false;
12980
12981 load_reg = SET_DEST (memop);
8baff86e 12982 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
12983 {
12984 rtx operand = recog_data.operand[opno];
12985 if (REG_P (operand)
12986 && reg_overlap_mentioned_p (load_reg, operand))
12987 return true;
12988
12989 }
12990 return false;
12991}
12992
8baff86e
KT
12993
12994/* When working around the Cortex-A53 erratum 835769,
12995 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
12996 instruction and has a preceding memory instruction such that a NOP
12997 should be inserted between them. */
12998
75cf1494
KT
12999bool
13000aarch64_madd_needs_nop (rtx_insn* insn)
13001{
13002 enum attr_type attr_type;
13003 rtx_insn *prev;
13004 rtx body;
13005
b32c1043 13006 if (!TARGET_FIX_ERR_A53_835769)
75cf1494
KT
13007 return false;
13008
e322d6e3 13009 if (!INSN_P (insn) || recog_memoized (insn) < 0)
75cf1494
KT
13010 return false;
13011
13012 attr_type = get_attr_type (insn);
13013 if (!is_madd_op (attr_type))
13014 return false;
13015
13016 prev = aarch64_prev_real_insn (insn);
3fea1a75
KT
13017 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
13018 Restore recog state to INSN to avoid state corruption. */
13019 extract_constrain_insn_cached (insn);
13020
550e2205 13021 if (!prev || !contains_mem_rtx_p (PATTERN (prev)))
75cf1494
KT
13022 return false;
13023
13024 body = single_set (prev);
13025
13026 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
13027 it and the DImode madd, emit a NOP between them. If body is NULL then we
13028 have a complex memory operation, probably a load/store pair.
13029 Be conservative for now and emit a NOP. */
13030 if (GET_MODE (recog_data.operand[0]) == DImode
13031 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
13032 return true;
13033
13034 return false;
13035
13036}
13037
8baff86e
KT
13038
13039/* Implement FINAL_PRESCAN_INSN. */
13040
75cf1494
KT
13041void
13042aarch64_final_prescan_insn (rtx_insn *insn)
13043{
13044 if (aarch64_madd_needs_nop (insn))
13045 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
13046}
13047
13048
43cacb12
RS
13049/* Return true if BASE_OR_STEP is a valid immediate operand for an SVE INDEX
13050 instruction. */
13051
13052bool
13053aarch64_sve_index_immediate_p (rtx base_or_step)
13054{
13055 return (CONST_INT_P (base_or_step)
13056 && IN_RANGE (INTVAL (base_or_step), -16, 15));
13057}
13058
13059/* Return true if X is a valid immediate for the SVE ADD and SUB
13060 instructions. Negate X first if NEGATE_P is true. */
13061
13062bool
13063aarch64_sve_arith_immediate_p (rtx x, bool negate_p)
13064{
13065 rtx elt;
13066
13067 if (!const_vec_duplicate_p (x, &elt)
13068 || !CONST_INT_P (elt))
13069 return false;
13070
13071 HOST_WIDE_INT val = INTVAL (elt);
13072 if (negate_p)
13073 val = -val;
13074 val &= GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x)));
13075
13076 if (val & 0xff)
13077 return IN_RANGE (val, 0, 0xff);
13078 return IN_RANGE (val, 0, 0xff00);
13079}
13080
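/* Examples (sketch): a duplicated value in [0, 255] is accepted
   directly, and a multiple of 256 in [256, 65280] is accepted in the
   shifted form (immediate, LSL #8), so a replicated 256 is a valid SVE
   ADD/SUB immediate while a replicated 257 is not.  */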
13081/* Return true if X is a valid immediate operand for an SVE logical
13082 instruction such as AND. */
13083
13084bool
13085aarch64_sve_bitmask_immediate_p (rtx x)
13086{
13087 rtx elt;
13088
13089 return (const_vec_duplicate_p (x, &elt)
13090 && CONST_INT_P (elt)
13091 && aarch64_bitmask_imm (INTVAL (elt),
13092 GET_MODE_INNER (GET_MODE (x))));
13093}
13094
13095/* Return true if X is a valid immediate for the SVE DUP and CPY
13096 instructions. */
13097
13098bool
13099aarch64_sve_dup_immediate_p (rtx x)
13100{
13101 rtx elt;
13102
13103 if (!const_vec_duplicate_p (x, &elt)
13104 || !CONST_INT_P (elt))
13105 return false;
13106
13107 HOST_WIDE_INT val = INTVAL (elt);
13108 if (val & 0xff)
13109 return IN_RANGE (val, -0x80, 0x7f);
13110 return IN_RANGE (val, -0x8000, 0x7f00);
13111}
13112
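/* Examples (sketch): DUP/CPY accept a duplicated byte in [-128, 127]
   directly, or a multiple of 256 in [-32768, 32512] in the shifted form
   (immediate, LSL #8); a value such as 0x1234 is rejected because it
   needs both bytes.  */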
13113/* Return true if X is a valid immediate operand for an SVE CMP instruction.
13114 SIGNED_P says whether the operand is signed rather than unsigned. */
13115
13116bool
13117aarch64_sve_cmp_immediate_p (rtx x, bool signed_p)
13118{
13119 rtx elt;
13120
13121 return (const_vec_duplicate_p (x, &elt)
13122 && CONST_INT_P (elt)
13123 && (signed_p
13124 ? IN_RANGE (INTVAL (elt), -16, 15)
13125 : IN_RANGE (INTVAL (elt), 0, 127)));
13126}
13127
13128/* Return true if X is a valid immediate operand for an SVE FADD or FSUB
13129 instruction. Negate X first if NEGATE_P is true. */
13130
13131bool
13132aarch64_sve_float_arith_immediate_p (rtx x, bool negate_p)
13133{
13134 rtx elt;
13135 REAL_VALUE_TYPE r;
13136
13137 if (!const_vec_duplicate_p (x, &elt)
13138 || GET_CODE (elt) != CONST_DOUBLE)
13139 return false;
13140
13141 r = *CONST_DOUBLE_REAL_VALUE (elt);
13142
13143 if (negate_p)
13144 r = real_value_negate (&r);
13145
13146 if (real_equal (&r, &dconst1))
13147 return true;
13148 if (real_equal (&r, &dconsthalf))
13149 return true;
13150 return false;
13151}
13152
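/* Example (sketch): after the optional negation requested by NEGATE_P,
   the only accepted values are 1.0 and 0.5, matching the immediate forms
   of the SVE FADD and FSUB instructions.  */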
13153/* Return true if X is a valid immediate operand for an SVE FMUL
13154 instruction. */
13155
13156bool
13157aarch64_sve_float_mul_immediate_p (rtx x)
13158{
13159 rtx elt;
13160
13161 /* GCC will never generate a multiply with an immediate of 2, so there is no
13162 point testing for it (even though it is a valid constant). */
13163 return (const_vec_duplicate_p (x, &elt)
13164 && GET_CODE (elt) == CONST_DOUBLE
13165 && real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf));
13166}
13167
b187677b
RS
13168/* Return true if replicating VAL32 is a valid 2-byte or 4-byte immediate
13169 for the Advanced SIMD operation described by WHICH and INSN. If INFO
13170 is nonnull, use it to describe valid immediates. */
3520f7cc 13171static bool
b187677b
RS
13172aarch64_advsimd_valid_immediate_hs (unsigned int val32,
13173 simd_immediate_info *info,
13174 enum simd_immediate_check which,
13175 simd_immediate_info::insn_type insn)
13176{
13177 /* Try a 4-byte immediate with LSL. */
13178 for (unsigned int shift = 0; shift < 32; shift += 8)
13179 if ((val32 & (0xff << shift)) == val32)
13180 {
13181 if (info)
13182 *info = simd_immediate_info (SImode, val32 >> shift, insn,
13183 simd_immediate_info::LSL, shift);
13184 return true;
13185 }
3520f7cc 13186
b187677b
RS
13187 /* Try a 2-byte immediate with LSL. */
13188 unsigned int imm16 = val32 & 0xffff;
13189 if (imm16 == (val32 >> 16))
13190 for (unsigned int shift = 0; shift < 16; shift += 8)
13191 if ((imm16 & (0xff << shift)) == imm16)
48063b9d 13192 {
b187677b
RS
13193 if (info)
13194 *info = simd_immediate_info (HImode, imm16 >> shift, insn,
13195 simd_immediate_info::LSL, shift);
13196 return true;
48063b9d 13197 }
3520f7cc 13198
b187677b
RS
13199 /* Try a 4-byte immediate with MSL, except for cases that MVN
13200 can handle. */
13201 if (which == AARCH64_CHECK_MOV)
13202 for (unsigned int shift = 8; shift < 24; shift += 8)
13203 {
13204 unsigned int low = (1 << shift) - 1;
13205 if (((val32 & (0xff << shift)) | low) == val32)
13206 {
13207 if (info)
13208 *info = simd_immediate_info (SImode, val32 >> shift, insn,
13209 simd_immediate_info::MSL, shift);
13210 return true;
13211 }
13212 }
43e9d192 13213
b187677b
RS
13214 return false;
13215}
13216
13217/* Return true if replicating VAL64 is a valid immediate for the
13218 Advanced SIMD operation described by WHICH. If INFO is nonnull,
13219 use it to describe valid immediates. */
13220static bool
13221aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
13222 simd_immediate_info *info,
13223 enum simd_immediate_check which)
13224{
13225 unsigned int val32 = val64 & 0xffffffff;
13226 unsigned int val16 = val64 & 0xffff;
13227 unsigned int val8 = val64 & 0xff;
13228
13229 if (val32 == (val64 >> 32))
43e9d192 13230 {
b187677b
RS
13231 if ((which & AARCH64_CHECK_ORR) != 0
13232 && aarch64_advsimd_valid_immediate_hs (val32, info, which,
13233 simd_immediate_info::MOV))
13234 return true;
43e9d192 13235
b187677b
RS
13236 if ((which & AARCH64_CHECK_BIC) != 0
13237 && aarch64_advsimd_valid_immediate_hs (~val32, info, which,
13238 simd_immediate_info::MVN))
13239 return true;
ee78df47 13240
b187677b
RS
13241 /* Try using a replicated byte. */
13242 if (which == AARCH64_CHECK_MOV
13243 && val16 == (val32 >> 16)
13244 && val8 == (val16 >> 8))
ee78df47 13245 {
b187677b
RS
13246 if (info)
13247 *info = simd_immediate_info (QImode, val8);
13248 return true;
ee78df47 13249 }
43e9d192
IB
13250 }
13251
b187677b
RS
13252 /* Try using a bit-to-bytemask. */
13253 if (which == AARCH64_CHECK_MOV)
43e9d192 13254 {
b187677b
RS
13255 unsigned int i;
13256 for (i = 0; i < 64; i += 8)
ab6501d7 13257 {
b187677b
RS
13258 unsigned char byte = (val64 >> i) & 0xff;
13259 if (byte != 0 && byte != 0xff)
13260 break;
ab6501d7 13261 }
b187677b 13262 if (i == 64)
ab6501d7 13263 {
b187677b
RS
13264 if (info)
13265 *info = simd_immediate_info (DImode, val64);
13266 return true;
ab6501d7 13267 }
43e9d192 13268 }
b187677b
RS
13269 return false;
13270}
43e9d192 13271
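/* A worked example (sketch): the 64-bit pattern 0x00ff00ff00ff00ff
   replicates a 32-bit value whose two halves match, so it is caught by
   the 2-byte MOVI path in aarch64_advsimd_valid_immediate_hs, whereas
   0xff00ff0000ff00ff fails the 32-bit replication test and is only
   accepted (for MOV) by the bit-to-bytemask case, where every byte must
   be 0x00 or 0xff.  */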
43cacb12
RS
13272/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
13273 instruction. If INFO is nonnull, use it to describe valid immediates. */
13274
13275static bool
13276aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
13277 simd_immediate_info *info)
13278{
13279 scalar_int_mode mode = DImode;
13280 unsigned int val32 = val64 & 0xffffffff;
13281 if (val32 == (val64 >> 32))
13282 {
13283 mode = SImode;
13284 unsigned int val16 = val32 & 0xffff;
13285 if (val16 == (val32 >> 16))
13286 {
13287 mode = HImode;
13288 unsigned int val8 = val16 & 0xff;
13289 if (val8 == (val16 >> 8))
13290 mode = QImode;
13291 }
13292 }
13293 HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
13294 if (IN_RANGE (val, -0x80, 0x7f))
13295 {
13296 /* DUP with no shift. */
13297 if (info)
13298 *info = simd_immediate_info (mode, val);
13299 return true;
13300 }
13301 if ((val & 0xff) == 0 && IN_RANGE (val, -0x8000, 0x7f00))
13302 {
13303 /* DUP with LSL #8. */
13304 if (info)
13305 *info = simd_immediate_info (mode, val);
13306 return true;
13307 }
13308 if (aarch64_bitmask_imm (val64, mode))
13309 {
13310 /* DUPM. */
13311 if (info)
13312 *info = simd_immediate_info (mode, val);
13313 return true;
13314 }
13315 return false;
13316}
13317
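/* Examples (sketch): a replicated byte 7 is a plain DUP; a replicated
   halfword 0x1100 is DUP with LSL #8; and a replicated word 0x0000ffff
   is not a DUP candidate but satisfies aarch64_bitmask_imm, so it is
   accepted as a DUPM immediate.  */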
b187677b
RS
13318/* Return true if OP is a valid SIMD immediate for the operation
13319 described by WHICH. If INFO is nonnull, use it to describe valid
13320 immediates. */
13321bool
13322aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
13323 enum simd_immediate_check which)
13324{
43cacb12
RS
13325 machine_mode mode = GET_MODE (op);
13326 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
13327 if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
13328 return false;
13329
13330 scalar_mode elt_mode = GET_MODE_INNER (mode);
f9093f23 13331 rtx base, step;
b187677b 13332 unsigned int n_elts;
f9093f23
RS
13333 if (GET_CODE (op) == CONST_VECTOR
13334 && CONST_VECTOR_DUPLICATE_P (op))
13335 n_elts = CONST_VECTOR_NPATTERNS (op);
43cacb12
RS
13336 else if ((vec_flags & VEC_SVE_DATA)
13337 && const_vec_series_p (op, &base, &step))
13338 {
13339 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
13340 if (!aarch64_sve_index_immediate_p (base)
13341 || !aarch64_sve_index_immediate_p (step))
13342 return false;
13343
13344 if (info)
13345 *info = simd_immediate_info (elt_mode, base, step);
13346 return true;
13347 }
6a70badb
RS
13348 else if (GET_CODE (op) == CONST_VECTOR
13349 && CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
13350 /* N_ELTS set above. */;
b187677b 13351 else
d8edd899 13352 return false;
43e9d192 13353
43cacb12
RS
13354 /* Handle PFALSE and PTRUE. */
13355 if (vec_flags & VEC_SVE_PRED)
13356 return (op == CONST0_RTX (mode)
13357 || op == CONSTM1_RTX (mode));
13358
b187677b 13359 scalar_float_mode elt_float_mode;
f9093f23
RS
13360 if (n_elts == 1
13361 && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
43e9d192 13362 {
f9093f23
RS
13363 rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
13364 if (aarch64_float_const_zero_rtx_p (elt)
13365 || aarch64_float_const_representable_p (elt))
13366 {
13367 if (info)
13368 *info = simd_immediate_info (elt_float_mode, elt);
13369 return true;
13370 }
b187677b 13371 }
43e9d192 13372
b187677b
RS
13373 unsigned int elt_size = GET_MODE_SIZE (elt_mode);
13374 if (elt_size > 8)
13375 return false;
e4f0f84d 13376
b187677b 13377 scalar_int_mode elt_int_mode = int_mode_for_mode (elt_mode).require ();
43e9d192 13378
b187677b
RS
13379 /* Expand the vector constant out into a byte vector, with the least
13380 significant byte of the register first. */
13381 auto_vec<unsigned char, 16> bytes;
13382 bytes.reserve (n_elts * elt_size);
13383 for (unsigned int i = 0; i < n_elts; i++)
13384 {
f9093f23
RS
13385 /* The vector is provided in gcc endian-neutral fashion.
13386 For aarch64_be Advanced SIMD, it must be laid out in the vector
13387 register in reverse order. */
13388 bool swap_p = ((vec_flags & VEC_ADVSIMD) != 0 && BYTES_BIG_ENDIAN);
13389 rtx elt = CONST_VECTOR_ELT (op, swap_p ? (n_elts - 1 - i) : i);
43e9d192 13390
b187677b
RS
13391 if (elt_mode != elt_int_mode)
13392 elt = gen_lowpart (elt_int_mode, elt);
43e9d192 13393
b187677b
RS
13394 if (!CONST_INT_P (elt))
13395 return false;
43e9d192 13396
b187677b
RS
13397 unsigned HOST_WIDE_INT elt_val = INTVAL (elt);
13398 for (unsigned int byte = 0; byte < elt_size; byte++)
48063b9d 13399 {
b187677b
RS
13400 bytes.quick_push (elt_val & 0xff);
13401 elt_val >>= BITS_PER_UNIT;
48063b9d 13402 }
43e9d192
IB
13403 }
13404
b187677b
RS
13405 /* The immediate must repeat every eight bytes. */
13406 unsigned int nbytes = bytes.length ();
13407 for (unsigned i = 8; i < nbytes; ++i)
13408 if (bytes[i] != bytes[i - 8])
13409 return false;
13410
13411 /* Get the repeating 8-byte value as an integer. No endian correction
13412 is needed here because bytes is already in lsb-first order. */
13413 unsigned HOST_WIDE_INT val64 = 0;
13414 for (unsigned int i = 0; i < 8; i++)
13415 val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
13416 << (i * BITS_PER_UNIT));
13417
43cacb12
RS
13418 if (vec_flags & VEC_SVE_DATA)
13419 return aarch64_sve_valid_immediate (val64, info);
13420 else
13421 return aarch64_advsimd_valid_immediate (val64, info, which);
13422}
13423
13424/* Check whether X is a VEC_SERIES-like constant that starts at 0 and
13425 has a step in the range of INDEX. Return the index expression if so,
13426 otherwise return null. */
13427rtx
13428aarch64_check_zero_based_sve_index_immediate (rtx x)
13429{
13430 rtx base, step;
13431 if (const_vec_series_p (x, &base, &step)
13432 && base == const0_rtx
13433 && aarch64_sve_index_immediate_p (step))
13434 return step;
13435 return NULL_RTX;
43e9d192
IB
13436}
13437
43e9d192
IB
13438/* Check whether immediate shift constants are within range. */
13439bool
ef4bddc2 13440aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
43e9d192
IB
13441{
13442 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
13443 if (left)
ddeabd3e 13444 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 13445 else
ddeabd3e 13446 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
13447}
13448
7325d85a
KT
13449/* Return the bitmask CONST_INT to select the bits required by a zero extract
13450 operation of width WIDTH at bit position POS. */
13451
13452rtx
13453aarch64_mask_from_zextract_ops (rtx width, rtx pos)
13454{
13455 gcc_assert (CONST_INT_P (width));
13456 gcc_assert (CONST_INT_P (pos));
13457
13458 unsigned HOST_WIDE_INT mask
13459 = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
13460 return GEN_INT (mask << UINTVAL (pos));
13461}
13462
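/* Example (sketch): for WIDTH 8 and POS 4 this returns the CONST_INT
   0xff0, i.e. ((1 << 8) - 1) << 4.  */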
83f8c414 13463bool
a6e0bfa7 13464aarch64_mov_operand_p (rtx x, machine_mode mode)
83f8c414 13465{
83f8c414
CSS
13466 if (GET_CODE (x) == HIGH
13467 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
13468 return true;
13469
82614948 13470 if (CONST_INT_P (x))
83f8c414
CSS
13471 return true;
13472
43cacb12
RS
13473 if (VECTOR_MODE_P (GET_MODE (x)))
13474 return aarch64_simd_valid_immediate (x, NULL);
13475
83f8c414
CSS
13476 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
13477 return true;
13478
43cacb12
RS
13479 if (aarch64_sve_cnt_immediate_p (x))
13480 return true;
13481
a6e0bfa7 13482 return aarch64_classify_symbolic_expression (x)
a5350ddc 13483 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
13484}
13485
43e9d192
IB
13486/* Return a const_int vector of VAL. */
13487rtx
ab014eb3 13488aarch64_simd_gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
43e9d192 13489{
59d06c05
RS
13490 rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
13491 return gen_const_vec_duplicate (mode, c);
43e9d192
IB
13492}
13493
051d0e2f
SN
13494/* Check OP is a legal scalar immediate for the MOVI instruction. */
13495
13496bool
77e994c9 13497aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
051d0e2f 13498{
ef4bddc2 13499 machine_mode vmode;
051d0e2f 13500
43cacb12 13501 vmode = aarch64_simd_container_mode (mode, 64);
051d0e2f 13502 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
b187677b 13503 return aarch64_simd_valid_immediate (op_v, NULL);
051d0e2f
SN
13504}
13505
988fa693
JG
13506/* Construct and return a PARALLEL RTX vector with elements numbering the
13507 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
13508 the vector - from the perspective of the architecture. This does not
13509 line up with GCC's perspective on lane numbers, so we end up with
13510 different masks depending on our target endian-ness. The diagram
13511 below may help. We must draw the distinction when building masks
13512 which select one half of the vector. An instruction selecting
13513 architectural low-lanes for a big-endian target must be described using
13514 a mask selecting GCC high-lanes.
13515
13516          Big-Endian                     Little-Endian
13517
13518GCC              0   1   2   3            3   2   1   0
13519               | x | x | x | x |        | x | x | x | x |
13520Architecture     3   2   1   0            3   2   1   0
13521
13522Low Mask:          { 2, 3 }                 { 0, 1 }
13523High Mask:         { 0, 1 }                 { 2, 3 }
f5cbabc1
RS
13524
13525 MODE Is the mode of the vector and NUNITS is the number of units in it. */
988fa693 13526
43e9d192 13527rtx
f5cbabc1 13528aarch64_simd_vect_par_cnst_half (machine_mode mode, int nunits, bool high)
43e9d192 13529{
43e9d192 13530 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
13531 int high_base = nunits / 2;
13532 int low_base = 0;
13533 int base;
43e9d192
IB
13534 rtx t1;
13535 int i;
13536
988fa693
JG
13537 if (BYTES_BIG_ENDIAN)
13538 base = high ? low_base : high_base;
13539 else
13540 base = high ? high_base : low_base;
13541
13542 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
13543 RTVEC_ELT (v, i) = GEN_INT (base + i);
13544
13545 t1 = gen_rtx_PARALLEL (mode, v);
13546 return t1;
13547}
13548
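/* Worked example (illustrative, not part of the original source): for
   V4SImode with NUNITS == 4 and HIGH == true, a little-endian target gets
   the PARALLEL (const_int 2, const_int 3), whereas a big-endian target gets
   (const_int 0, const_int 1), matching the "High Mask" row of the diagram
   above.  */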
988fa693
JG
13549/* Check OP for validity as a PARALLEL RTX vector with elements
13550 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
13551 from the perspective of the architecture. See the diagram above
13552 aarch64_simd_vect_par_cnst_half for more details. */
13553
13554bool
ef4bddc2 13555aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
988fa693
JG
13556 bool high)
13557{
6a70badb
RS
13558 int nelts;
13559 if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts))
f5cbabc1
RS
13560 return false;
13561
6a70badb 13562 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high);
988fa693
JG
13563 HOST_WIDE_INT count_op = XVECLEN (op, 0);
13564 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
13565 int i = 0;
13566
988fa693
JG
13567 if (count_op != count_ideal)
13568 return false;
13569
13570 for (i = 0; i < count_ideal; i++)
13571 {
13572 rtx elt_op = XVECEXP (op, 0, i);
13573 rtx elt_ideal = XVECEXP (ideal, 0, i);
13574
4aa81c2e 13575 if (!CONST_INT_P (elt_op)
988fa693
JG
13576 || INTVAL (elt_ideal) != INTVAL (elt_op))
13577 return false;
13578 }
13579 return true;
13580}
13581
43e9d192
IB
13582/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
13583 HIGH (exclusive). */
13584void
46ed6024
CB
13585aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13586 const_tree exp)
43e9d192
IB
13587{
13588 HOST_WIDE_INT lane;
4aa81c2e 13589 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
13590 lane = INTVAL (operand);
13591
13592 if (lane < low || lane >= high)
46ed6024
CB
13593 {
13594 if (exp)
cf0c27ef 13595 error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
46ed6024 13596 else
cf0c27ef 13597 error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
46ed6024 13598 }
43e9d192
IB
13599}
13600
7ac29c0f
RS
13601/* Perform endian correction on lane number N, which indexes a vector
13602 of mode MODE, and return the result as an SImode rtx. */
13603
13604rtx
13605aarch64_endian_lane_rtx (machine_mode mode, unsigned int n)
13606{
13607 return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
13608}
13609
43e9d192 13610/* Return TRUE if OP is a valid vector addressing mode. */
43cacb12 13611
43e9d192
IB
13612bool
13613aarch64_simd_mem_operand_p (rtx op)
13614{
13615 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 13616 || REG_P (XEXP (op, 0)));
43e9d192
IB
13617}
13618
43cacb12
RS
13619/* Return true if OP is a valid MEM operand for an SVE LD1R instruction. */
13620
13621bool
13622aarch64_sve_ld1r_operand_p (rtx op)
13623{
13624 struct aarch64_address_info addr;
13625 scalar_mode mode;
13626
13627 return (MEM_P (op)
13628 && is_a <scalar_mode> (GET_MODE (op), &mode)
13629 && aarch64_classify_address (&addr, XEXP (op, 0), mode, false)
13630 && addr.type == ADDRESS_REG_IMM
13631 && offset_6bit_unsigned_scaled_p (mode, addr.const_offset));
13632}
13633
13634/* Return true if OP is a valid MEM operand for an SVE LDR instruction.
13635 The conditions for STR are the same. */
13636bool
13637aarch64_sve_ldr_operand_p (rtx op)
13638{
13639 struct aarch64_address_info addr;
13640
13641 return (MEM_P (op)
13642 && aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op),
13643 false, ADDR_QUERY_ANY)
13644 && addr.type == ADDRESS_REG_IMM);
13645}
13646
9f4cbab8
RS
13647/* Return true if OP is a valid MEM operand for an SVE_STRUCT mode.
13648 We need to be able to access the individual pieces, so the range
13649 is different from LD[234] and ST[234]. */
13650bool
13651aarch64_sve_struct_memory_operand_p (rtx op)
13652{
13653 if (!MEM_P (op))
13654 return false;
13655
13656 machine_mode mode = GET_MODE (op);
13657 struct aarch64_address_info addr;
13658 if (!aarch64_classify_address (&addr, XEXP (op, 0), SVE_BYTE_MODE, false,
13659 ADDR_QUERY_ANY)
13660 || addr.type != ADDRESS_REG_IMM)
13661 return false;
13662
13663 poly_int64 first = addr.const_offset;
13664 poly_int64 last = first + GET_MODE_SIZE (mode) - BYTES_PER_SVE_VECTOR;
13665 return (offset_4bit_signed_scaled_p (SVE_BYTE_MODE, first)
13666 && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last));
13667}
13668
2d8c6dc1
AH
13669/* Emit a register copy from operand to operand, taking care not to
13670 early-clobber source registers in the process.
43e9d192 13671
2d8c6dc1
AH
13672 COUNT is the number of components into which the copy needs to be
13673 decomposed. */
43e9d192 13674void
b8506a8a 13675aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode,
2d8c6dc1 13676 unsigned int count)
43e9d192
IB
13677{
13678 unsigned int i;
2d8c6dc1
AH
13679 int rdest = REGNO (operands[0]);
13680 int rsrc = REGNO (operands[1]);
43e9d192
IB
13681
13682 if (!reg_overlap_mentioned_p (operands[0], operands[1])
2d8c6dc1
AH
13683 || rdest < rsrc)
13684 for (i = 0; i < count; i++)
13685 emit_move_insn (gen_rtx_REG (mode, rdest + i),
13686 gen_rtx_REG (mode, rsrc + i));
43e9d192 13687 else
2d8c6dc1
AH
13688 for (i = 0; i < count; i++)
13689 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
13690 gen_rtx_REG (mode, rsrc + count - i - 1));
43e9d192
IB
13691}
13692
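/* Illustrative example (not part of the original source): copying a
   two-register value from { V8, V9 } to { V9, V10 } overlaps at V9 and has
   rdest > rsrc, so the second loop above is used: V10 is written from V9
   first, then V9 from V8.  Copying in ascending order would clobber V9
   before it had been read.  */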
668046d1 13693/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
6ec0e5b9 13694 one of VSTRUCT modes: OI, CI, or XI. */
668046d1 13695int
b8506a8a 13696aarch64_simd_attr_length_rglist (machine_mode mode)
668046d1 13697{
6a70badb
RS
13698 /* This is only used (and only meaningful) for Advanced SIMD, not SVE. */
13699 return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4;
668046d1
DS
13700}
13701
db0253a4 13702/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
43cacb12
RS
13703 alignment of a vector to 128 bits. SVE predicates have an alignment of
13704 16 bits. */
db0253a4
TB
13705static HOST_WIDE_INT
13706aarch64_simd_vector_alignment (const_tree type)
13707{
43cacb12
RS
13708 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
13709 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
13710 be set for non-predicate vectors of booleans. Modes are the most
13711 direct way we have of identifying real SVE predicate types. */
13712 return GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL ? 16 : 128;
9439e9a1 13713 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
13714 return MIN (align, 128);
13715}
13716
43cacb12
RS
13717/* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
13718static HOST_WIDE_INT
13719aarch64_vectorize_preferred_vector_alignment (const_tree type)
13720{
13721 if (aarch64_sve_data_mode_p (TYPE_MODE (type)))
13722 {
13723 /* If the length of the vector is fixed, try to align to that length,
13724 otherwise don't try to align at all. */
13725 HOST_WIDE_INT result;
13726 if (!BITS_PER_SVE_VECTOR.is_constant (&result))
13727 result = TYPE_ALIGN (TREE_TYPE (type));
13728 return result;
13729 }
13730 return TYPE_ALIGN (type);
13731}
13732
db0253a4
TB
13733/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
13734static bool
13735aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
13736{
13737 if (is_packed)
13738 return false;
13739
43cacb12
RS
13740 /* For fixed-length vectors, check that the vectorizer will aim for
13741 full-vector alignment. This isn't true for generic GCC vectors
13742 that are wider than the ABI maximum of 128 bits. */
13743 if (TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13744 && (wi::to_widest (TYPE_SIZE (type))
13745 != aarch64_vectorize_preferred_vector_alignment (type)))
db0253a4
TB
13746 return false;
13747
13748 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
13749 return true;
13750}
13751
7df76747
N
13752/* Return true if the vector misalignment factor is supported by the
13753 target. */
13754static bool
13755aarch64_builtin_support_vector_misalignment (machine_mode mode,
13756 const_tree type, int misalignment,
13757 bool is_packed)
13758{
13759 if (TARGET_SIMD && STRICT_ALIGNMENT)
13760 {
13761 /* Return false if the movmisalign pattern is not supported for this mode. */
13762 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
13763 return false;
13764
a509c571 13765 /* Misalignment factor is unknown at compile time. */
7df76747 13766 if (misalignment == -1)
a509c571 13767 return false;
7df76747
N
13768 }
13769 return default_builtin_support_vector_misalignment (mode, type, misalignment,
13770 is_packed);
13771}
13772
4369c11e
TB
13773/* If VALS is a vector constant that can be loaded into a register
13774 using DUP, generate instructions to do so and return an RTX to
13775 assign to the register. Otherwise return NULL_RTX. */
13776static rtx
13777aarch64_simd_dup_constant (rtx vals)
13778{
ef4bddc2
RS
13779 machine_mode mode = GET_MODE (vals);
13780 machine_mode inner_mode = GET_MODE_INNER (mode);
4369c11e 13781 rtx x;
4369c11e 13782
92695fbb 13783 if (!const_vec_duplicate_p (vals, &x))
4369c11e
TB
13784 return NULL_RTX;
13785
13786 /* We can load this constant by using DUP and a constant in a
13787 single ARM register. This will be cheaper than a vector
13788 load. */
92695fbb 13789 x = copy_to_mode_reg (inner_mode, x);
59d06c05 13790 return gen_vec_duplicate (mode, x);
4369c11e
TB
13791}
13792
13793
13794/* Generate code to load VALS, which is a PARALLEL containing only
13795 constants (for vec_init) or CONST_VECTOR, efficiently into a
13796 register. Returns an RTX to copy into the register, or NULL_RTX
13797 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 13798static rtx
4369c11e
TB
13799aarch64_simd_make_constant (rtx vals)
13800{
ef4bddc2 13801 machine_mode mode = GET_MODE (vals);
4369c11e
TB
13802 rtx const_dup;
13803 rtx const_vec = NULL_RTX;
4369c11e
TB
13804 int n_const = 0;
13805 int i;
13806
13807 if (GET_CODE (vals) == CONST_VECTOR)
13808 const_vec = vals;
13809 else if (GET_CODE (vals) == PARALLEL)
13810 {
13811 /* A CONST_VECTOR must contain only CONST_INTs and
13812 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13813 Only store valid constants in a CONST_VECTOR. */
6a70badb 13814 int n_elts = XVECLEN (vals, 0);
4369c11e
TB
13815 for (i = 0; i < n_elts; ++i)
13816 {
13817 rtx x = XVECEXP (vals, 0, i);
13818 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13819 n_const++;
13820 }
13821 if (n_const == n_elts)
13822 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13823 }
13824 else
13825 gcc_unreachable ();
13826
13827 if (const_vec != NULL_RTX
b187677b 13828 && aarch64_simd_valid_immediate (const_vec, NULL))
4369c11e
TB
13829 /* Load using MOVI/MVNI. */
13830 return const_vec;
13831 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
13832 /* Loaded using DUP. */
13833 return const_dup;
13834 else if (const_vec != NULL_RTX)
13835 /* Load from constant pool. We can not take advantage of single-cycle
13836 LD1 because we need a PC-relative addressing mode. */
13837 return const_vec;
13838 else
13839 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13840 We can not construct an initializer. */
13841 return NULL_RTX;
13842}
13843
35a093b6
JG
13844/* Expand a vector initialisation sequence, such that TARGET is
13845 initialised to contain VALS. */
13846
4369c11e
TB
13847void
13848aarch64_expand_vector_init (rtx target, rtx vals)
13849{
ef4bddc2 13850 machine_mode mode = GET_MODE (target);
146c2e3a 13851 scalar_mode inner_mode = GET_MODE_INNER (mode);
35a093b6 13852 /* The number of vector elements. */
6a70badb 13853 int n_elts = XVECLEN (vals, 0);
35a093b6 13854 /* The number of vector elements which are not constant. */
8b66a2d4
AL
13855 int n_var = 0;
13856 rtx any_const = NULL_RTX;
35a093b6
JG
13857 /* The first element of vals. */
13858 rtx v0 = XVECEXP (vals, 0, 0);
4369c11e 13859 bool all_same = true;
4369c11e 13860
35a093b6 13861 /* Count the number of variable elements to initialise. */
8b66a2d4 13862 for (int i = 0; i < n_elts; ++i)
4369c11e 13863 {
8b66a2d4 13864 rtx x = XVECEXP (vals, 0, i);
35a093b6 13865 if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
8b66a2d4
AL
13866 ++n_var;
13867 else
13868 any_const = x;
4369c11e 13869
35a093b6 13870 all_same &= rtx_equal_p (x, v0);
4369c11e
TB
13871 }
13872
35a093b6
JG
13873 /* No variable elements, hand off to aarch64_simd_make_constant which knows
13874 how best to handle this. */
4369c11e
TB
13875 if (n_var == 0)
13876 {
13877 rtx constant = aarch64_simd_make_constant (vals);
13878 if (constant != NULL_RTX)
13879 {
13880 emit_move_insn (target, constant);
13881 return;
13882 }
13883 }
13884
13885 /* Splat a single non-constant element if we can. */
13886 if (all_same)
13887 {
35a093b6 13888 rtx x = copy_to_mode_reg (inner_mode, v0);
59d06c05 13889 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
4369c11e
TB
13890 return;
13891 }
13892
85c1b6d7
AP
13893 enum insn_code icode = optab_handler (vec_set_optab, mode);
13894 gcc_assert (icode != CODE_FOR_nothing);
13895
13896 /* If there are only variable elements, try to optimize
13897 the insertion using dup for the most common element
13898 followed by insertions. */
13899
13900 /* The algorithm will fill matches[*][0] with the earliest matching element,
13901 and matches[X][1] with the count of duplicate elements (if X is the
13902 earliest element which has duplicates). */
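 /* Worked example (illustrative, not part of the original source): for
    VALS = { x, y, x, x } with all elements non-constant, the loop below
    produces matches[0] = {0, 3}, matches[1] = {1, 1}, matches[2] = {0, 0}
    and matches[3] = {0, 0}, so MAXELEMENT is 0 and MAXV is 3: x is
    broadcast with DUP and only lane 1 needs a separate insert of y.  */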
13903
13904 if (n_var == n_elts && n_elts <= 16)
13905 {
13906 int matches[16][2] = {0};
13907 for (int i = 0; i < n_elts; i++)
13908 {
13909 for (int j = 0; j <= i; j++)
13910 {
13911 if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
13912 {
13913 matches[i][0] = j;
13914 matches[j][1]++;
13915 break;
13916 }
13917 }
13918 }
13919 int maxelement = 0;
13920 int maxv = 0;
13921 for (int i = 0; i < n_elts; i++)
13922 if (matches[i][1] > maxv)
13923 {
13924 maxelement = i;
13925 maxv = matches[i][1];
13926 }
13927
b4e2cd5b
JG
13928 /* Create a duplicate of the most common element, unless all elements
13929 are equally useless to us, in which case just immediately set the
13930 vector register using the first element. */
13931
13932 if (maxv == 1)
13933 {
13934 /* For vectors of two 64-bit elements, we can do even better. */
13935 if (n_elts == 2
13936 && (inner_mode == E_DImode
13937 || inner_mode == E_DFmode))
13938
13939 {
13940 rtx x0 = XVECEXP (vals, 0, 0);
13941 rtx x1 = XVECEXP (vals, 0, 1);
13942 /* Combine can pick up this case, but handling it directly
13943 here leaves clearer RTL.
13944
13945 This is load_pair_lanes<mode>, and also gives us a clean-up
13946 for store_pair_lanes<mode>. */
13947 if (memory_operand (x0, inner_mode)
13948 && memory_operand (x1, inner_mode)
13949 && !STRICT_ALIGNMENT
13950 && rtx_equal_p (XEXP (x1, 0),
13951 plus_constant (Pmode,
13952 XEXP (x0, 0),
13953 GET_MODE_SIZE (inner_mode))))
13954 {
13955 rtx t;
13956 if (inner_mode == DFmode)
13957 t = gen_load_pair_lanesdf (target, x0, x1);
13958 else
13959 t = gen_load_pair_lanesdi (target, x0, x1);
13960 emit_insn (t);
13961 return;
13962 }
13963 }
13964 /* The subreg-move sequence below will move into lane zero of the
13965 vector register. For big-endian we want that position to hold
13966 the last element of VALS. */
13967 maxelement = BYTES_BIG_ENDIAN ? n_elts - 1 : 0;
13968 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
13969 aarch64_emit_move (target, lowpart_subreg (mode, x, inner_mode));
13970 }
13971 else
13972 {
13973 rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, maxelement));
13974 aarch64_emit_move (target, gen_vec_duplicate (mode, x));
13975 }
85c1b6d7
AP
13976
13977 /* Insert the rest. */
13978 for (int i = 0; i < n_elts; i++)
13979 {
13980 rtx x = XVECEXP (vals, 0, i);
13981 if (matches[i][0] == maxelement)
13982 continue;
13983 x = copy_to_mode_reg (inner_mode, x);
13984 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
13985 }
13986 return;
13987 }
13988
35a093b6
JG
13989 /* Initialise a vector which is part-variable. We want to first try
13990 to build those lanes which are constant in the most efficient way we
13991 can. */
13992 if (n_var != n_elts)
4369c11e
TB
13993 {
13994 rtx copy = copy_rtx (vals);
4369c11e 13995
8b66a2d4
AL
13996 /* Load constant part of vector. We really don't care what goes into the
13997 parts we will overwrite, but we're more likely to be able to load the
13998 constant efficiently if it has fewer, larger, repeating parts
13999 (see aarch64_simd_valid_immediate). */
14000 for (int i = 0; i < n_elts; i++)
14001 {
14002 rtx x = XVECEXP (vals, 0, i);
14003 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
14004 continue;
14005 rtx subst = any_const;
14006 for (int bit = n_elts / 2; bit > 0; bit /= 2)
14007 {
14008 /* Look in the copied vector, as more elements are const. */
14009 rtx test = XVECEXP (copy, 0, i ^ bit);
14010 if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
14011 {
14012 subst = test;
14013 break;
14014 }
14015 }
14016 XVECEXP (copy, 0, i) = subst;
14017 }
4369c11e 14018 aarch64_expand_vector_init (target, copy);
35a093b6 14019 }
4369c11e 14020
35a093b6 14021 /* Insert the variable lanes directly. */
8b66a2d4 14022 for (int i = 0; i < n_elts; i++)
35a093b6
JG
14023 {
14024 rtx x = XVECEXP (vals, 0, i);
14025 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
14026 continue;
14027 x = copy_to_mode_reg (inner_mode, x);
14028 emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
14029 }
4369c11e
TB
14030}
14031
43e9d192 14032static unsigned HOST_WIDE_INT
ef4bddc2 14033aarch64_shift_truncation_mask (machine_mode mode)
43e9d192 14034{
43cacb12
RS
14035 if (!SHIFT_COUNT_TRUNCATED || aarch64_vector_data_mode_p (mode))
14036 return 0;
14037 return GET_MODE_UNIT_BITSIZE (mode) - 1;
43e9d192
IB
14038}
14039
43e9d192
IB
14040/* Select a format to encode pointers in exception handling data. */
14041int
14042aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
14043{
14044 int type;
14045 switch (aarch64_cmodel)
14046 {
14047 case AARCH64_CMODEL_TINY:
14048 case AARCH64_CMODEL_TINY_PIC:
14049 case AARCH64_CMODEL_SMALL:
14050 case AARCH64_CMODEL_SMALL_PIC:
1b1e81f8 14051 case AARCH64_CMODEL_SMALL_SPIC:
43e9d192
IB
14052 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
14053 for everything. */
14054 type = DW_EH_PE_sdata4;
14055 break;
14056 default:
14057 /* No assumptions here. 8-byte relocs required. */
14058 type = DW_EH_PE_sdata8;
14059 break;
14060 }
14061 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
14062}
14063
e1c1ecb0
KT
14064/* The last .arch and .tune assembly strings that we printed. */
14065static std::string aarch64_last_printed_arch_string;
14066static std::string aarch64_last_printed_tune_string;
14067
361fb3ee
KT
14068/* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
14069 by the function fndecl. */
14070
14071void
14072aarch64_declare_function_name (FILE *stream, const char* name,
14073 tree fndecl)
14074{
14075 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
14076
14077 struct cl_target_option *targ_options;
14078 if (target_parts)
14079 targ_options = TREE_TARGET_OPTION (target_parts);
14080 else
14081 targ_options = TREE_TARGET_OPTION (target_option_current_node);
14082 gcc_assert (targ_options);
14083
14084 const struct processor *this_arch
14085 = aarch64_get_arch (targ_options->x_explicit_arch);
14086
054b4005
JG
14087 unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
14088 std::string extension
04a99ebe
JG
14089 = aarch64_get_extension_string_for_isa_flags (isa_flags,
14090 this_arch->flags);
e1c1ecb0
KT
14091 /* Only update the assembler .arch string if it is distinct from the last
14092 such string we printed. */
14093 std::string to_print = this_arch->name + extension;
14094 if (to_print != aarch64_last_printed_arch_string)
14095 {
14096 asm_fprintf (asm_out_file, "\t.arch %s\n", to_print.c_str ());
14097 aarch64_last_printed_arch_string = to_print;
14098 }
361fb3ee
KT
14099
14100 /* Print the cpu name we're tuning for in the comments; it might be
e1c1ecb0
KT
14101 useful to readers of the generated asm. Do it only when it changes
14102 from function to function and verbose assembly is requested. */
361fb3ee
KT
14103 const struct processor *this_tune
14104 = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
14105
e1c1ecb0
KT
14106 if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
14107 {
14108 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
14109 this_tune->name);
14110 aarch64_last_printed_tune_string = this_tune->name;
14111 }
361fb3ee
KT
14112
14113 /* Don't forget the type directive for ELF. */
14114 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
14115 ASM_OUTPUT_LABEL (stream, name);
14116}
14117
e1c1ecb0
KT
14118/* Implements TARGET_ASM_FILE_START. Output the assembly header. */
14119
14120static void
14121aarch64_start_file (void)
14122{
14123 struct cl_target_option *default_options
14124 = TREE_TARGET_OPTION (target_option_default_node);
14125
14126 const struct processor *default_arch
14127 = aarch64_get_arch (default_options->x_explicit_arch);
14128 unsigned long default_isa_flags = default_options->x_aarch64_isa_flags;
14129 std::string extension
04a99ebe
JG
14130 = aarch64_get_extension_string_for_isa_flags (default_isa_flags,
14131 default_arch->flags);
e1c1ecb0
KT
14132
14133 aarch64_last_printed_arch_string = default_arch->name + extension;
14134 aarch64_last_printed_tune_string = "";
14135 asm_fprintf (asm_out_file, "\t.arch %s\n",
14136 aarch64_last_printed_arch_string.c_str ());
14137
14138 default_file_start ();
14139}
14140
0462169c
SN
14141/* Emit load exclusive. */
14142
14143static void
ef4bddc2 14144aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
0462169c
SN
14145 rtx mem, rtx model_rtx)
14146{
14147 rtx (*gen) (rtx, rtx, rtx);
14148
14149 switch (mode)
14150 {
4e10a5a7
RS
14151 case E_QImode: gen = gen_aarch64_load_exclusiveqi; break;
14152 case E_HImode: gen = gen_aarch64_load_exclusivehi; break;
14153 case E_SImode: gen = gen_aarch64_load_exclusivesi; break;
14154 case E_DImode: gen = gen_aarch64_load_exclusivedi; break;
0462169c
SN
14155 default:
14156 gcc_unreachable ();
14157 }
14158
14159 emit_insn (gen (rval, mem, model_rtx));
14160}
14161
14162/* Emit store exclusive. */
14163
14164static void
ef4bddc2 14165aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
0462169c
SN
14166 rtx rval, rtx mem, rtx model_rtx)
14167{
14168 rtx (*gen) (rtx, rtx, rtx, rtx);
14169
14170 switch (mode)
14171 {
4e10a5a7
RS
14172 case E_QImode: gen = gen_aarch64_store_exclusiveqi; break;
14173 case E_HImode: gen = gen_aarch64_store_exclusivehi; break;
14174 case E_SImode: gen = gen_aarch64_store_exclusivesi; break;
14175 case E_DImode: gen = gen_aarch64_store_exclusivedi; break;
0462169c
SN
14176 default:
14177 gcc_unreachable ();
14178 }
14179
14180 emit_insn (gen (bval, rval, mem, model_rtx));
14181}
14182
14183/* Mark the previous jump instruction as unlikely. */
14184
14185static void
14186aarch64_emit_unlikely_jump (rtx insn)
14187{
f370536c 14188 rtx_insn *jump = emit_jump_insn (insn);
5fa396ad 14189 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
0462169c
SN
14190}
14191
14192/* Expand a compare and swap pattern. */
14193
14194void
14195aarch64_expand_compare_and_swap (rtx operands[])
14196{
14197 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
ef4bddc2 14198 machine_mode mode, cmp_mode;
b0770c0f
MW
14199 typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
14200 int idx;
14201 gen_cas_fn gen;
14202 const gen_cas_fn split_cas[] =
14203 {
14204 gen_aarch64_compare_and_swapqi,
14205 gen_aarch64_compare_and_swaphi,
14206 gen_aarch64_compare_and_swapsi,
14207 gen_aarch64_compare_and_swapdi
14208 };
14209 const gen_cas_fn atomic_cas[] =
14210 {
14211 gen_aarch64_compare_and_swapqi_lse,
14212 gen_aarch64_compare_and_swaphi_lse,
14213 gen_aarch64_compare_and_swapsi_lse,
14214 gen_aarch64_compare_and_swapdi_lse
14215 };
0462169c
SN
14216
14217 bval = operands[0];
14218 rval = operands[1];
14219 mem = operands[2];
14220 oldval = operands[3];
14221 newval = operands[4];
14222 is_weak = operands[5];
14223 mod_s = operands[6];
14224 mod_f = operands[7];
14225 mode = GET_MODE (mem);
14226 cmp_mode = mode;
14227
14228 /* Normally the succ memory model must be stronger than fail, but in the
14229 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
14230 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
14231
46b35980
AM
14232 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
14233 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
0462169c
SN
14234 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
14235
14236 switch (mode)
14237 {
4e10a5a7
RS
14238 case E_QImode:
14239 case E_HImode:
0462169c
SN
14240 /* For short modes, we're going to perform the comparison in SImode,
14241 so do the zero-extension now. */
14242 cmp_mode = SImode;
14243 rval = gen_reg_rtx (SImode);
14244 oldval = convert_modes (SImode, mode, oldval, true);
14245 /* Fall through. */
14246
4e10a5a7
RS
14247 case E_SImode:
14248 case E_DImode:
0462169c
SN
14249 /* Force the value into a register if needed. */
14250 if (!aarch64_plus_operand (oldval, mode))
14251 oldval = force_reg (cmp_mode, oldval);
14252 break;
14253
14254 default:
14255 gcc_unreachable ();
14256 }
14257
14258 switch (mode)
14259 {
4e10a5a7
RS
14260 case E_QImode: idx = 0; break;
14261 case E_HImode: idx = 1; break;
14262 case E_SImode: idx = 2; break;
14263 case E_DImode: idx = 3; break;
0462169c
SN
14264 default:
14265 gcc_unreachable ();
14266 }
b0770c0f
MW
14267 if (TARGET_LSE)
14268 gen = atomic_cas[idx];
14269 else
14270 gen = split_cas[idx];
0462169c
SN
14271
14272 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
14273
14274 if (mode == QImode || mode == HImode)
14275 emit_move_insn (operands[1], gen_lowpart (mode, rval));
14276
14277 x = gen_rtx_REG (CCmode, CC_REGNUM);
14278 x = gen_rtx_EQ (SImode, x, const0_rtx);
f7df4a84 14279 emit_insn (gen_rtx_SET (bval, x));
0462169c
SN
14280}
14281
641c2f8b
MW
14282/* Test whether the target supports using an atomic load-operate instruction.
14283 CODE is the operation and AFTER is TRUE if the data in memory after the
14284 operation should be returned and FALSE if the data before the operation
14285 should be returned. Returns FALSE if the operation isn't supported by the
14286 architecture. */
14287
14288bool
14289aarch64_atomic_ldop_supported_p (enum rtx_code code)
14290{
14291 if (!TARGET_LSE)
14292 return false;
14293
14294 switch (code)
14295 {
14296 case SET:
14297 case AND:
14298 case IOR:
14299 case XOR:
14300 case MINUS:
14301 case PLUS:
14302 return true;
14303 default:
14304 return false;
14305 }
14306}
14307
f70fb3b6
MW
14308/* Emit a barrier appropriate for memory model MODEL at the end of a
14309 sequence implementing an atomic operation. */
14310
14311static void
14312aarch64_emit_post_barrier (enum memmodel model)
14313{
14314 const enum memmodel base_model = memmodel_base (model);
14315
14316 if (is_mm_sync (model)
14317 && (base_model == MEMMODEL_ACQUIRE
14318 || base_model == MEMMODEL_ACQ_REL
14319 || base_model == MEMMODEL_SEQ_CST))
14320 {
14321 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
14322 }
14323}
14324
b0770c0f
MW
14325/* Emit an atomic compare-and-swap operation. RVAL is the destination register
14326 for the data in memory. EXPECTED is the value expected to be in memory.
14327 DESIRED is the value to store to memory. MEM is the memory location. MODEL
14328 is the memory ordering to use. */
14329
14330void
14331aarch64_gen_atomic_cas (rtx rval, rtx mem,
14332 rtx expected, rtx desired,
14333 rtx model)
14334{
14335 rtx (*gen) (rtx, rtx, rtx, rtx);
14336 machine_mode mode;
14337
14338 mode = GET_MODE (mem);
14339
14340 switch (mode)
14341 {
4e10a5a7
RS
14342 case E_QImode: gen = gen_aarch64_atomic_casqi; break;
14343 case E_HImode: gen = gen_aarch64_atomic_cashi; break;
14344 case E_SImode: gen = gen_aarch64_atomic_cassi; break;
14345 case E_DImode: gen = gen_aarch64_atomic_casdi; break;
b0770c0f
MW
14346 default:
14347 gcc_unreachable ();
14348 }
14349
14350 /* Move the expected value into the CAS destination register. */
14351 emit_insn (gen_rtx_SET (rval, expected));
14352
14353 /* Emit the CAS. */
14354 emit_insn (gen (rval, mem, desired, model));
14355
14356 /* Compare the expected value with the value loaded by the CAS, to establish
14357 whether the swap was made. */
14358 aarch64_gen_compare_reg (EQ, rval, expected);
14359}
14360
0462169c
SN
14361/* Split a compare and swap pattern. */
14362
14363void
14364aarch64_split_compare_and_swap (rtx operands[])
14365{
14366 rtx rval, mem, oldval, newval, scratch;
ef4bddc2 14367 machine_mode mode;
0462169c 14368 bool is_weak;
5d8a22a5
DM
14369 rtx_code_label *label1, *label2;
14370 rtx x, cond;
ab876106
MW
14371 enum memmodel model;
14372 rtx model_rtx;
0462169c
SN
14373
14374 rval = operands[0];
14375 mem = operands[1];
14376 oldval = operands[2];
14377 newval = operands[3];
14378 is_weak = (operands[4] != const0_rtx);
ab876106 14379 model_rtx = operands[5];
0462169c
SN
14380 scratch = operands[7];
14381 mode = GET_MODE (mem);
ab876106 14382 model = memmodel_from_int (INTVAL (model_rtx));
0462169c 14383
17f47f86
KT
14384 /* When OLDVAL is zero and we want the strong version we can emit a tighter
14385 loop:
14386 .label1:
14387 LD[A]XR rval, [mem]
14388 CBNZ rval, .label2
14389 ST[L]XR scratch, newval, [mem]
14390 CBNZ scratch, .label1
14391 .label2:
14392 CMP rval, 0. */
14393 bool strong_zero_p = !is_weak && oldval == const0_rtx;
14394
5d8a22a5 14395 label1 = NULL;
0462169c
SN
14396 if (!is_weak)
14397 {
14398 label1 = gen_label_rtx ();
14399 emit_label (label1);
14400 }
14401 label2 = gen_label_rtx ();
14402
ab876106
MW
14403 /* The initial load can be relaxed for a __sync operation since a final
14404 barrier will be emitted to stop code hoisting. */
14405 if (is_mm_sync (model))
14406 aarch64_emit_load_exclusive (mode, rval, mem,
14407 GEN_INT (MEMMODEL_RELAXED));
14408 else
14409 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);
0462169c 14410
17f47f86
KT
14411 if (strong_zero_p)
14412 {
14413 x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
14414 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
14415 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
14416 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
14417 }
14418 else
14419 {
14420 cond = aarch64_gen_compare_reg (NE, rval, oldval);
14421 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
14422 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
14423 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
14424 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
14425 }
0462169c 14426
ab876106 14427 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);
0462169c
SN
14428
14429 if (!is_weak)
14430 {
14431 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
14432 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
14433 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
f7df4a84 14434 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
0462169c
SN
14435 }
14436 else
14437 {
14438 cond = gen_rtx_REG (CCmode, CC_REGNUM);
14439 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
f7df4a84 14440 emit_insn (gen_rtx_SET (cond, x));
0462169c
SN
14441 }
14442
14443 emit_label (label2);
17f47f86
KT
14444 /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
14445 to set the condition flags. If this is not used it will be removed by
14446 later passes. */
14447 if (strong_zero_p)
14448 {
14449 cond = gen_rtx_REG (CCmode, CC_REGNUM);
14450 x = gen_rtx_COMPARE (CCmode, rval, const0_rtx);
14451 emit_insn (gen_rtx_SET (cond, x));
14452 }
ab876106
MW
14453 /* Emit any final barrier needed for a __sync operation. */
14454 if (is_mm_sync (model))
14455 aarch64_emit_post_barrier (model);
0462169c
SN
14456}
14457
68729b06
MW
14458/* Emit a BIC instruction. */
14459
14460static void
14461aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
14462{
14463 rtx shift_rtx = GEN_INT (shift);
14464 rtx (*gen) (rtx, rtx, rtx, rtx);
14465
14466 switch (mode)
14467 {
4e10a5a7
RS
14468 case E_SImode: gen = gen_and_one_cmpl_lshrsi3; break;
14469 case E_DImode: gen = gen_and_one_cmpl_lshrdi3; break;
68729b06
MW
14470 default:
14471 gcc_unreachable ();
14472 }
14473
14474 emit_insn (gen (dst, s2, shift_rtx, s1));
14475}
14476
9cd7b720
MW
14477/* Emit an atomic swap. */
14478
14479static void
14480aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
14481 rtx mem, rtx model)
14482{
14483 rtx (*gen) (rtx, rtx, rtx, rtx);
14484
14485 switch (mode)
14486 {
4e10a5a7
RS
14487 case E_QImode: gen = gen_aarch64_atomic_swpqi; break;
14488 case E_HImode: gen = gen_aarch64_atomic_swphi; break;
14489 case E_SImode: gen = gen_aarch64_atomic_swpsi; break;
14490 case E_DImode: gen = gen_aarch64_atomic_swpdi; break;
9cd7b720
MW
14491 default:
14492 gcc_unreachable ();
14493 }
14494
14495 emit_insn (gen (dst, mem, value, model));
14496}
14497
641c2f8b
MW
14498/* Operations supported by aarch64_emit_atomic_load_op. */
14499
14500enum aarch64_atomic_load_op_code
14501{
14502 AARCH64_LDOP_PLUS, /* A + B */
14503 AARCH64_LDOP_XOR, /* A ^ B */
14504 AARCH64_LDOP_OR, /* A | B */
14505 AARCH64_LDOP_BIC /* A & ~B */
14506};
14507
14508/* Emit an atomic load-operate. */
14509
14510static void
14511aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
14512 machine_mode mode, rtx dst, rtx src,
14513 rtx mem, rtx model)
14514{
14515 typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
14516 const aarch64_atomic_load_op_fn plus[] =
14517 {
14518 gen_aarch64_atomic_loadaddqi,
14519 gen_aarch64_atomic_loadaddhi,
14520 gen_aarch64_atomic_loadaddsi,
14521 gen_aarch64_atomic_loadadddi
14522 };
14523 const aarch64_atomic_load_op_fn eor[] =
14524 {
14525 gen_aarch64_atomic_loadeorqi,
14526 gen_aarch64_atomic_loadeorhi,
14527 gen_aarch64_atomic_loadeorsi,
14528 gen_aarch64_atomic_loadeordi
14529 };
14530 const aarch64_atomic_load_op_fn ior[] =
14531 {
14532 gen_aarch64_atomic_loadsetqi,
14533 gen_aarch64_atomic_loadsethi,
14534 gen_aarch64_atomic_loadsetsi,
14535 gen_aarch64_atomic_loadsetdi
14536 };
14537 const aarch64_atomic_load_op_fn bic[] =
14538 {
14539 gen_aarch64_atomic_loadclrqi,
14540 gen_aarch64_atomic_loadclrhi,
14541 gen_aarch64_atomic_loadclrsi,
14542 gen_aarch64_atomic_loadclrdi
14543 };
14544 aarch64_atomic_load_op_fn gen;
14545 int idx = 0;
14546
14547 switch (mode)
14548 {
4e10a5a7
RS
14549 case E_QImode: idx = 0; break;
14550 case E_HImode: idx = 1; break;
14551 case E_SImode: idx = 2; break;
14552 case E_DImode: idx = 3; break;
641c2f8b
MW
14553 default:
14554 gcc_unreachable ();
14555 }
14556
14557 switch (code)
14558 {
14559 case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
14560 case AARCH64_LDOP_XOR: gen = eor[idx]; break;
14561 case AARCH64_LDOP_OR: gen = ior[idx]; break;
14562 case AARCH64_LDOP_BIC: gen = bic[idx]; break;
14563 default:
14564 gcc_unreachable ();
14565 }
14566
14567 emit_insn (gen (dst, mem, src, model));
14568}
14569
14570/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
68729b06
MW
14571 location to store the data read from memory. OUT_RESULT is the location to
14572 store the result of the operation. MEM is the memory location to read and
14573 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
14574 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
14575 be NULL. */
9cd7b720
MW
14576
14577void
68729b06 14578aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
9cd7b720
MW
14579 rtx mem, rtx value, rtx model_rtx)
14580{
14581 machine_mode mode = GET_MODE (mem);
641c2f8b
MW
14582 machine_mode wmode = (mode == DImode ? DImode : SImode);
14583 const bool short_mode = (mode < SImode);
14584 aarch64_atomic_load_op_code ldop_code;
14585 rtx src;
14586 rtx x;
14587
14588 if (out_data)
14589 out_data = gen_lowpart (mode, out_data);
9cd7b720 14590
68729b06
MW
14591 if (out_result)
14592 out_result = gen_lowpart (mode, out_result);
14593
641c2f8b
MW
14594 /* Make sure the value is in a register, putting it into a destination
14595 register if it needs to be manipulated. */
14596 if (!register_operand (value, mode)
14597 || code == AND || code == MINUS)
14598 {
68729b06 14599 src = out_result ? out_result : out_data;
641c2f8b
MW
14600 emit_move_insn (src, gen_lowpart (mode, value));
14601 }
14602 else
14603 src = value;
14604 gcc_assert (register_operand (src, mode));
9cd7b720 14605
641c2f8b
MW
14606 /* Preprocess the data for the operation as necessary. If the operation is
14607 a SET then emit a swap instruction and finish. */
9cd7b720
MW
14608 switch (code)
14609 {
14610 case SET:
641c2f8b 14611 aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
9cd7b720
MW
14612 return;
14613
641c2f8b
MW
14614 case MINUS:
14615 /* Negate the value and treat it as a PLUS. */
14616 {
14617 rtx neg_src;
14618
14619 /* Resize the value if necessary. */
14620 if (short_mode)
14621 src = gen_lowpart (wmode, src);
14622
14623 neg_src = gen_rtx_NEG (wmode, src);
14624 emit_insn (gen_rtx_SET (src, neg_src));
14625
14626 if (short_mode)
14627 src = gen_lowpart (mode, src);
14628 }
14629 /* Fall-through. */
14630 case PLUS:
14631 ldop_code = AARCH64_LDOP_PLUS;
14632 break;
14633
14634 case IOR:
14635 ldop_code = AARCH64_LDOP_OR;
14636 break;
14637
14638 case XOR:
14639 ldop_code = AARCH64_LDOP_XOR;
14640 break;
14641
14642 case AND:
14643 {
14644 rtx not_src;
14645
14646 /* Resize the value if necessary. */
14647 if (short_mode)
14648 src = gen_lowpart (wmode, src);
14649
14650 not_src = gen_rtx_NOT (wmode, src);
14651 emit_insn (gen_rtx_SET (src, not_src));
14652
14653 if (short_mode)
14654 src = gen_lowpart (mode, src);
14655 }
14656 ldop_code = AARCH64_LDOP_BIC;
14657 break;
14658
9cd7b720
MW
14659 default:
14660 /* The operation can't be done with atomic instructions. */
14661 gcc_unreachable ();
14662 }
641c2f8b
MW
14663
14664 aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
68729b06
MW
14665
14666 /* If necessary, calculate the data in memory after the update by redoing the
14667 operation from values in registers. */
14668 if (!out_result)
14669 return;
14670
14671 if (short_mode)
14672 {
14673 src = gen_lowpart (wmode, src);
14674 out_data = gen_lowpart (wmode, out_data);
14675 out_result = gen_lowpart (wmode, out_result);
14676 }
14677
14678 x = NULL_RTX;
14679
14680 switch (code)
14681 {
14682 case MINUS:
14683 case PLUS:
14684 x = gen_rtx_PLUS (wmode, out_data, src);
14685 break;
14686 case IOR:
14687 x = gen_rtx_IOR (wmode, out_data, src);
14688 break;
14689 case XOR:
14690 x = gen_rtx_XOR (wmode, out_data, src);
14691 break;
14692 case AND:
14693 aarch64_emit_bic (wmode, out_result, out_data, src, 0);
14694 return;
14695 default:
14696 gcc_unreachable ();
14697 }
14698
14699 emit_set_insn (out_result, x);
14700
14701 return;
9cd7b720
MW
14702}
14703
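/* Summary of the mapping used above (illustrative, not part of the original
   source): PLUS maps directly to LDADD, XOR to LDEOR and IOR to LDSET.
   MINUS is handled by negating the value in the destination register and
   issuing LDADD, and AND by inverting the value and issuing LDCLR (BIC),
   since the LSE instructions provide no direct SUB or AND forms.  SET is
   emitted as a SWP instead of a load-operate.  */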
0462169c
SN
14704/* Split an atomic operation. */
14705
14706void
14707aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9cd7b720 14708 rtx value, rtx model_rtx, rtx cond)
0462169c 14709{
ef4bddc2
RS
14710 machine_mode mode = GET_MODE (mem);
14711 machine_mode wmode = (mode == DImode ? DImode : SImode);
f70fb3b6
MW
14712 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
14713 const bool is_sync = is_mm_sync (model);
5d8a22a5
DM
14714 rtx_code_label *label;
14715 rtx x;
0462169c 14716
9cd7b720 14717 /* Split the atomic operation into a sequence. */
0462169c
SN
14718 label = gen_label_rtx ();
14719 emit_label (label);
14720
14721 if (new_out)
14722 new_out = gen_lowpart (wmode, new_out);
14723 if (old_out)
14724 old_out = gen_lowpart (wmode, old_out);
14725 else
14726 old_out = new_out;
14727 value = simplify_gen_subreg (wmode, value, mode, 0);
14728
f70fb3b6
MW
14729 /* The initial load can be relaxed for a __sync operation since a final
14730 barrier will be emitted to stop code hoisting. */
14731 if (is_sync)
14732 aarch64_emit_load_exclusive (mode, old_out, mem,
14733 GEN_INT (MEMMODEL_RELAXED));
14734 else
14735 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
0462169c
SN
14736
14737 switch (code)
14738 {
14739 case SET:
14740 new_out = value;
14741 break;
14742
14743 case NOT:
14744 x = gen_rtx_AND (wmode, old_out, value);
f7df4a84 14745 emit_insn (gen_rtx_SET (new_out, x));
0462169c 14746 x = gen_rtx_NOT (wmode, new_out);
f7df4a84 14747 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
14748 break;
14749
14750 case MINUS:
14751 if (CONST_INT_P (value))
14752 {
14753 value = GEN_INT (-INTVAL (value));
14754 code = PLUS;
14755 }
14756 /* Fall through. */
14757
14758 default:
14759 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
f7df4a84 14760 emit_insn (gen_rtx_SET (new_out, x));
0462169c
SN
14761 break;
14762 }
14763
14764 aarch64_emit_store_exclusive (mode, cond, mem,
14765 gen_lowpart (mode, new_out), model_rtx);
14766
14767 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
14768 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
14769 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
f7df4a84 14770 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
f70fb3b6
MW
14771
14772 /* Emit any final barrier needed for a __sync operation. */
14773 if (is_sync)
14774 aarch64_emit_post_barrier (model);
0462169c
SN
14775}
14776
c2ec330c
AL
14777static void
14778aarch64_init_libfuncs (void)
14779{
14780 /* Half-precision float operations. The compiler handles all operations
14781 with NULL libfuncs by converting to SFmode. */
14782
14783 /* Conversions. */
14784 set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
14785 set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
14786
14787 /* Arithmetic. */
14788 set_optab_libfunc (add_optab, HFmode, NULL);
14789 set_optab_libfunc (sdiv_optab, HFmode, NULL);
14790 set_optab_libfunc (smul_optab, HFmode, NULL);
14791 set_optab_libfunc (neg_optab, HFmode, NULL);
14792 set_optab_libfunc (sub_optab, HFmode, NULL);
14793
14794 /* Comparisons. */
14795 set_optab_libfunc (eq_optab, HFmode, NULL);
14796 set_optab_libfunc (ne_optab, HFmode, NULL);
14797 set_optab_libfunc (lt_optab, HFmode, NULL);
14798 set_optab_libfunc (le_optab, HFmode, NULL);
14799 set_optab_libfunc (ge_optab, HFmode, NULL);
14800 set_optab_libfunc (gt_optab, HFmode, NULL);
14801 set_optab_libfunc (unord_optab, HFmode, NULL);
14802}
14803
43e9d192 14804/* Target hook for c_mode_for_suffix. */
ef4bddc2 14805static machine_mode
43e9d192
IB
14806aarch64_c_mode_for_suffix (char suffix)
14807{
14808 if (suffix == 'q')
14809 return TFmode;
14810
14811 return VOIDmode;
14812}
14813
3520f7cc
JG
14814/* We can only represent floating point constants which will fit in
14815 "quarter-precision" values. These values are characterised by
14816 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
14817 by:
14818
14819 (-1)^s * (n/16) * 2^r
14820
14821 Where:
14822 's' is the sign bit.
14823 'n' is an integer in the range 16 <= n <= 31.
14824 'r' is an integer in the range -3 <= r <= 4. */
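/* Illustrative examples (not part of the original source): 0.25 is
   representable as (+1) * (16/16) * 2^-2 and 31.0 as (+1) * (31/16) * 2^4;
   the largest representable magnitude is therefore 31.0 and the smallest
   non-zero magnitude 0.125.  Values such as 0.1 (no exact n/16 form),
   32.0 (r = 5 is out of range) and 0.0 itself are not representable.  */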
14825
14826/* Return true iff X can be represented by a quarter-precision
14827 floating point immediate operand. Note, we cannot represent 0.0. */
14828bool
14829aarch64_float_const_representable_p (rtx x)
14830{
14831 /* This represents our current view of how many bits
14832 make up the mantissa. */
14833 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 14834 int exponent;
3520f7cc 14835 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 14836 REAL_VALUE_TYPE r, m;
807e902e 14837 bool fail;
3520f7cc
JG
14838
14839 if (!CONST_DOUBLE_P (x))
14840 return false;
14841
c2ec330c
AL
14842 /* We don't support HFmode constants yet. */
14843 if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
94bfa2da
TV
14844 return false;
14845
34a72c33 14846 r = *CONST_DOUBLE_REAL_VALUE (x);
3520f7cc
JG
14847
14848 /* We cannot represent infinities, NaNs or +/-zero. We won't
14849 know if we have +zero until we analyse the mantissa, but we
14850 can reject the other invalid values. */
14851 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
14852 || REAL_VALUE_MINUS_ZERO (r))
14853 return false;
14854
ba96cdfb 14855 /* Extract exponent. */
3520f7cc
JG
14856 r = real_value_abs (&r);
14857 exponent = REAL_EXP (&r);
14858
14859 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
14860 highest (sign) bit, with a fixed binary point at bit point_pos.
14861 m1 holds the low part of the mantissa, m2 the high part.
14862 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
14863 bits for the mantissa, this can fail (low bits will be lost). */
14864 real_ldexp (&m, &r, point_pos - exponent);
807e902e 14865 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
14866
14867 /* If the low part of the mantissa has bits set we cannot represent
14868 the value. */
d9074b29 14869 if (w.ulow () != 0)
3520f7cc
JG
14870 return false;
14871 /* We have rejected the lower HOST_WIDE_INT, so update our
14872 understanding of how many bits lie in the mantissa and
14873 look only at the high HOST_WIDE_INT. */
807e902e 14874 mantissa = w.elt (1);
3520f7cc
JG
14875 point_pos -= HOST_BITS_PER_WIDE_INT;
14876
14877 /* We can only represent values with a mantissa of the form 1.xxxx. */
14878 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
14879 if ((mantissa & mask) != 0)
14880 return false;
14881
14882 /* Having filtered unrepresentable values, we may now remove all
14883 but the highest 5 bits. */
14884 mantissa >>= point_pos - 5;
14885
14886 /* We cannot represent the value 0.0, so reject it. This is handled
14887 elsewhere. */
14888 if (mantissa == 0)
14889 return false;
14890
14891 /* Then, as bit 4 is always set, we can mask it off, leaving
14892 the mantissa in the range [0, 15]. */
14893 mantissa &= ~(1 << 4);
14894 gcc_assert (mantissa <= 15);
14895
14896 /* GCC internally does not use IEEE754-like encoding (where normalized
14897 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
14898 Our mantissa values are shifted 4 places to the left relative to
14899 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
14900 by 5 places to correct for GCC's representation. */
14901 exponent = 5 - exponent;
14902
14903 return (exponent >= 0 && exponent <= 7);
14904}
14905
ab6501d7
SD
14906/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
14907 immediate with a CONST_VECTOR of MODE and WIDTH. WHICH selects whether to
14908 output MOVI/MVNI, ORR or BIC immediate. */
3520f7cc 14909char*
b187677b 14910aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
ab6501d7 14911 enum simd_immediate_check which)
3520f7cc 14912{
3ea63f60 14913 bool is_valid;
3520f7cc 14914 static char templ[40];
3520f7cc 14915 const char *mnemonic;
e4f0f84d 14916 const char *shift_op;
3520f7cc 14917 unsigned int lane_count = 0;
81c2dfb9 14918 char element_char;
3520f7cc 14919
b187677b 14920 struct simd_immediate_info info;
48063b9d
IB
14921
14922 /* This will return true to show const_vector is legal for use as either
ab6501d7
SD
14923 a AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
14924 It will also update INFO to show how the immediate should be generated.
14925 WHICH selects whether to check for MOVI/MVNI, ORR or BIC. */
b187677b 14926 is_valid = aarch64_simd_valid_immediate (const_vector, &info, which);
3520f7cc
JG
14927 gcc_assert (is_valid);
14928
b187677b
RS
14929 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
14930 lane_count = width / GET_MODE_BITSIZE (info.elt_mode);
48063b9d 14931
b187677b 14932 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
3520f7cc 14933 {
b187677b 14934 gcc_assert (info.shift == 0 && info.insn == simd_immediate_info::MOV);
0d8e1702
KT
14935 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
14936 move immediate path. */
48063b9d
IB
14937 if (aarch64_float_const_zero_rtx_p (info.value))
14938 info.value = GEN_INT (0);
14939 else
14940 {
83faf7d0 14941 const unsigned int buf_size = 20;
48063b9d 14942 char float_buf[buf_size] = {'\0'};
34a72c33
RS
14943 real_to_decimal_for_mode (float_buf,
14944 CONST_DOUBLE_REAL_VALUE (info.value),
b187677b 14945 buf_size, buf_size, 1, info.elt_mode);
48063b9d
IB
14946
14947 if (lane_count == 1)
14948 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
14949 else
14950 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 14951 lane_count, element_char, float_buf);
48063b9d
IB
14952 return templ;
14953 }
3520f7cc 14954 }
3520f7cc 14955
0d8e1702 14956 gcc_assert (CONST_INT_P (info.value));
ab6501d7
SD
14957
14958 if (which == AARCH64_CHECK_MOV)
14959 {
b187677b
RS
14960 mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
14961 shift_op = info.modifier == simd_immediate_info::MSL ? "msl" : "lsl";
ab6501d7
SD
14962 if (lane_count == 1)
14963 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
14964 mnemonic, UINTVAL (info.value));
14965 else if (info.shift)
14966 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
14967 HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
14968 element_char, UINTVAL (info.value), shift_op, info.shift);
14969 else
14970 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
14971 HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
14972 element_char, UINTVAL (info.value));
14973 }
3520f7cc 14974 else
ab6501d7
SD
14975 {
14976 /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */
b187677b 14977 mnemonic = info.insn == simd_immediate_info::MVN ? "bic" : "orr";
ab6501d7
SD
14978 if (info.shift)
14979 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
14980 HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
14981 element_char, UINTVAL (info.value), "lsl", info.shift);
14982 else
14983 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
14984 HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
14985 element_char, UINTVAL (info.value));
14986 }
3520f7cc
JG
14987 return templ;
14988}
14989
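/* Illustrative example (not part of the original source): for a V8HImode
   vector whose lanes all contain 0x1200, the immediate is encoded as value
   0x12 with a left shift of 8, and the routine above prints something like
   "movi\t%0.8h, 0x12, lsl 8" for AARCH64_CHECK_MOV.  */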
b7342d25 14990char*
77e994c9 14991aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
b7342d25 14992{
a2170965
TC
14993
14994 /* If a floating point number was passed and we desire to use it in an
14995 integer mode do the conversion to integer. */
14996 if (CONST_DOUBLE_P (immediate) && GET_MODE_CLASS (mode) == MODE_INT)
14997 {
14998 unsigned HOST_WIDE_INT ival;
14999 if (!aarch64_reinterpret_float_as_int (immediate, &ival))
15000 gcc_unreachable ();
15001 immediate = gen_int_mode (ival, mode);
15002 }
15003
ef4bddc2 15004 machine_mode vmode;
a2170965
TC
15005 /* Use a 64-bit mode for everything except DI/DF mode, where we use
15006 a 128-bit vector mode. */
15007 int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
b7342d25 15008
a2170965 15009 vmode = aarch64_simd_container_mode (mode, width);
b7342d25 15010 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
b187677b 15011 return aarch64_output_simd_mov_immediate (v_op, width);
b7342d25
IB
15012}
15013
43cacb12
RS
15014/* Return the output string to use for moving immediate CONST_VECTOR
15015 into an SVE register. */
15016
15017char *
15018aarch64_output_sve_mov_immediate (rtx const_vector)
15019{
15020 static char templ[40];
15021 struct simd_immediate_info info;
15022 char element_char;
15023
15024 bool is_valid = aarch64_simd_valid_immediate (const_vector, &info);
15025 gcc_assert (is_valid);
15026
15027 element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
15028
15029 if (info.step)
15030 {
15031 snprintf (templ, sizeof (templ), "index\t%%0.%c, #"
15032 HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
15033 element_char, INTVAL (info.value), INTVAL (info.step));
15034 return templ;
15035 }
15036
15037 if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT)
15038 {
15039 if (aarch64_float_const_zero_rtx_p (info.value))
15040 info.value = GEN_INT (0);
15041 else
15042 {
15043 const int buf_size = 20;
15044 char float_buf[buf_size] = {};
15045 real_to_decimal_for_mode (float_buf,
15046 CONST_DOUBLE_REAL_VALUE (info.value),
15047 buf_size, buf_size, 1, info.elt_mode);
15048
15049 snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
15050 element_char, float_buf);
15051 return templ;
15052 }
15053 }
15054
15055 snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
15056 element_char, INTVAL (info.value));
15057 return templ;
15058}
15059
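/* Illustrative examples (not part of the original source): an SVE series
   constant { 0, 1, 2, ... } of 32-bit elements is printed as
   "index\t%0.s, #0, #1", a broadcast of the integer 5 as "mov\t%0.s, #5",
   and a floating-point broadcast uses fmov with a decimal string produced
   by real_to_decimal_for_mode.  */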
15060/* Return the asm format for a PTRUE instruction whose destination has
15061 mode MODE. SUFFIX is the element size suffix. */
15062
15063char *
15064aarch64_output_ptrue (machine_mode mode, char suffix)
15065{
15066 unsigned int nunits;
15067 static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")];
15068 if (GET_MODE_NUNITS (mode).is_constant (&nunits))
15069 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", suffix, nunits);
15070 else
15071 snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, all", suffix);
15072 return buf;
15073}
15074
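/* Illustrative example (not part of the original source): when the number
   of predicate elements is a compile-time constant, e.g. 16 under
   -msve-vector-bits=128 with a byte suffix, the result is
   "ptrue\t%0.b, vl16"; for scalable vector lengths it is
   "ptrue\t%0.b, all".  */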
88b08073
JG
15075/* Split operands into moves from op[1] + op[2] into op[0]. */
15076
15077void
15078aarch64_split_combinev16qi (rtx operands[3])
15079{
15080 unsigned int dest = REGNO (operands[0]);
15081 unsigned int src1 = REGNO (operands[1]);
15082 unsigned int src2 = REGNO (operands[2]);
ef4bddc2 15083 machine_mode halfmode = GET_MODE (operands[1]);
462a99aa 15084 unsigned int halfregs = REG_NREGS (operands[1]);
88b08073
JG
15085 rtx destlo, desthi;
15086
15087 gcc_assert (halfmode == V16QImode);
15088
15089 if (src1 == dest && src2 == dest + halfregs)
15090 {
15091 /* No-op move. Can't split to nothing; emit something. */
15092 emit_note (NOTE_INSN_DELETED);
15093 return;
15094 }
15095
15096 /* Preserve register attributes for variable tracking. */
15097 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
15098 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
15099 GET_MODE_SIZE (halfmode));
15100
15101 /* Special case of reversed high/low parts. */
15102 if (reg_overlap_mentioned_p (operands[2], destlo)
15103 && reg_overlap_mentioned_p (operands[1], desthi))
15104 {
15105 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
15106 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
15107 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
15108 }
15109 else if (!reg_overlap_mentioned_p (operands[2], destlo))
15110 {
15111 /* Try to avoid unnecessary moves if part of the result
15112 is in the right place already. */
15113 if (src1 != dest)
15114 emit_move_insn (destlo, operands[1]);
15115 if (src2 != dest + halfregs)
15116 emit_move_insn (desthi, operands[2]);
15117 }
15118 else
15119 {
15120 if (src2 != dest + halfregs)
15121 emit_move_insn (desthi, operands[2]);
15122 if (src1 != dest)
15123 emit_move_insn (destlo, operands[1]);
15124 }
15125}
15126
15127/* vec_perm support. */
15128
88b08073
JG
15129struct expand_vec_perm_d
15130{
15131 rtx target, op0, op1;
e3342de4 15132 vec_perm_indices perm;
ef4bddc2 15133 machine_mode vmode;
43cacb12 15134 unsigned int vec_flags;
88b08073
JG
15135 bool one_vector_p;
15136 bool testing_p;
15137};
15138
15139/* Generate a variable permutation. */
15140
15141static void
15142aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
15143{
ef4bddc2 15144 machine_mode vmode = GET_MODE (target);
88b08073
JG
15145 bool one_vector_p = rtx_equal_p (op0, op1);
15146
15147 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
15148 gcc_checking_assert (GET_MODE (op0) == vmode);
15149 gcc_checking_assert (GET_MODE (op1) == vmode);
15150 gcc_checking_assert (GET_MODE (sel) == vmode);
15151 gcc_checking_assert (TARGET_SIMD);
15152
15153 if (one_vector_p)
15154 {
15155 if (vmode == V8QImode)
15156 {
15157 /* Expand the argument to a V16QI mode by duplicating it. */
15158 rtx pair = gen_reg_rtx (V16QImode);
15159 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
15160 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
15161 }
15162 else
15163 {
15164 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
15165 }
15166 }
15167 else
15168 {
15169 rtx pair;
15170
15171 if (vmode == V8QImode)
15172 {
15173 pair = gen_reg_rtx (V16QImode);
15174 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
15175 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
15176 }
15177 else
15178 {
15179 pair = gen_reg_rtx (OImode);
15180 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
15181 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
15182 }
15183 }
15184}
15185
80940017
RS
15186/* Expand a vec_perm with the operands given by TARGET, OP0, OP1 and SEL.
15187 NELT is the number of elements in the vector. */
15188
88b08073 15189void
80940017
RS
15190aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel,
15191 unsigned int nelt)
88b08073 15192{
ef4bddc2 15193 machine_mode vmode = GET_MODE (target);
88b08073 15194 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 15195 rtx mask;
88b08073
JG
15196
15197 /* The TBL instruction does not use a modulo index, so we must take care
15198 of that ourselves. */
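  /* For example, with V16QImode and two distinct input vectors NELT is 16,
     so each selector byte is ANDed with 31; out-of-range indices therefore
     wrap as vec_perm requires, rather than producing zero as a raw TBL
     would.  (Illustrative example only.)  */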
f7c4e5b8
AL
15199 mask = aarch64_simd_gen_const_vector_dup (vmode,
15200 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
15201 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
15202
f7c4e5b8
AL
15203 /* For big-endian, we also need to reverse the index within the vector
15204 (but not which vector). */
15205 if (BYTES_BIG_ENDIAN)
15206 {
15207 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
15208 if (!one_vector_p)
15209 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
15210 sel = expand_simple_binop (vmode, XOR, sel, mask,
15211 NULL, 0, OPTAB_LIB_WIDEN);
15212 }
88b08073
JG
15213 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
15214}
15215
43cacb12
RS
15216/* Generate (set TARGET (unspec [OP0 OP1] CODE)). */
15217
15218static void
15219emit_unspec2 (rtx target, int code, rtx op0, rtx op1)
15220{
15221 emit_insn (gen_rtx_SET (target,
15222 gen_rtx_UNSPEC (GET_MODE (target),
15223 gen_rtvec (2, op0, op1), code)));
15224}
15225
15226/* Expand an SVE vec_perm with the given operands. */
15227
15228void
15229aarch64_expand_sve_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
15230{
15231 machine_mode data_mode = GET_MODE (target);
15232 machine_mode sel_mode = GET_MODE (sel);
15233 /* Enforced by the pattern condition. */
15234 int nunits = GET_MODE_NUNITS (sel_mode).to_constant ();
15235
15236 /* Note: vec_perm indices are supposed to wrap when they go beyond the
15237 size of the two value vectors, i.e. the upper bits of the indices
15238 are effectively ignored. SVE TBL instead produces 0 for any
15239 out-of-range indices, so we need to modulo all the vec_perm indices
15240 to ensure they are all in range. */
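  /* For example, with 4-element vectors a selector value of 6 is first
     ANDed with 7; the TBL on OP0 then yields zero for that lane, and after
     subtracting the element count the TBL on OP1 selects element 2, which
     the final OR keeps.  (Illustrative example only.)  */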
15241 rtx sel_reg = force_reg (sel_mode, sel);
15242
15243 /* Check if the sel only references the first values vector. */
15244 if (GET_CODE (sel) == CONST_VECTOR
15245 && aarch64_const_vec_all_in_range_p (sel, 0, nunits - 1))
15246 {
15247 emit_unspec2 (target, UNSPEC_TBL, op0, sel_reg);
15248 return;
15249 }
15250
15251 /* Check if the two values vectors are the same. */
15252 if (rtx_equal_p (op0, op1))
15253 {
15254 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode, nunits - 1);
15255 rtx sel_mod = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
15256 NULL, 0, OPTAB_DIRECT);
15257 emit_unspec2 (target, UNSPEC_TBL, op0, sel_mod);
15258 return;
15259 }
15260
 15261 /* Run TBL on each value vector and combine the results. */
15262
15263 rtx res0 = gen_reg_rtx (data_mode);
15264 rtx res1 = gen_reg_rtx (data_mode);
15265 rtx neg_num_elems = aarch64_simd_gen_const_vector_dup (sel_mode, -nunits);
15266 if (GET_CODE (sel) != CONST_VECTOR
15267 || !aarch64_const_vec_all_in_range_p (sel, 0, 2 * nunits - 1))
15268 {
15269 rtx max_sel = aarch64_simd_gen_const_vector_dup (sel_mode,
15270 2 * nunits - 1);
15271 sel_reg = expand_simple_binop (sel_mode, AND, sel_reg, max_sel,
15272 NULL, 0, OPTAB_DIRECT);
15273 }
15274 emit_unspec2 (res0, UNSPEC_TBL, op0, sel_reg);
15275 rtx sel_sub = expand_simple_binop (sel_mode, PLUS, sel_reg, neg_num_elems,
15276 NULL, 0, OPTAB_DIRECT);
15277 emit_unspec2 (res1, UNSPEC_TBL, op1, sel_sub);
15278 if (GET_MODE_CLASS (data_mode) == MODE_VECTOR_INT)
15279 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (data_mode, res0, res1)));
15280 else
15281 emit_unspec2 (target, UNSPEC_IORF, res0, res1);
15282}
15283
cc4d934f
JG
15284/* Recognize patterns suitable for the TRN instructions. */
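/* For example, with four elements per input vector, TRN1 corresponds to the
   selector {0, 4, 2, 6} and TRN2 to {1, 5, 3, 7} (little-endian view;
   illustrative example only).  */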
15285static bool
15286aarch64_evpc_trn (struct expand_vec_perm_d *d)
15287{
6a70badb
RS
15288 HOST_WIDE_INT odd;
15289 poly_uint64 nelt = d->perm.length ();
cc4d934f 15290 rtx out, in0, in1, x;
ef4bddc2 15291 machine_mode vmode = d->vmode;
cc4d934f
JG
15292
15293 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15294 return false;
15295
15296 /* Note that these are little-endian tests.
15297 We correct for big-endian later. */
6a70badb
RS
15298 if (!d->perm[0].is_constant (&odd)
15299 || (odd != 0 && odd != 1)
326ac20e
RS
15300 || !d->perm.series_p (0, 2, odd, 2)
15301 || !d->perm.series_p (1, 2, nelt + odd, 2))
cc4d934f 15302 return false;
cc4d934f
JG
15303
15304 /* Success! */
15305 if (d->testing_p)
15306 return true;
15307
15308 in0 = d->op0;
15309 in1 = d->op1;
43cacb12
RS
15310 /* We don't need a big-endian lane correction for SVE; see the comment
15311 at the head of aarch64-sve.md for details. */
15312 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15313 {
15314 x = in0, in0 = in1, in1 = x;
15315 odd = !odd;
15316 }
15317 out = d->target;
15318
3f8334a5
RS
15319 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15320 odd ? UNSPEC_TRN2 : UNSPEC_TRN1));
cc4d934f
JG
15321 return true;
15322}
15323
15324/* Recognize patterns suitable for the UZP instructions. */
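/* For example, with four elements per input vector, UZP1 corresponds to the
   selector {0, 2, 4, 6} and UZP2 to {1, 3, 5, 7} (little-endian view;
   illustrative example only).  */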
15325static bool
15326aarch64_evpc_uzp (struct expand_vec_perm_d *d)
15327{
6a70badb 15328 HOST_WIDE_INT odd;
cc4d934f 15329 rtx out, in0, in1, x;
ef4bddc2 15330 machine_mode vmode = d->vmode;
cc4d934f
JG
15331
15332 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15333 return false;
15334
15335 /* Note that these are little-endian tests.
15336 We correct for big-endian later. */
6a70badb
RS
15337 if (!d->perm[0].is_constant (&odd)
15338 || (odd != 0 && odd != 1)
326ac20e 15339 || !d->perm.series_p (0, 1, odd, 2))
cc4d934f 15340 return false;
cc4d934f
JG
15341
15342 /* Success! */
15343 if (d->testing_p)
15344 return true;
15345
15346 in0 = d->op0;
15347 in1 = d->op1;
43cacb12
RS
15348 /* We don't need a big-endian lane correction for SVE; see the comment
15349 at the head of aarch64-sve.md for details. */
15350 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15351 {
15352 x = in0, in0 = in1, in1 = x;
15353 odd = !odd;
15354 }
15355 out = d->target;
15356
3f8334a5
RS
15357 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15358 odd ? UNSPEC_UZP2 : UNSPEC_UZP1));
cc4d934f
JG
15359 return true;
15360}
15361
15362/* Recognize patterns suitable for the ZIP instructions. */
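/* For example, with four elements per input vector, ZIP1 corresponds to the
   selector {0, 4, 1, 5} and ZIP2 to {2, 6, 3, 7} (little-endian view;
   illustrative example only).  */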
15363static bool
15364aarch64_evpc_zip (struct expand_vec_perm_d *d)
15365{
6a70badb
RS
15366 unsigned int high;
15367 poly_uint64 nelt = d->perm.length ();
cc4d934f 15368 rtx out, in0, in1, x;
ef4bddc2 15369 machine_mode vmode = d->vmode;
cc4d934f
JG
15370
15371 if (GET_MODE_UNIT_SIZE (vmode) > 8)
15372 return false;
15373
15374 /* Note that these are little-endian tests.
15375 We correct for big-endian later. */
6a70badb
RS
15376 poly_uint64 first = d->perm[0];
15377 if ((maybe_ne (first, 0U) && maybe_ne (first * 2, nelt))
15378 || !d->perm.series_p (0, 2, first, 1)
15379 || !d->perm.series_p (1, 2, first + nelt, 1))
cc4d934f 15380 return false;
6a70badb 15381 high = maybe_ne (first, 0U);
cc4d934f
JG
15382
15383 /* Success! */
15384 if (d->testing_p)
15385 return true;
15386
15387 in0 = d->op0;
15388 in1 = d->op1;
43cacb12
RS
15389 /* We don't need a big-endian lane correction for SVE; see the comment
15390 at the head of aarch64-sve.md for details. */
15391 if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD)
cc4d934f
JG
15392 {
15393 x = in0, in0 = in1, in1 = x;
15394 high = !high;
15395 }
15396 out = d->target;
15397
3f8334a5
RS
15398 emit_set_insn (out, gen_rtx_UNSPEC (vmode, gen_rtvec (2, in0, in1),
15399 high ? UNSPEC_ZIP2 : UNSPEC_ZIP1));
cc4d934f
JG
15400 return true;
15401}
15402
ae0533da
AL
15403/* Recognize patterns for the EXT insn. */
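/* For example, with four elements per vector the selector {1, 2, 3, 4} maps
   to an EXT with element offset 1 on little-endian; big-endian Advanced
   SIMD swaps the operands and adjusts the offset below.  (Illustrative
   example only.)  */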
15404
15405static bool
15406aarch64_evpc_ext (struct expand_vec_perm_d *d)
15407{
6a70badb 15408 HOST_WIDE_INT location;
ae0533da
AL
15409 rtx offset;
15410
6a70badb
RS
15411 /* The first element always refers to the first vector.
15412 Check if the extracted indices are increasing by one. */
43cacb12
RS
15413 if (d->vec_flags == VEC_SVE_PRED
15414 || !d->perm[0].is_constant (&location)
6a70badb 15415 || !d->perm.series_p (0, 1, location, 1))
326ac20e 15416 return false;
ae0533da 15417
ae0533da
AL
15418 /* Success! */
15419 if (d->testing_p)
15420 return true;
15421
b31e65bb 15422 /* The case where (location == 0) is a no-op for both big- and little-endian,
43cacb12 15423 and is removed by the mid-end at optimization levels -O1 and higher.
b31e65bb 15424
43cacb12
RS
15425 We don't need a big-endian lane correction for SVE; see the comment
15426 at the head of aarch64-sve.md for details. */
15427 if (BYTES_BIG_ENDIAN && location != 0 && d->vec_flags == VEC_ADVSIMD)
ae0533da
AL
15428 {
15429 /* After setup, we want the high elements of the first vector (stored
15430 at the LSB end of the register), and the low elements of the second
15431 vector (stored at the MSB end of the register). So swap. */
cb5c6c29 15432 std::swap (d->op0, d->op1);
6a70badb
RS
15433 /* location != 0 (above), so safe to assume (nelt - location) < nelt.
15434 to_constant () is safe since this is restricted to Advanced SIMD
15435 vectors. */
15436 location = d->perm.length ().to_constant () - location;
ae0533da
AL
15437 }
15438
15439 offset = GEN_INT (location);
3f8334a5
RS
15440 emit_set_insn (d->target,
15441 gen_rtx_UNSPEC (d->vmode,
15442 gen_rtvec (3, d->op0, d->op1, offset),
15443 UNSPEC_EXT));
ae0533da
AL
15444 return true;
15445}
15446
43cacb12
RS
15447/* Recognize patterns for the REV{64,32,16} insns, which reverse elements
15448 within each 64-bit, 32-bit or 16-bit granule. */
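/* For example, for V8HImode the single-input selector {3, 2, 1, 0, 7, 6, 5, 4}
   reverses the 16-bit elements within each 64-bit granule and so maps to
   REV64.  (Illustrative example only.)  */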
923fcec3
AL
15449
15450static bool
43cacb12 15451aarch64_evpc_rev_local (struct expand_vec_perm_d *d)
923fcec3 15452{
6a70badb
RS
15453 HOST_WIDE_INT diff;
15454 unsigned int i, size, unspec;
43cacb12 15455 machine_mode pred_mode;
923fcec3 15456
43cacb12
RS
15457 if (d->vec_flags == VEC_SVE_PRED
15458 || !d->one_vector_p
6a70badb 15459 || !d->perm[0].is_constant (&diff))
923fcec3
AL
15460 return false;
15461
3f8334a5
RS
15462 size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode);
15463 if (size == 8)
43cacb12
RS
15464 {
15465 unspec = UNSPEC_REV64;
15466 pred_mode = VNx2BImode;
15467 }
3f8334a5 15468 else if (size == 4)
43cacb12
RS
15469 {
15470 unspec = UNSPEC_REV32;
15471 pred_mode = VNx4BImode;
15472 }
3f8334a5 15473 else if (size == 2)
43cacb12
RS
15474 {
15475 unspec = UNSPEC_REV16;
15476 pred_mode = VNx8BImode;
15477 }
3f8334a5
RS
15478 else
15479 return false;
923fcec3 15480
326ac20e
RS
15481 unsigned int step = diff + 1;
15482 for (i = 0; i < step; ++i)
15483 if (!d->perm.series_p (i, step, diff - i, step))
15484 return false;
923fcec3
AL
15485
15486 /* Success! */
15487 if (d->testing_p)
15488 return true;
15489
43cacb12
RS
15490 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
15491 if (d->vec_flags == VEC_SVE_DATA)
15492 {
15493 rtx pred = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
15494 src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src),
15495 UNSPEC_MERGE_PTRUE);
15496 }
15497 emit_set_insn (d->target, src);
15498 return true;
15499}
15500
15501/* Recognize patterns for the REV insn, which reverses elements within
15502 a full vector. */
15503
15504static bool
15505aarch64_evpc_rev_global (struct expand_vec_perm_d *d)
15506{
15507 poly_uint64 nelt = d->perm.length ();
15508
15509 if (!d->one_vector_p || d->vec_flags != VEC_SVE_DATA)
15510 return false;
15511
15512 if (!d->perm.series_p (0, 1, nelt - 1, -1))
15513 return false;
15514
15515 /* Success! */
15516 if (d->testing_p)
15517 return true;
15518
15519 rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), UNSPEC_REV);
15520 emit_set_insn (d->target, src);
923fcec3
AL
15521 return true;
15522}
15523
91bd4114
JG
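/* Recognize permutations in which every element of the result is the same
   element of the first input vector, which map to DUP.  For example, the
   selector {2, 2, 2, 2} broadcasts element 2.  (Illustrative example
   only.)  */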
15524static bool
15525aarch64_evpc_dup (struct expand_vec_perm_d *d)
15526{
91bd4114
JG
15527 rtx out = d->target;
15528 rtx in0;
6a70badb 15529 HOST_WIDE_INT elt;
ef4bddc2 15530 machine_mode vmode = d->vmode;
91bd4114
JG
15531 rtx lane;
15532
43cacb12
RS
15533 if (d->vec_flags == VEC_SVE_PRED
15534 || d->perm.encoding ().encoded_nelts () != 1
6a70badb 15535 || !d->perm[0].is_constant (&elt))
326ac20e
RS
15536 return false;
15537
43cacb12
RS
15538 if (d->vec_flags == VEC_SVE_DATA && elt >= 64 * GET_MODE_UNIT_SIZE (vmode))
15539 return false;
15540
326ac20e
RS
15541 /* Success! */
15542 if (d->testing_p)
15543 return true;
15544
91bd4114
JG
15545 /* The generic preparation in aarch64_expand_vec_perm_const_1
15546 swaps the operand order and the permute indices if it finds
15547 d->perm[0] to be in the second operand. Thus, we can always
15548 use d->op0 and need not do any extra arithmetic to get the
15549 correct lane number. */
15550 in0 = d->op0;
f901401e 15551 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
91bd4114 15552
3f8334a5
RS
15553 rtx parallel = gen_rtx_PARALLEL (vmode, gen_rtvec (1, lane));
15554 rtx select = gen_rtx_VEC_SELECT (GET_MODE_INNER (vmode), in0, parallel);
15555 emit_set_insn (out, gen_rtx_VEC_DUPLICATE (vmode, select));
91bd4114
JG
15556 return true;
15557}
15558
88b08073
JG
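/* Fall back to a general permutation via TBL.  This only handles
   constant-index permutations of V8QImode or V16QImode vectors; the
   selector is materialized as a constant vector.  */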
15559static bool
15560aarch64_evpc_tbl (struct expand_vec_perm_d *d)
15561{
43cacb12 15562 rtx rperm[MAX_COMPILE_TIME_VEC_BYTES], sel;
ef4bddc2 15563 machine_mode vmode = d->vmode;
6a70badb
RS
15564
15565 /* Make sure that the indices are constant. */
15566 unsigned int encoded_nelts = d->perm.encoding ().encoded_nelts ();
15567 for (unsigned int i = 0; i < encoded_nelts; ++i)
15568 if (!d->perm[i].is_constant ())
15569 return false;
88b08073 15570
88b08073
JG
15571 if (d->testing_p)
15572 return true;
15573
15574 /* Generic code will try constant permutation twice. Once with the
15575 original mode and again with the elements lowered to QImode.
15576 So wait and don't do the selector expansion ourselves. */
15577 if (vmode != V8QImode && vmode != V16QImode)
15578 return false;
15579
6a70badb
RS
15580 /* to_constant is safe since this routine is specific to Advanced SIMD
15581 vectors. */
15582 unsigned int nelt = d->perm.length ().to_constant ();
15583 for (unsigned int i = 0; i < nelt; ++i)
15584 /* If big-endian and two vectors we end up with a weird mixed-endian
15585 mode on NEON. Reverse the index within each word but not the word
15586 itself. to_constant is safe because we checked is_constant above. */
15587 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN
15588 ? d->perm[i].to_constant () ^ (nelt - 1)
15589 : d->perm[i].to_constant ());
bbcc9c00 15590
88b08073
JG
15591 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
15592 sel = force_reg (vmode, sel);
15593
15594 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
15595 return true;
15596}
15597
43cacb12
RS
15598/* Try to implement D using an SVE TBL instruction. */
15599
15600static bool
15601aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
15602{
15603 unsigned HOST_WIDE_INT nelt;
15604
15605 /* Permuting two variable-length vectors could overflow the
15606 index range. */
15607 if (!d->one_vector_p && !d->perm.length ().is_constant (&nelt))
15608 return false;
15609
15610 if (d->testing_p)
15611 return true;
15612
15613 machine_mode sel_mode = mode_for_int_vector (d->vmode).require ();
15614 rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
15615 aarch64_expand_sve_vec_perm (d->target, d->op0, d->op1, sel);
15616 return true;
15617}
15618
88b08073
JG
15619static bool
15620aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
15621{
15622 /* The pattern matching functions above are written to look for a small
15623 number to begin the sequence (0, 1, N/2). If we begin with an index
15624 from the second operand, we can swap the operands. */
6a70badb
RS
15625 poly_int64 nelt = d->perm.length ();
15626 if (known_ge (d->perm[0], nelt))
88b08073 15627 {
e3342de4 15628 d->perm.rotate_inputs (1);
cb5c6c29 15629 std::swap (d->op0, d->op1);
88b08073
JG
15630 }
15631
43cacb12
RS
15632 if ((d->vec_flags == VEC_ADVSIMD
15633 || d->vec_flags == VEC_SVE_DATA
15634 || d->vec_flags == VEC_SVE_PRED)
15635 && known_gt (nelt, 1))
cc4d934f 15636 {
43cacb12
RS
15637 if (aarch64_evpc_rev_local (d))
15638 return true;
15639 else if (aarch64_evpc_rev_global (d))
923fcec3
AL
15640 return true;
15641 else if (aarch64_evpc_ext (d))
ae0533da 15642 return true;
f901401e
AL
15643 else if (aarch64_evpc_dup (d))
15644 return true;
ae0533da 15645 else if (aarch64_evpc_zip (d))
cc4d934f
JG
15646 return true;
15647 else if (aarch64_evpc_uzp (d))
15648 return true;
15649 else if (aarch64_evpc_trn (d))
15650 return true;
43cacb12
RS
15651 if (d->vec_flags == VEC_SVE_DATA)
15652 return aarch64_evpc_sve_tbl (d);
 15653 else if (d->vec_flags == VEC_ADVSIMD)
15654 return aarch64_evpc_tbl (d);
cc4d934f 15655 }
88b08073
JG
15656 return false;
15657}
15658
f151c9e1 15659/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
88b08073 15660
f151c9e1
RS
15661static bool
15662aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
15663 rtx op1, const vec_perm_indices &sel)
88b08073
JG
15664{
15665 struct expand_vec_perm_d d;
88b08073 15666
326ac20e
RS
15667 /* Check whether the mask can be applied to a single vector. */
15668 if (op0 && rtx_equal_p (op0, op1))
15669 d.one_vector_p = true;
15670 else if (sel.all_from_input_p (0))
88b08073 15671 {
326ac20e
RS
15672 d.one_vector_p = true;
15673 op1 = op0;
88b08073 15674 }
326ac20e 15675 else if (sel.all_from_input_p (1))
88b08073 15676 {
88b08073 15677 d.one_vector_p = true;
326ac20e 15678 op0 = op1;
88b08073 15679 }
326ac20e
RS
15680 else
15681 d.one_vector_p = false;
88b08073 15682
326ac20e
RS
15683 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2,
15684 sel.nelts_per_input ());
15685 d.vmode = vmode;
43cacb12 15686 d.vec_flags = aarch64_classify_vector_mode (d.vmode);
326ac20e
RS
15687 d.target = target;
15688 d.op0 = op0;
15689 d.op1 = op1;
15690 d.testing_p = !target;
e3342de4 15691
f151c9e1
RS
15692 if (!d.testing_p)
15693 return aarch64_expand_vec_perm_const_1 (&d);
88b08073 15694
326ac20e 15695 rtx_insn *last = get_last_insn ();
f151c9e1 15696 bool ret = aarch64_expand_vec_perm_const_1 (&d);
326ac20e 15697 gcc_assert (last == get_last_insn ());
88b08073
JG
15698
15699 return ret;
15700}
15701
73e3da51
RS
15702/* Generate a byte permute mask for a register of mode MODE,
15703 which has NUNITS units. */
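/* For example, for V8HImode the generated byte mask is
   { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }, i.e. the two
   bytes of each 16-bit element are swapped.  (Illustrative example only.)  */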
15704
668046d1 15705rtx
73e3da51 15706aarch64_reverse_mask (machine_mode mode, unsigned int nunits)
668046d1
DS
15707{
 15708 /* We have to reverse each vector because we don't have
15709 a permuted load that can reverse-load according to ABI rules. */
15710 rtx mask;
15711 rtvec v = rtvec_alloc (16);
73e3da51
RS
15712 unsigned int i, j;
15713 unsigned int usize = GET_MODE_UNIT_SIZE (mode);
668046d1
DS
15714
15715 gcc_assert (BYTES_BIG_ENDIAN);
15716 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));
15717
15718 for (i = 0; i < nunits; i++)
15719 for (j = 0; j < usize; j++)
15720 RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
15721 mask = gen_rtx_CONST_VECTOR (V16QImode, v);
15722 return force_reg (V16QImode, mask);
15723}
15724
43cacb12
RS
15725/* Return true if X is a valid second operand for the SVE instruction
15726 that implements integer comparison OP_CODE. */
15727
15728static bool
15729aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x)
15730{
15731 if (register_operand (x, VOIDmode))
15732 return true;
15733
15734 switch (op_code)
15735 {
15736 case LTU:
15737 case LEU:
15738 case GEU:
15739 case GTU:
15740 return aarch64_sve_cmp_immediate_p (x, false);
15741 case LT:
15742 case LE:
15743 case GE:
15744 case GT:
15745 case NE:
15746 case EQ:
15747 return aarch64_sve_cmp_immediate_p (x, true);
15748 default:
15749 gcc_unreachable ();
15750 }
15751}
15752
f22d7973
RS
15753/* Use predicated SVE instructions to implement the equivalent of:
15754
15755 (set TARGET OP)
15756
15757 given that PTRUE is an all-true predicate of the appropriate mode. */
15758
15759static void
15760aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op)
15761{
15762 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
15763 gen_rtvec (2, ptrue, op),
15764 UNSPEC_MERGE_PTRUE);
15765 rtx_insn *insn = emit_set_insn (target, unspec);
15766 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
15767}
15768
15769/* Likewise, but also clobber the condition codes. */
15770
15771static void
15772aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op)
15773{
15774 rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
15775 gen_rtvec (2, ptrue, op),
15776 UNSPEC_MERGE_PTRUE);
15777 rtx_insn *insn = emit_insn (gen_set_clobber_cc (target, unspec));
15778 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op));
15779}
15780
43cacb12
RS
15781/* Return the UNSPEC_COND_* code for comparison CODE. */
15782
15783static unsigned int
15784aarch64_unspec_cond_code (rtx_code code)
15785{
15786 switch (code)
15787 {
15788 case NE:
15789 return UNSPEC_COND_NE;
15790 case EQ:
15791 return UNSPEC_COND_EQ;
15792 case LT:
15793 return UNSPEC_COND_LT;
15794 case GT:
15795 return UNSPEC_COND_GT;
15796 case LE:
15797 return UNSPEC_COND_LE;
15798 case GE:
15799 return UNSPEC_COND_GE;
43cacb12
RS
15800 default:
15801 gcc_unreachable ();
15802 }
15803}
15804
f22d7973 15805/* Emit:
43cacb12 15806
f22d7973
RS
15807 (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_<X>))
15808
15809 where <X> is the operation associated with comparison CODE. This form
15810 of instruction is used when (and (CODE OP0 OP1) PRED) would have different
15811 semantics, such as when PRED might not be all-true and when comparing
15812 inactive lanes could have side effects. */
15813
15814static void
15815aarch64_emit_sve_predicated_cond (rtx target, rtx_code code,
15816 rtx pred, rtx op0, rtx op1)
43cacb12 15817{
f22d7973
RS
15818 rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
15819 gen_rtvec (3, pred, op0, op1),
15820 aarch64_unspec_cond_code (code));
15821 emit_set_insn (target, unspec);
43cacb12
RS
15822}
15823
f22d7973 15824/* Expand an SVE integer comparison using the SVE equivalent of:
43cacb12 15825
f22d7973 15826 (set TARGET (CODE OP0 OP1)). */
43cacb12
RS
15827
15828void
15829aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1)
15830{
15831 machine_mode pred_mode = GET_MODE (target);
15832 machine_mode data_mode = GET_MODE (op0);
15833
15834 if (!aarch64_sve_cmp_operand_p (code, op1))
15835 op1 = force_reg (data_mode, op1);
15836
15837 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
f22d7973
RS
15838 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
15839 aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond);
43cacb12
RS
15840}
15841
f22d7973 15842/* Emit the SVE equivalent of:
43cacb12 15843
f22d7973
RS
15844 (set TMP1 (CODE1 OP0 OP1))
15845 (set TMP2 (CODE2 OP0 OP1))
15846 (set TARGET (ior:PRED_MODE TMP1 TMP2))
43cacb12 15847
f22d7973 15848 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
15849
15850static void
f22d7973
RS
15851aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2,
15852 rtx ptrue, rtx op0, rtx op1)
43cacb12 15853{
f22d7973 15854 machine_mode pred_mode = GET_MODE (ptrue);
43cacb12 15855 rtx tmp1 = gen_reg_rtx (pred_mode);
f22d7973
RS
15856 aarch64_emit_sve_ptrue_op (tmp1, ptrue,
15857 gen_rtx_fmt_ee (code1, pred_mode, op0, op1));
43cacb12 15858 rtx tmp2 = gen_reg_rtx (pred_mode);
f22d7973
RS
15859 aarch64_emit_sve_ptrue_op (tmp2, ptrue,
15860 gen_rtx_fmt_ee (code2, pred_mode, op0, op1));
15861 aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
43cacb12
RS
15862}
15863
f22d7973 15864/* Emit the SVE equivalent of:
43cacb12 15865
f22d7973
RS
15866 (set TMP (CODE OP0 OP1))
15867 (set TARGET (not TMP))
43cacb12 15868
f22d7973 15869 PTRUE is an all-true predicate with the same mode as TARGET. */
43cacb12
RS
15870
15871static void
f22d7973
RS
15872aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code,
15873 rtx op0, rtx op1)
43cacb12 15874{
f22d7973
RS
15875 machine_mode pred_mode = GET_MODE (ptrue);
15876 rtx tmp = gen_reg_rtx (pred_mode);
15877 aarch64_emit_sve_ptrue_op (tmp, ptrue,
15878 gen_rtx_fmt_ee (code, pred_mode, op0, op1));
15879 aarch64_emit_unop (target, one_cmpl_optab, tmp);
43cacb12
RS
15880}
15881
f22d7973 15882/* Expand an SVE floating-point comparison using the SVE equivalent of:
43cacb12 15883
f22d7973 15884 (set TARGET (CODE OP0 OP1))
43cacb12
RS
15885
15886 If CAN_INVERT_P is true, the caller can also handle inverted results;
15887 return true if the result is in fact inverted. */
15888
15889bool
15890aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code,
15891 rtx op0, rtx op1, bool can_invert_p)
15892{
15893 machine_mode pred_mode = GET_MODE (target);
15894 machine_mode data_mode = GET_MODE (op0);
15895
15896 rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
15897 switch (code)
15898 {
15899 case UNORDERED:
15900 /* UNORDERED has no immediate form. */
15901 op1 = force_reg (data_mode, op1);
f22d7973 15902 /* fall through */
43cacb12
RS
15903 case LT:
15904 case LE:
15905 case GT:
15906 case GE:
15907 case EQ:
15908 case NE:
f22d7973
RS
15909 {
15910 /* There is native support for the comparison. */
15911 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
15912 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
15913 return false;
15914 }
43cacb12
RS
15915
15916 case LTGT:
15917 /* This is a trapping operation (LT or GT). */
f22d7973 15918 aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1);
43cacb12
RS
15919 return false;
15920
15921 case UNEQ:
15922 if (!flag_trapping_math)
15923 {
15924 /* This would trap for signaling NaNs. */
15925 op1 = force_reg (data_mode, op1);
f22d7973 15926 aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1);
43cacb12
RS
15927 return false;
15928 }
15929 /* fall through */
43cacb12
RS
15930 case UNLT:
15931 case UNLE:
15932 case UNGT:
15933 case UNGE:
f22d7973
RS
15934 if (flag_trapping_math)
15935 {
15936 /* Work out which elements are ordered. */
15937 rtx ordered = gen_reg_rtx (pred_mode);
15938 op1 = force_reg (data_mode, op1);
15939 aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1);
15940
15941 /* Test the opposite condition for the ordered elements,
15942 then invert the result. */
15943 if (code == UNEQ)
15944 code = NE;
15945 else
15946 code = reverse_condition_maybe_unordered (code);
15947 if (can_invert_p)
15948 {
15949 aarch64_emit_sve_predicated_cond (target, code,
15950 ordered, op0, op1);
15951 return true;
15952 }
15953 rtx tmp = gen_reg_rtx (pred_mode);
15954 aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1);
15955 aarch64_emit_unop (target, one_cmpl_optab, tmp);
15956 return false;
15957 }
15958 break;
15959
15960 case ORDERED:
15961 /* ORDERED has no immediate form. */
15962 op1 = force_reg (data_mode, op1);
15963 break;
43cacb12
RS
15964
15965 default:
15966 gcc_unreachable ();
15967 }
f22d7973
RS
15968
15969 /* There is native support for the inverse comparison. */
15970 code = reverse_condition_maybe_unordered (code);
15971 if (can_invert_p)
15972 {
15973 rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1);
15974 aarch64_emit_sve_ptrue_op (target, ptrue, cond);
15975 return true;
15976 }
15977 aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1);
15978 return false;
43cacb12
RS
15979}
15980
15981/* Expand an SVE vcond pattern with operands OPS. DATA_MODE is the mode
15982 of the data being selected and CMP_MODE is the mode of the values being
15983 compared. */
15984
15985void
15986aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
15987 rtx *ops)
15988{
15989 machine_mode pred_mode
15990 = aarch64_get_mask_mode (GET_MODE_NUNITS (cmp_mode),
15991 GET_MODE_SIZE (cmp_mode)).require ();
15992 rtx pred = gen_reg_rtx (pred_mode);
15993 if (FLOAT_MODE_P (cmp_mode))
15994 {
15995 if (aarch64_expand_sve_vec_cmp_float (pred, GET_CODE (ops[3]),
15996 ops[4], ops[5], true))
15997 std::swap (ops[1], ops[2]);
15998 }
15999 else
16000 aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]);
16001
16002 rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]);
16003 emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
16004}
16005
99e1629f
RS
16006/* Implement TARGET_MODES_TIEABLE_P. In principle we should always return
16007 true. However due to issues with register allocation it is preferable
 16008 to avoid tying integer scalar and FP scalar modes. Executing integer
16009 operations in general registers is better than treating them as scalar
16010 vector operations. This reduces latency and avoids redundant int<->FP
16011 moves. So tie modes if they are either the same class, or vector modes
16012 with other vector modes, vector structs or any scalar mode. */
97e1ad78 16013
99e1629f 16014static bool
ef4bddc2 16015aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
97e1ad78
JG
16016{
16017 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
16018 return true;
16019
16020 /* We specifically want to allow elements of "structure" modes to
16021 be tieable to the structure. This more general condition allows
43cacb12
RS
16022 other rarer situations too. The reason we don't extend this to
16023 predicate modes is that there are no predicate structure modes
16024 nor any specific instructions for extracting part of a predicate
16025 register. */
16026 if (aarch64_vector_data_mode_p (mode1)
16027 && aarch64_vector_data_mode_p (mode2))
61f17a5c
WD
16028 return true;
16029
16030 /* Also allow any scalar modes with vectors. */
16031 if (aarch64_vector_mode_supported_p (mode1)
16032 || aarch64_vector_mode_supported_p (mode2))
97e1ad78
JG
16033 return true;
16034
16035 return false;
16036}
16037
e2c75eea
JG
16038/* Return a new RTX holding the result of moving POINTER forward by
16039 AMOUNT bytes. */
16040
16041static rtx
6a70badb 16042aarch64_move_pointer (rtx pointer, poly_int64 amount)
e2c75eea
JG
16043{
16044 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
16045
16046 return adjust_automodify_address (pointer, GET_MODE (pointer),
16047 next, amount);
16048}
16049
16050/* Return a new RTX holding the result of moving POINTER forward by the
16051 size of the mode it points to. */
16052
16053static rtx
16054aarch64_progress_pointer (rtx pointer)
16055{
6a70badb 16056 return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer)));
e2c75eea
JG
16057}
16058
16059/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
16060 MODE bytes. */
16061
16062static void
16063aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
ef4bddc2 16064 machine_mode mode)
e2c75eea
JG
16065{
16066 rtx reg = gen_reg_rtx (mode);
16067
16068 /* "Cast" the pointers to the correct mode. */
16069 *src = adjust_address (*src, mode, 0);
16070 *dst = adjust_address (*dst, mode, 0);
16071 /* Emit the memcpy. */
16072 emit_move_insn (reg, *src);
16073 emit_move_insn (*dst, reg);
16074 /* Move the pointers forward. */
16075 *src = aarch64_progress_pointer (*src);
16076 *dst = aarch64_progress_pointer (*dst);
16077}
16078
16079/* Expand movmem, as if from a __builtin_memcpy. Return true if
16080 we succeed, otherwise return false. */
16081
16082bool
16083aarch64_expand_movmem (rtx *operands)
16084{
16085 unsigned int n;
16086 rtx dst = operands[0];
16087 rtx src = operands[1];
16088 rtx base;
16089 bool speed_p = !optimize_function_for_size_p (cfun);
16090
16091 /* When optimizing for size, give a better estimate of the length of a
16092 memcpy call, but use the default otherwise. */
16093 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
16094
16095 /* We can't do anything smart if the amount to copy is not constant. */
16096 if (!CONST_INT_P (operands[2]))
16097 return false;
16098
16099 n = UINTVAL (operands[2]);
16100
16101 /* Try to keep the number of instructions low. For cases below 16 bytes we
16102 need to make at most two moves. For cases above 16 bytes it will be one
16103 move for each 16 byte chunk, then at most two additional moves. */
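  /* For example, when optimizing for speed a 35-byte copy needs two 16-byte
     chunks plus at most two further moves, i.e. 4 <= 7 instructions, so it
     is expanded inline rather than calling memcpy.  (Illustrative example
     only.)  */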
16104 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
16105 return false;
16106
16107 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16108 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
16109
16110 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
16111 src = adjust_automodify_address (src, VOIDmode, base, 0);
16112
16113 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
16114 1-byte chunk. */
16115 if (n < 4)
16116 {
16117 if (n >= 2)
16118 {
16119 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
16120 n -= 2;
16121 }
16122
16123 if (n == 1)
16124 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
16125
16126 return true;
16127 }
16128
16129 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
16130 4-byte chunk, partially overlapping with the previously copied chunk. */
16131 if (n < 8)
16132 {
16133 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
16134 n -= 4;
16135 if (n > 0)
16136 {
16137 int move = n - 4;
16138
16139 src = aarch64_move_pointer (src, move);
16140 dst = aarch64_move_pointer (dst, move);
16141 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
16142 }
16143 return true;
16144 }
16145
16146 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
16147 them, then (if applicable) an 8-byte chunk. */
16148 while (n >= 8)
16149 {
16150 if (n / 16)
16151 {
16152 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
16153 n -= 16;
16154 }
16155 else
16156 {
16157 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
16158 n -= 8;
16159 }
16160 }
16161
16162 /* Finish the final bytes of the copy. We can always do this in one
16163 instruction. We either copy the exact amount we need, or partially
 16164 overlap with the previous chunk we copied and copy 8 bytes. */
16165 if (n == 0)
16166 return true;
16167 else if (n == 1)
16168 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
16169 else if (n == 2)
16170 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
16171 else if (n == 4)
16172 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
16173 else
16174 {
16175 if (n == 3)
16176 {
16177 src = aarch64_move_pointer (src, -1);
16178 dst = aarch64_move_pointer (dst, -1);
16179 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
16180 }
16181 else
16182 {
16183 int move = n - 8;
16184
16185 src = aarch64_move_pointer (src, move);
16186 dst = aarch64_move_pointer (dst, move);
16187 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
16188 }
16189 }
16190
16191 return true;
16192}
16193
141a3ccf
KT
16194/* Split a DImode store of a CONST_INT SRC to MEM DST as two
16195 SImode stores. Handle the case when the constant has identical
16196 bottom and top halves. This is beneficial when the two stores can be
16197 merged into an STP and we avoid synthesising potentially expensive
16198 immediates twice. Return true if such a split is possible. */
16199
16200bool
16201aarch64_split_dimode_const_store (rtx dst, rtx src)
16202{
16203 rtx lo = gen_lowpart (SImode, src);
16204 rtx hi = gen_highpart_mode (SImode, DImode, src);
16205
16206 bool size_p = optimize_function_for_size_p (cfun);
16207
16208 if (!rtx_equal_p (lo, hi))
16209 return false;
16210
16211 unsigned int orig_cost
16212 = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode);
16213 unsigned int lo_cost
16214 = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode);
16215
16216 /* We want to transform:
16217 MOV x1, 49370
16218 MOVK x1, 0x140, lsl 16
16219 MOVK x1, 0xc0da, lsl 32
16220 MOVK x1, 0x140, lsl 48
16221 STR x1, [x0]
16222 into:
16223 MOV w1, 49370
16224 MOVK w1, 0x140, lsl 16
16225 STP w1, w1, [x0]
16226 So we want to perform this only when we save two instructions
16227 or more. When optimizing for size, however, accept any code size
16228 savings we can. */
16229 if (size_p && orig_cost <= lo_cost)
16230 return false;
16231
16232 if (!size_p
16233 && (orig_cost <= lo_cost + 1))
16234 return false;
16235
16236 rtx mem_lo = adjust_address (dst, SImode, 0);
16237 if (!aarch64_mem_pair_operand (mem_lo, SImode))
16238 return false;
16239
16240 rtx tmp_reg = gen_reg_rtx (SImode);
16241 aarch64_expand_mov_immediate (tmp_reg, lo);
16242 rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode));
16243 /* Don't emit an explicit store pair as this may not be always profitable.
16244 Let the sched-fusion logic decide whether to merge them. */
16245 emit_move_insn (mem_lo, tmp_reg);
16246 emit_move_insn (mem_hi, tmp_reg);
16247
16248 return true;
16249}
16250
a3125fc2
CL
16251/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16252
16253static unsigned HOST_WIDE_INT
16254aarch64_asan_shadow_offset (void)
16255{
16256 return (HOST_WIDE_INT_1 << 36);
16257}
16258
5f3bc026 16259static rtx
cb4347e8 16260aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
5f3bc026
ZC
16261 int code, tree treeop0, tree treeop1)
16262{
c8012fbc
WD
16263 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
16264 rtx op0, op1;
5f3bc026 16265 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 16266 insn_code icode;
5f3bc026
ZC
16267 struct expand_operand ops[4];
16268
5f3bc026
ZC
16269 start_sequence ();
16270 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
16271
16272 op_mode = GET_MODE (op0);
16273 if (op_mode == VOIDmode)
16274 op_mode = GET_MODE (op1);
16275
16276 switch (op_mode)
16277 {
4e10a5a7
RS
16278 case E_QImode:
16279 case E_HImode:
16280 case E_SImode:
5f3bc026
ZC
16281 cmp_mode = SImode;
16282 icode = CODE_FOR_cmpsi;
16283 break;
16284
4e10a5a7 16285 case E_DImode:
5f3bc026
ZC
16286 cmp_mode = DImode;
16287 icode = CODE_FOR_cmpdi;
16288 break;
16289
4e10a5a7 16290 case E_SFmode:
786e3c06
WD
16291 cmp_mode = SFmode;
16292 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
16293 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
16294 break;
16295
4e10a5a7 16296 case E_DFmode:
786e3c06
WD
16297 cmp_mode = DFmode;
16298 cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
16299 icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
16300 break;
16301
5f3bc026
ZC
16302 default:
16303 end_sequence ();
16304 return NULL_RTX;
16305 }
16306
c8012fbc
WD
16307 op0 = prepare_operand (icode, op0, 0, op_mode, cmp_mode, unsignedp);
16308 op1 = prepare_operand (icode, op1, 1, op_mode, cmp_mode, unsignedp);
5f3bc026
ZC
16309 if (!op0 || !op1)
16310 {
16311 end_sequence ();
16312 return NULL_RTX;
16313 }
16314 *prep_seq = get_insns ();
16315 end_sequence ();
16316
c8012fbc
WD
16317 create_fixed_operand (&ops[0], op0);
16318 create_fixed_operand (&ops[1], op1);
5f3bc026
ZC
16319
16320 start_sequence ();
c8012fbc 16321 if (!maybe_expand_insn (icode, 2, ops))
5f3bc026
ZC
16322 {
16323 end_sequence ();
16324 return NULL_RTX;
16325 }
16326 *gen_seq = get_insns ();
16327 end_sequence ();
16328
c8012fbc
WD
16329 return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
16330 gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
5f3bc026
ZC
16331}
16332
16333static rtx
cb4347e8
TS
16334aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
16335 int cmp_code, tree treeop0, tree treeop1, int bit_code)
5f3bc026 16336{
c8012fbc
WD
16337 rtx op0, op1, target;
16338 machine_mode op_mode, cmp_mode, cc_mode = CCmode;
5f3bc026 16339 int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
c8012fbc 16340 insn_code icode;
5f3bc026 16341 struct expand_operand ops[6];
c8012fbc 16342 int aarch64_cond;
5f3bc026 16343
cb4347e8 16344 push_to_sequence (*prep_seq);
5f3bc026
ZC
16345 expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
16346
16347 op_mode = GET_MODE (op0);
16348 if (op_mode == VOIDmode)
16349 op_mode = GET_MODE (op1);
16350
16351 switch (op_mode)
16352 {
4e10a5a7
RS
16353 case E_QImode:
16354 case E_HImode:
16355 case E_SImode:
5f3bc026 16356 cmp_mode = SImode;
c8012fbc 16357 icode = CODE_FOR_ccmpsi;
5f3bc026
ZC
16358 break;
16359
4e10a5a7 16360 case E_DImode:
5f3bc026 16361 cmp_mode = DImode;
c8012fbc 16362 icode = CODE_FOR_ccmpdi;
5f3bc026
ZC
16363 break;
16364
4e10a5a7 16365 case E_SFmode:
786e3c06
WD
16366 cmp_mode = SFmode;
16367 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
16368 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpesf : CODE_FOR_fccmpsf;
16369 break;
16370
4e10a5a7 16371 case E_DFmode:
786e3c06
WD
16372 cmp_mode = DFmode;
16373 cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
16374 icode = cc_mode == CCFPEmode ? CODE_FOR_fccmpedf : CODE_FOR_fccmpdf;
16375 break;
16376
5f3bc026
ZC
16377 default:
16378 end_sequence ();
16379 return NULL_RTX;
16380 }
16381
16382 op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
16383 op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
16384 if (!op0 || !op1)
16385 {
16386 end_sequence ();
16387 return NULL_RTX;
16388 }
16389 *prep_seq = get_insns ();
16390 end_sequence ();
16391
16392 target = gen_rtx_REG (cc_mode, CC_REGNUM);
c8012fbc 16393 aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
5f3bc026 16394
c8012fbc
WD
16395 if (bit_code != AND)
16396 {
16397 prev = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev),
16398 GET_MODE (XEXP (prev, 0))),
16399 VOIDmode, XEXP (prev, 0), const0_rtx);
16400 aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
16401 }
16402
16403 create_fixed_operand (&ops[0], XEXP (prev, 0));
5f3bc026
ZC
16404 create_fixed_operand (&ops[1], target);
16405 create_fixed_operand (&ops[2], op0);
16406 create_fixed_operand (&ops[3], op1);
c8012fbc
WD
16407 create_fixed_operand (&ops[4], prev);
16408 create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
5f3bc026 16409
cb4347e8 16410 push_to_sequence (*gen_seq);
5f3bc026
ZC
16411 if (!maybe_expand_insn (icode, 6, ops))
16412 {
16413 end_sequence ();
16414 return NULL_RTX;
16415 }
16416
16417 *gen_seq = get_insns ();
16418 end_sequence ();
16419
c8012fbc 16420 return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
5f3bc026
ZC
16421}
16422
16423#undef TARGET_GEN_CCMP_FIRST
16424#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
16425
16426#undef TARGET_GEN_CCMP_NEXT
16427#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
16428
6a569cdd
KT
16429/* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
16430 instruction fusion of some sort. */
16431
16432static bool
16433aarch64_macro_fusion_p (void)
16434{
b175b679 16435 return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
6a569cdd
KT
16436}
16437
16438
16439/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
16440 should be kept together during scheduling. */
16441
16442static bool
16443aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
16444{
16445 rtx set_dest;
16446 rtx prev_set = single_set (prev);
16447 rtx curr_set = single_set (curr);
16448 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
16449 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
16450
16451 if (!aarch64_macro_fusion_p ())
16452 return false;
16453
d7b03373 16454 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK))
6a569cdd
KT
16455 {
16456 /* We are trying to match:
16457 prev (mov) == (set (reg r0) (const_int imm16))
16458 curr (movk) == (set (zero_extract (reg r0)
16459 (const_int 16)
16460 (const_int 16))
16461 (const_int imm16_1)) */
16462
16463 set_dest = SET_DEST (curr_set);
16464
16465 if (GET_CODE (set_dest) == ZERO_EXTRACT
16466 && CONST_INT_P (SET_SRC (curr_set))
16467 && CONST_INT_P (SET_SRC (prev_set))
16468 && CONST_INT_P (XEXP (set_dest, 2))
16469 && INTVAL (XEXP (set_dest, 2)) == 16
16470 && REG_P (XEXP (set_dest, 0))
16471 && REG_P (SET_DEST (prev_set))
16472 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
16473 {
16474 return true;
16475 }
16476 }
16477
d7b03373 16478 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD))
9bbe08fe
KT
16479 {
16480
16481 /* We're trying to match:
16482 prev (adrp) == (set (reg r1)
16483 (high (symbol_ref ("SYM"))))
16484 curr (add) == (set (reg r0)
16485 (lo_sum (reg r1)
16486 (symbol_ref ("SYM"))))
16487 Note that r0 need not necessarily be the same as r1, especially
16488 during pre-regalloc scheduling. */
16489
16490 if (satisfies_constraint_Ush (SET_SRC (prev_set))
16491 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
16492 {
16493 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
16494 && REG_P (XEXP (SET_SRC (curr_set), 0))
16495 && REGNO (XEXP (SET_SRC (curr_set), 0))
16496 == REGNO (SET_DEST (prev_set))
16497 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
16498 XEXP (SET_SRC (curr_set), 1)))
16499 return true;
16500 }
16501 }
16502
d7b03373 16503 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK))
cd0cb232
KT
16504 {
16505
16506 /* We're trying to match:
16507 prev (movk) == (set (zero_extract (reg r0)
16508 (const_int 16)
16509 (const_int 32))
16510 (const_int imm16_1))
16511 curr (movk) == (set (zero_extract (reg r0)
16512 (const_int 16)
16513 (const_int 48))
16514 (const_int imm16_2)) */
16515
16516 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
16517 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
16518 && REG_P (XEXP (SET_DEST (prev_set), 0))
16519 && REG_P (XEXP (SET_DEST (curr_set), 0))
16520 && REGNO (XEXP (SET_DEST (prev_set), 0))
16521 == REGNO (XEXP (SET_DEST (curr_set), 0))
16522 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
16523 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
16524 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
16525 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
16526 && CONST_INT_P (SET_SRC (prev_set))
16527 && CONST_INT_P (SET_SRC (curr_set)))
16528 return true;
16529
16530 }
d7b03373 16531 if (simple_sets_p && aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR))
d8354ad7
KT
16532 {
16533 /* We're trying to match:
16534 prev (adrp) == (set (reg r0)
16535 (high (symbol_ref ("SYM"))))
16536 curr (ldr) == (set (reg r1)
16537 (mem (lo_sum (reg r0)
16538 (symbol_ref ("SYM")))))
16539 or
16540 curr (ldr) == (set (reg r1)
16541 (zero_extend (mem
16542 (lo_sum (reg r0)
16543 (symbol_ref ("SYM")))))) */
16544 if (satisfies_constraint_Ush (SET_SRC (prev_set))
16545 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
16546 {
16547 rtx curr_src = SET_SRC (curr_set);
16548
16549 if (GET_CODE (curr_src) == ZERO_EXTEND)
16550 curr_src = XEXP (curr_src, 0);
16551
16552 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
16553 && REG_P (XEXP (XEXP (curr_src, 0), 0))
16554 && REGNO (XEXP (XEXP (curr_src, 0), 0))
16555 == REGNO (SET_DEST (prev_set))
16556 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
16557 XEXP (SET_SRC (prev_set), 0)))
16558 return true;
16559 }
16560 }
cd0cb232 16561
d7b03373 16562 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
00a8574a
WD
16563 && aarch_crypto_can_dual_issue (prev, curr))
16564 return true;
16565
d7b03373 16566 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
3759108f
AP
16567 && any_condjump_p (curr))
16568 {
16569 enum attr_type prev_type = get_attr_type (prev);
16570
509f819a
N
16571 unsigned int condreg1, condreg2;
16572 rtx cc_reg_1;
16573 aarch64_fixed_condition_code_regs (&condreg1, &condreg2);
16574 cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
16575
16576 if (reg_referenced_p (cc_reg_1, PATTERN (curr))
16577 && prev
16578 && modified_in_p (cc_reg_1, prev))
16579 {
 16580 /* FIXME: this misses some cases that are considered simple
 16581 arithmetic instructions for ThunderX. Simple shifts are missed here. */
16582 if (prev_type == TYPE_ALUS_SREG
16583 || prev_type == TYPE_ALUS_IMM
16584 || prev_type == TYPE_LOGICS_REG
16585 || prev_type == TYPE_LOGICS_IMM)
16586 return true;
16587 }
3759108f
AP
16588 }
16589
bee7e0fc
AP
16590 if (prev_set
16591 && curr_set
16592 && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
00c7c57f
JB
16593 && any_condjump_p (curr))
16594 {
16595 /* We're trying to match:
16596 prev (alu_insn) == (set (r0) plus ((r0) (r1/imm)))
16597 curr (cbz) == (set (pc) (if_then_else (eq/ne) (r0)
16598 (const_int 0))
16599 (label_ref ("SYM"))
16600 (pc)) */
16601 if (SET_DEST (curr_set) == (pc_rtx)
16602 && GET_CODE (SET_SRC (curr_set)) == IF_THEN_ELSE
16603 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
16604 && REG_P (SET_DEST (prev_set))
16605 && REGNO (SET_DEST (prev_set))
16606 == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
16607 {
16608 /* Fuse ALU operations followed by conditional branch instruction. */
16609 switch (get_attr_type (prev))
16610 {
16611 case TYPE_ALU_IMM:
16612 case TYPE_ALU_SREG:
16613 case TYPE_ADC_REG:
16614 case TYPE_ADC_IMM:
16615 case TYPE_ADCS_REG:
16616 case TYPE_ADCS_IMM:
16617 case TYPE_LOGIC_REG:
16618 case TYPE_LOGIC_IMM:
16619 case TYPE_CSEL:
16620 case TYPE_ADR:
16621 case TYPE_MOV_IMM:
16622 case TYPE_SHIFT_REG:
16623 case TYPE_SHIFT_IMM:
16624 case TYPE_BFM:
16625 case TYPE_RBIT:
16626 case TYPE_REV:
16627 case TYPE_EXTEND:
16628 return true;
16629
16630 default:;
16631 }
16632 }
16633 }
16634
6a569cdd
KT
16635 return false;
16636}
16637
f2879a90
KT
16638/* Return true iff the instruction fusion described by OP is enabled. */
16639
16640bool
16641aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op)
16642{
16643 return (aarch64_tune_params.fusible_ops & op) != 0;
16644}
16645
350013bc
BC
16646/* If MEM is in the form [base+offset], extract the two parts of the
 16647 address into BASE and OFFSET; otherwise return false after clearing
 16648 BASE and OFFSET. */
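/* For example, for a MEM whose address is (plus (reg x1) (const_int 16))
   this sets *BASE to the register rtx and *OFFSET to (const_int 16) and
   returns true.  (Illustrative example only.)  */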
16649
16650bool
16651extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
16652{
16653 rtx addr;
16654
16655 gcc_assert (MEM_P (mem));
16656
16657 addr = XEXP (mem, 0);
16658
16659 if (REG_P (addr))
16660 {
16661 *base = addr;
16662 *offset = const0_rtx;
16663 return true;
16664 }
16665
16666 if (GET_CODE (addr) == PLUS
16667 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
16668 {
16669 *base = XEXP (addr, 0);
16670 *offset = XEXP (addr, 1);
16671 return true;
16672 }
16673
16674 *base = NULL_RTX;
16675 *offset = NULL_RTX;
16676
16677 return false;
16678}
16679
16680/* Types for scheduling fusion. */
16681enum sched_fusion_type
16682{
16683 SCHED_FUSION_NONE = 0,
16684 SCHED_FUSION_LD_SIGN_EXTEND,
16685 SCHED_FUSION_LD_ZERO_EXTEND,
16686 SCHED_FUSION_LD,
16687 SCHED_FUSION_ST,
16688 SCHED_FUSION_NUM
16689};
16690
16691/* If INSN is a load or store with an address in the form [base+offset],
 16692 extract the two parts into BASE and OFFSET. Return the scheduling
 16693 fusion type of this INSN. */
16694
16695static enum sched_fusion_type
16696fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
16697{
16698 rtx x, dest, src;
16699 enum sched_fusion_type fusion = SCHED_FUSION_LD;
16700
16701 gcc_assert (INSN_P (insn));
16702 x = PATTERN (insn);
16703 if (GET_CODE (x) != SET)
16704 return SCHED_FUSION_NONE;
16705
16706 src = SET_SRC (x);
16707 dest = SET_DEST (x);
16708
abc52318
KT
16709 machine_mode dest_mode = GET_MODE (dest);
16710
16711 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
350013bc
BC
16712 return SCHED_FUSION_NONE;
16713
16714 if (GET_CODE (src) == SIGN_EXTEND)
16715 {
16716 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
16717 src = XEXP (src, 0);
16718 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
16719 return SCHED_FUSION_NONE;
16720 }
16721 else if (GET_CODE (src) == ZERO_EXTEND)
16722 {
16723 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
16724 src = XEXP (src, 0);
16725 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
16726 return SCHED_FUSION_NONE;
16727 }
16728
16729 if (GET_CODE (src) == MEM && REG_P (dest))
16730 extract_base_offset_in_addr (src, base, offset);
16731 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
16732 {
16733 fusion = SCHED_FUSION_ST;
16734 extract_base_offset_in_addr (dest, base, offset);
16735 }
16736 else
16737 return SCHED_FUSION_NONE;
16738
16739 if (*base == NULL_RTX || *offset == NULL_RTX)
16740 fusion = SCHED_FUSION_NONE;
16741
16742 return fusion;
16743}
16744
16745/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
16746
 16747 Currently we only support fusing ldr and str instructions, so FUSION_PRI
 16748 and PRI are only calculated for these instructions. For other instructions,
 16749 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
 16750 types of instruction fusion can be added by returning different priorities.
16751
16752 It's important that irrelevant instructions get the largest FUSION_PRI. */
16753
16754static void
16755aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
16756 int *fusion_pri, int *pri)
16757{
16758 int tmp, off_val;
16759 rtx base, offset;
16760 enum sched_fusion_type fusion;
16761
16762 gcc_assert (INSN_P (insn));
16763
16764 tmp = max_pri - 1;
16765 fusion = fusion_load_store (insn, &base, &offset);
16766 if (fusion == SCHED_FUSION_NONE)
16767 {
16768 *pri = tmp;
16769 *fusion_pri = tmp;
16770 return;
16771 }
16772
16773 /* Set FUSION_PRI according to fusion type and base register. */
16774 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
16775
16776 /* Calculate PRI. */
16777 tmp /= 2;
16778
16779 /* INSN with smaller offset goes first. */
16780 off_val = (int)(INTVAL (offset));
16781 if (off_val >= 0)
16782 tmp -= (off_val & 0xfffff);
16783 else
16784 tmp += ((- off_val) & 0xfffff);
16785
16786 *pri = tmp;
16787 return;
16788}
16789
9bca63d4
WD
16790/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
16791 Adjust priority of sha1h instructions so they are scheduled before
16792 other SHA1 instructions. */
16793
16794static int
16795aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
16796{
16797 rtx x = PATTERN (insn);
16798
16799 if (GET_CODE (x) == SET)
16800 {
16801 x = SET_SRC (x);
16802
16803 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
16804 return priority + 10;
16805 }
16806
16807 return priority;
16808}
16809
350013bc
BC
16810/* Given OPERANDS of consecutive load/store, check if we can merge
16811 them into ldp/stp. LOAD is true if they are load instructions.
16812 MODE is the mode of memory operands. */
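/* For example, two DImode loads from [base, 8] and [base, 16] into distinct
   general registers can typically be merged into a single LDP, provided
   neither destination register appears in the addresses.  (Illustrative
   example only.)  */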
16813
16814bool
16815aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
b8506a8a 16816 machine_mode mode)
350013bc
BC
16817{
16818 HOST_WIDE_INT offval_1, offval_2, msize;
16819 enum reg_class rclass_1, rclass_2;
16820 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
16821
16822 if (load)
16823 {
16824 mem_1 = operands[1];
16825 mem_2 = operands[3];
16826 reg_1 = operands[0];
16827 reg_2 = operands[2];
16828 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
16829 if (REGNO (reg_1) == REGNO (reg_2))
16830 return false;
16831 }
16832 else
16833 {
16834 mem_1 = operands[0];
16835 mem_2 = operands[2];
16836 reg_1 = operands[1];
16837 reg_2 = operands[3];
16838 }
16839
bf84ac44
AP
16840 /* The mems cannot be volatile. */
16841 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
16842 return false;
16843
54700e2e
AP
16844 /* If we have SImode and slow unaligned ldp,
 16845 check that the alignment is at least 8 bytes. */
16846 if (mode == SImode
16847 && (aarch64_tune_params.extra_tuning_flags
16848 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
16849 && !optimize_size
16850 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
16851 return false;
16852
350013bc
BC
16853 /* Check if the addresses are in the form of [base+offset]. */
16854 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
16855 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
16856 return false;
16857 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
16858 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
16859 return false;
16860
16861 /* Check if the bases are the same. */
16862 if (!rtx_equal_p (base_1, base_2))
16863 return false;
16864
dfe1da23
JW
16865 /* The operands must be of the same size. */
16866 gcc_assert (known_eq (GET_MODE_SIZE (GET_MODE (mem_1)),
16867 GET_MODE_SIZE (GET_MODE (mem_2))));
16868
350013bc
BC
16869 offval_1 = INTVAL (offset_1);
16870 offval_2 = INTVAL (offset_2);
6a70badb
RS
16871 /* We should only be trying this for fixed-sized modes. There is no
16872 SVE LDP/STP instruction. */
16873 msize = GET_MODE_SIZE (mode).to_constant ();
350013bc
BC
16874 /* Check if the offsets are consecutive. */
16875 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
16876 return false;
16877
16878 /* Check if the addresses are clobbered by load. */
16879 if (load)
16880 {
16881 if (reg_mentioned_p (reg_1, mem_1))
16882 return false;
16883
16884 /* In increasing order, the last load can clobber the address. */
16885 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
9b56ec11 16886 return false;
350013bc
BC
16887 }
16888
9b56ec11
JW
16889 /* One of the memory accesses must be a mempair operand.
16890 If it is not the first one, they need to be swapped by the
16891 peephole. */
16892 if (!aarch64_mem_pair_operand (mem_1, GET_MODE (mem_1))
16893 && !aarch64_mem_pair_operand (mem_2, GET_MODE (mem_2)))
16894 return false;
16895
350013bc
BC
16896 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
16897 rclass_1 = FP_REGS;
16898 else
16899 rclass_1 = GENERAL_REGS;
16900
16901 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
16902 rclass_2 = FP_REGS;
16903 else
16904 rclass_2 = GENERAL_REGS;
16905
16906 /* Check if the registers are of the same class. */
16907 if (rclass_1 != rclass_2)
16908 return false;
16909
16910 return true;
16911}
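/* Illustrative sketch (hypothetical RTL, not a claim about any particular
   peephole): for the loads  ldr w0, [x2, 4]  and  ldr w1, [x2, 8]  the
   bases match, the offsets differ by the 4-byte access size, the
   destination registers are distinct and both in GENERAL_REGS, and
   [x2, 4] is a valid mem pair operand, so the pair can be rewritten as
   ldp w0, w1, [x2, 4].  */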
16912
9b56ec11
JW
16913/* Given OPERANDS of consecutive load/store that can be merged,
16914 swap them if they are not in ascending order. */
16915void
16916aarch64_swap_ldrstr_operands (rtx* operands, bool load)
16917{
16918 rtx mem_1, mem_2, base_1, base_2, offset_1, offset_2;
16919 HOST_WIDE_INT offval_1, offval_2;
16920
16921 if (load)
16922 {
16923 mem_1 = operands[1];
16924 mem_2 = operands[3];
16925 }
16926 else
16927 {
16928 mem_1 = operands[0];
16929 mem_2 = operands[2];
16930 }
16931
16932 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
16933 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
16934
16935 offval_1 = INTVAL (offset_1);
16936 offval_2 = INTVAL (offset_2);
16937
16938 if (offval_1 > offval_2)
16939 {
16940 /* Irrespective of whether this is a load or a store,
16941 we do the same swap. */
16942 std::swap (operands[0], operands[2]);
16943 std::swap (operands[1], operands[3]);
16944 }
16945}
16946
350013bc
BC
16947/* Given OPERANDS of consecutive load/store, check if we can merge
16948 them into ldp/stp by adjusting the offset. LOAD is true if they
16949 are load instructions. MODE is the mode of memory operands.
16950
16951 Given the consecutive stores below:
16952
16953 str w1, [xb, 0x100]
16954 str w1, [xb, 0x104]
16955 str w1, [xb, 0x108]
16956 str w1, [xb, 0x10c]
16957
16958 Though the offsets are out of the range supported by stp, we can
16959 still pair them after adjusting the offset, like:
16960
16961 add scratch, xb, 0x100
16962 stp w1, w1, [scratch]
16963 stp w1, w1, [scratch, 0x8]
16964
16965 The peephole patterns detecting this opportunity should guarantee
16966 the scratch register is available. */
16967
16968bool
16969aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
146c2e3a 16970 scalar_mode mode)
350013bc
BC
16971{
16972 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
16973 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
16974 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
16975 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
16976
16977 if (load)
16978 {
16979 reg_1 = operands[0];
16980 mem_1 = operands[1];
16981 reg_2 = operands[2];
16982 mem_2 = operands[3];
16983 reg_3 = operands[4];
16984 mem_3 = operands[5];
16985 reg_4 = operands[6];
16986 mem_4 = operands[7];
16987 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
16988 && REG_P (reg_3) && REG_P (reg_4));
16989 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
16990 return false;
16991 }
16992 else
16993 {
16994 mem_1 = operands[0];
16995 reg_1 = operands[1];
16996 mem_2 = operands[2];
16997 reg_2 = operands[3];
16998 mem_3 = operands[4];
16999 reg_3 = operands[5];
17000 mem_4 = operands[6];
17001 reg_4 = operands[7];
17002 }
17003 /* Skip if the memory operand is by itself valid for ldp/stp. */
17004 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
17005 return false;
17006
bf84ac44
AP
17007 /* The mems cannot be volatile. */
17008 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
17009 || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
17010 return false;
17011
350013bc
BC
17012 /* Check if the addresses are in the form of [base+offset]. */
17013 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
17014 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
17015 return false;
17016 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
17017 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
17018 return false;
17019 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
17020 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
17021 return false;
17022 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
17023 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
17024 return false;
17025
17026 /* Check if the bases are the same. */
17027 if (!rtx_equal_p (base_1, base_2)
17028 || !rtx_equal_p (base_2, base_3)
17029 || !rtx_equal_p (base_3, base_4))
17030 return false;
17031
17032 offval_1 = INTVAL (offset_1);
17033 offval_2 = INTVAL (offset_2);
17034 offval_3 = INTVAL (offset_3);
17035 offval_4 = INTVAL (offset_4);
17036 msize = GET_MODE_SIZE (mode);
17037 /* Check if the offsets are consecutive. */
17038 if ((offval_1 != (offval_2 + msize)
17039 || offval_1 != (offval_3 + msize * 2)
17040 || offval_1 != (offval_4 + msize * 3))
17041 && (offval_4 != (offval_3 + msize)
17042 || offval_4 != (offval_2 + msize * 2)
17043 || offval_4 != (offval_1 + msize * 3)))
17044 return false;
17045
17046 /* Check if the addresses are clobbered by load. */
17047 if (load)
17048 {
17049 if (reg_mentioned_p (reg_1, mem_1)
17050 || reg_mentioned_p (reg_2, mem_2)
17051 || reg_mentioned_p (reg_3, mem_3))
17052 return false;
17053
17054 /* In increasing order, the last load can clobber the address. */
17055 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
17056 return false;
17057 }
17058
54700e2e
AP
17059 /* If we have SImode and slow unaligned ldp,
17060 check that the alignment is at least 8 bytes. */
17061 if (mode == SImode
17062 && (aarch64_tune_params.extra_tuning_flags
17063 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
17064 && !optimize_size
17065 && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
17066 return false;
17067
350013bc
BC
17068 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
17069 rclass_1 = FP_REGS;
17070 else
17071 rclass_1 = GENERAL_REGS;
17072
17073 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
17074 rclass_2 = FP_REGS;
17075 else
17076 rclass_2 = GENERAL_REGS;
17077
17078 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
17079 rclass_3 = FP_REGS;
17080 else
17081 rclass_3 = GENERAL_REGS;
17082
17083 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
17084 rclass_4 = FP_REGS;
17085 else
17086 rclass_4 = GENERAL_REGS;
17087
17088 /* Check if the registers are of the same class. */
17089 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
17090 return false;
17091
17092 return true;
17093}
17094
17095/* Given OPERANDS of consecutive load/store, this function pairs them
17096 into ldp/stp after adjusting the offset. It depends on the fact
17097 that addresses of load/store instructions are in increasing order.
17098 MODE is the mode of memory operands. CODE is the rtl operator
17099 which should be applied to all memory operands; it is SIGN_EXTEND,
17100 ZERO_EXTEND or UNKNOWN. */
17101
17102bool
17103aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
146c2e3a 17104 scalar_mode mode, RTX_CODE code)
350013bc 17105{
9b56ec11 17106 rtx base, offset_1, offset_2, t1, t2;
350013bc
BC
17107 rtx mem_1, mem_2, mem_3, mem_4;
17108 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
17109
9b56ec11
JW
17110 if (load)
17111 {
17112 mem_1 = operands[1];
17113 mem_2 = operands[3];
17114 }
17115 else
17116 {
17117 mem_1 = operands[0];
17118 mem_2 = operands[2];
17119 }
17120
17121 extract_base_offset_in_addr (mem_1, &base, &offset_1);
17122 extract_base_offset_in_addr (mem_2, &base, &offset_2);
17123 gcc_assert (base != NULL_RTX && offset_1 != NULL_RTX
17124 && offset_2 != NULL_RTX);
17125
17126 if (INTVAL (offset_1) > INTVAL (offset_2))
17127 {
17128 std::swap (operands[0], operands[6]);
17129 std::swap (operands[1], operands[7]);
17130 std::swap (operands[2], operands[4]);
17131 std::swap (operands[3], operands[5]);
17132 }
17133
350013bc
BC
17134 if (load)
17135 {
17136 mem_1 = operands[1];
17137 mem_2 = operands[3];
17138 mem_3 = operands[5];
17139 mem_4 = operands[7];
17140 }
17141 else
17142 {
17143 mem_1 = operands[0];
17144 mem_2 = operands[2];
17145 mem_3 = operands[4];
17146 mem_4 = operands[6];
17147 gcc_assert (code == UNKNOWN);
17148 }
17149
9b56ec11
JW
17150 /* Extract the offset of the new first address. */
17151 extract_base_offset_in_addr (mem_1, &base, &offset_1);
17152 extract_base_offset_in_addr (mem_2, &base, &offset_2);
350013bc
BC
17153
17154 /* Adjust the offset so that it fits in an ldp/stp instruction. */
17155 msize = GET_MODE_SIZE (mode);
17156 stp_off_limit = msize * 0x40;
9b56ec11 17157 off_val = INTVAL (offset_1);
350013bc
BC
17158 abs_off = (off_val < 0) ? -off_val : off_val;
17159 new_off = abs_off % stp_off_limit;
17160 adj_off = abs_off - new_off;
17161
17162 /* Further adjust to make sure all offsets are OK. */
17163 if ((new_off + msize * 2) >= stp_off_limit)
17164 {
17165 adj_off += stp_off_limit;
17166 new_off -= stp_off_limit;
17167 }
17168
17169 /* Make sure the adjustment can be done with ADD/SUB instructions. */
17170 if (adj_off >= 0x1000)
17171 return false;
17172
17173 if (off_val < 0)
17174 {
17175 adj_off = -adj_off;
17176 new_off = -new_off;
17177 }
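  /* Worked example (assuming the SImode stores at xb + 0x100 shown in the
     comment above this function): msize = 4, so stp_off_limit = 0x100.
     abs_off = 0x100 gives new_off = 0 and adj_off = 0x100, which fits in
     an ADD immediate, so the code emitted below is
	 add	scratch, xb, 0x100
	 stp	w1, w1, [scratch]
	 stp	w1, w1, [scratch, 8].  */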
17178
17179 /* Create new memory references. */
17180 mem_1 = change_address (mem_1, VOIDmode,
17181 plus_constant (DImode, operands[8], new_off));
17182
17183 /* Check if the adjusted address is OK for ldp/stp. */
17184 if (!aarch64_mem_pair_operand (mem_1, mode))
17185 return false;
17186
17187 msize = GET_MODE_SIZE (mode);
17188 mem_2 = change_address (mem_2, VOIDmode,
17189 plus_constant (DImode,
17190 operands[8],
17191 new_off + msize));
17192 mem_3 = change_address (mem_3, VOIDmode,
17193 plus_constant (DImode,
17194 operands[8],
17195 new_off + msize * 2));
17196 mem_4 = change_address (mem_4, VOIDmode,
17197 plus_constant (DImode,
17198 operands[8],
17199 new_off + msize * 3));
17200
17201 if (code == ZERO_EXTEND)
17202 {
17203 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
17204 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
17205 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
17206 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
17207 }
17208 else if (code == SIGN_EXTEND)
17209 {
17210 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
17211 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
17212 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
17213 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
17214 }
17215
17216 if (load)
17217 {
17218 operands[1] = mem_1;
17219 operands[3] = mem_2;
17220 operands[5] = mem_3;
17221 operands[7] = mem_4;
17222 }
17223 else
17224 {
17225 operands[0] = mem_1;
17226 operands[2] = mem_2;
17227 operands[4] = mem_3;
17228 operands[6] = mem_4;
17229 }
17230
17231 /* Emit adjusting instruction. */
f7df4a84 17232 emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
350013bc 17233 /* Emit ldp/stp instructions. */
f7df4a84
RS
17234 t1 = gen_rtx_SET (operands[0], operands[1]);
17235 t2 = gen_rtx_SET (operands[2], operands[3]);
350013bc 17236 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
f7df4a84
RS
17237 t1 = gen_rtx_SET (operands[4], operands[5]);
17238 t2 = gen_rtx_SET (operands[6], operands[7]);
350013bc
BC
17239 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
17240 return true;
17241}
17242
76a34e3f
RS
17243/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
17244 it isn't worth branching around empty masked ops (including masked
17245 stores). */
17246
17247static bool
17248aarch64_empty_mask_is_expensive (unsigned)
17249{
17250 return false;
17251}
17252
1b1e81f8
JW
17253/* Return true if a pseudo register should be created and used to hold
17254 the GOT address for PIC code. */
17255
17256bool
17257aarch64_use_pseudo_pic_reg (void)
17258{
17259 return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
17260}
17261
7b841a12
JW
17262/* Implement TARGET_UNSPEC_MAY_TRAP_P. */
17263
17264static int
17265aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
17266{
17267 switch (XINT (x, 1))
17268 {
17269 case UNSPEC_GOTSMALLPIC:
17270 case UNSPEC_GOTSMALLPIC28K:
17271 case UNSPEC_GOTTINYPIC:
17272 return 0;
17273 default:
17274 break;
17275 }
17276
17277 return default_unspec_may_trap_p (x, flags);
17278}
17279
39252973
KT
17280
17281/* If X is a positive CONST_DOUBLE with a value that is a power of 2
17282 return the log2 of that value. Otherwise return -1. */
17283
17284int
17285aarch64_fpconst_pow_of_2 (rtx x)
17286{
17287 const REAL_VALUE_TYPE *r;
17288
17289 if (!CONST_DOUBLE_P (x))
17290 return -1;
17291
17292 r = CONST_DOUBLE_REAL_VALUE (x);
17293
17294 if (REAL_VALUE_NEGATIVE (*r)
17295 || REAL_VALUE_ISNAN (*r)
17296 || REAL_VALUE_ISINF (*r)
17297 || !real_isinteger (r, DFmode))
17298 return -1;
17299
17300 return exact_log2 (real_to_integer (r));
17301}
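/* For example (illustrative): a CONST_DOUBLE of 8.0 returns exact_log2 (8)
   = 3, whereas 3.0, 0.5, negative values, NaNs and infinities all return
   -1, so callers can rely on a non-negative result meaning "exact power
   of two".  */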
17302
17303/* If X is a vector of equal CONST_DOUBLE values and that value is
17304 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
17305
17306int
17307aarch64_vec_fpconst_pow_of_2 (rtx x)
17308{
6a70badb
RS
17309 int nelts;
17310 if (GET_CODE (x) != CONST_VECTOR
17311 || !CONST_VECTOR_NUNITS (x).is_constant (&nelts))
39252973
KT
17312 return -1;
17313
17314 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
17315 return -1;
17316
17317 int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
17318 if (firstval <= 0)
17319 return -1;
17320
6a70badb 17321 for (int i = 1; i < nelts; i++)
39252973
KT
17322 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
17323 return -1;
17324
17325 return firstval;
17326}
17327
11e554b3
JG
17328/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
17329 to float.
17330
17331 __fp16 always promotes through this hook.
17332 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
17333 through the generic excess precision logic rather than here. */
17334
c2ec330c
AL
17335static tree
17336aarch64_promoted_type (const_tree t)
17337{
11e554b3
JG
17338 if (SCALAR_FLOAT_TYPE_P (t)
17339 && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
c2ec330c 17340 return float_type_node;
11e554b3 17341
c2ec330c
AL
17342 return NULL_TREE;
17343}
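/* For example (illustrative): given  __fp16 a, b;  the expression  a + b
   is evaluated after promoting both operands to float, because this hook
   reports float_type_node as the promoted type of __fp16.  */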
ee62a5a6
RS
17344
17345/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
17346
17347static bool
9acc9cbe 17348aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
ee62a5a6
RS
17349 optimization_type opt_type)
17350{
17351 switch (op)
17352 {
17353 case rsqrt_optab:
9acc9cbe 17354 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
ee62a5a6
RS
17355
17356 default:
17357 return true;
17358 }
17359}
17360
43cacb12
RS
17361/* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
17362
17363static unsigned int
17364aarch64_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
17365 int *offset)
17366{
17367 /* Polynomial invariant 1 == (VG / 2) - 1. */
17368 gcc_assert (i == 1);
17369 *factor = 2;
17370 *offset = 1;
17371 return AARCH64_DWARF_VG;
17372}
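/* Illustrative example (assumed SVE configuration): with 256-bit vectors
   VG is 4 (four 64-bit granules), so indeterminate 1 evaluates to
   4 / 2 - 1 = 1 and a poly_int size such as 16 + 16x unwinds to 32 bytes
   in the resulting DWARF expression.  */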
17373
11e554b3
JG
17374/* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
17375 if MODE is HFmode, and punt to the generic implementation otherwise. */
17376
17377static bool
7c5bd57a 17378aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
11e554b3
JG
17379{
17380 return (mode == HFmode
17381 ? true
17382 : default_libgcc_floating_mode_supported_p (mode));
17383}
17384
2e5f8203
JG
17385/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
17386 if MODE is HFmode, and punt to the generic implementation otherwise. */
17387
17388static bool
18e2a8b8 17389aarch64_scalar_mode_supported_p (scalar_mode mode)
2e5f8203
JG
17390{
17391 return (mode == HFmode
17392 ? true
17393 : default_scalar_mode_supported_p (mode));
17394}
17395
11e554b3
JG
17396/* Set the value of FLT_EVAL_METHOD.
17397 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
17398
17399 0: evaluate all operations and constants, whose semantic type has at
17400 most the range and precision of type float, to the range and
17401 precision of float; evaluate all other operations and constants to
17402 the range and precision of the semantic type;
17403
17404 N, where _FloatN is a supported interchange floating type
17405 evaluate all operations and constants, whose semantic type has at
17406 most the range and precision of _FloatN type, to the range and
17407 precision of the _FloatN type; evaluate all other operations and
17408 constants to the range and precision of the semantic type;
17409
17410 If we have the ARMv8.2-A extensions then we support _Float16 in native
17411 precision, so we should set this to 16. Otherwise, we support the type,
17412 but want to evaluate expressions in float precision, so set this to
17413 0. */
17414
17415static enum flt_eval_method
17416aarch64_excess_precision (enum excess_precision_type type)
17417{
17418 switch (type)
17419 {
17420 case EXCESS_PRECISION_TYPE_FAST:
17421 case EXCESS_PRECISION_TYPE_STANDARD:
17422 /* We can calculate either in 16-bit range and precision or
17423 32-bit range and precision. Make that decision based on whether
17424 we have native support for the ARMv8.2-A 16-bit floating-point
17425 instructions or not. */
17426 return (TARGET_FP_F16INST
17427 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
17428 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
17429 case EXCESS_PRECISION_TYPE_IMPLICIT:
17430 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
17431 default:
17432 gcc_unreachable ();
17433 }
17434 return FLT_EVAL_METHOD_UNPREDICTABLE;
17435}
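/* For example (illustrative; assumes the usual +fp16 extension flag):
   compiling with -march=armv8.2-a+fp16 enables TARGET_FP_F16INST, so
   _Float16 arithmetic is evaluated in _Float16 (FLT_EVAL_METHOD == 16);
   without the extension it is evaluated in float (FLT_EVAL_METHOD == 0).  */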
17436
b48d6421
KT
17437/* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
17438 scheduled for speculative execution. Reject the long-running division
17439 and square-root instructions. */
17440
17441static bool
17442aarch64_sched_can_speculate_insn (rtx_insn *insn)
17443{
17444 switch (get_attr_type (insn))
17445 {
17446 case TYPE_SDIV:
17447 case TYPE_UDIV:
17448 case TYPE_FDIVS:
17449 case TYPE_FDIVD:
17450 case TYPE_FSQRTS:
17451 case TYPE_FSQRTD:
17452 case TYPE_NEON_FP_SQRT_S:
17453 case TYPE_NEON_FP_SQRT_D:
17454 case TYPE_NEON_FP_SQRT_S_Q:
17455 case TYPE_NEON_FP_SQRT_D_Q:
17456 case TYPE_NEON_FP_DIV_S:
17457 case TYPE_NEON_FP_DIV_D:
17458 case TYPE_NEON_FP_DIV_S_Q:
17459 case TYPE_NEON_FP_DIV_D_Q:
17460 return false;
17461 default:
17462 return true;
17463 }
17464}
17465
43cacb12
RS
17466/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
17467
17468static int
17469aarch64_compute_pressure_classes (reg_class *classes)
17470{
17471 int i = 0;
17472 classes[i++] = GENERAL_REGS;
17473 classes[i++] = FP_REGS;
17474 /* PR_REGS isn't a useful pressure class because many predicate pseudo
17475 registers need to go in PR_LO_REGS at some point during their
17476 lifetime. Splitting it into two halves has the effect of making
17477 all predicates count against PR_LO_REGS, so that we try whenever
17478 possible to restrict the number of live predicates to 8. This
17479 greatly reduces the amount of spilling in certain loops. */
17480 classes[i++] = PR_LO_REGS;
17481 classes[i++] = PR_HI_REGS;
17482 return i;
17483}
17484
17485/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
17486
17487static bool
17488aarch64_can_change_mode_class (machine_mode from,
17489 machine_mode to, reg_class_t)
17490{
002092be
RS
17491 if (BYTES_BIG_ENDIAN)
17492 {
17493 bool from_sve_p = aarch64_sve_data_mode_p (from);
17494 bool to_sve_p = aarch64_sve_data_mode_p (to);
17495
17496 /* Don't allow changes between SVE data modes and non-SVE modes.
17497 See the comment at the head of aarch64-sve.md for details. */
17498 if (from_sve_p != to_sve_p)
17499 return false;
17500
17501 /* Don't allow changes in element size: lane 0 of the new vector
17502 would not then be lane 0 of the old vector. See the comment
17503 above aarch64_maybe_expand_sve_subreg_move for a more detailed
17504 description.
17505
17506 In the worst case, this forces a register to be spilled in
17507 one mode and reloaded in the other, which handles the
17508 endianness correctly. */
17509 if (from_sve_p && GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to))
17510 return false;
17511 }
43cacb12
RS
17512 return true;
17513}
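/* Illustrative example (hypothetical modes): on a big-endian target,
   viewing a VNx4SImode register as VNx8HImode is rejected here because
   the element size changes; in the worst case such a change then goes
   through a spill and reload, which handles the byte order correctly.  */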
17514
5cce8171
RS
17515/* Implement TARGET_EARLY_REMAT_MODES. */
17516
17517static void
17518aarch64_select_early_remat_modes (sbitmap modes)
17519{
17520 /* SVE values are not normally live across a call, so it should be
17521 worth doing early rematerialization even in VL-specific mode. */
17522 for (int i = 0; i < NUM_MACHINE_MODES; ++i)
17523 {
17524 machine_mode mode = (machine_mode) i;
17525 unsigned int vec_flags = aarch64_classify_vector_mode (mode);
17526 if (vec_flags & VEC_ANY_SVE)
17527 bitmap_set_bit (modes, i);
17528 }
17529}
17530
51b86113
DM
17531/* Target-specific selftests. */
17532
17533#if CHECKING_P
17534
17535namespace selftest {
17536
17537/* Selftest for the RTL loader.
17538 Verify that the RTL loader copes with a dump from
17539 print_rtx_function. This is essentially just a test that class
17540 function_reader can handle a real dump, but it also verifies
17541 that lookup_reg_by_dump_name correctly handles hard regs.
17542 The presence of hard reg names in the dump means that the test is
17543 target-specific, hence it is in this file. */
17544
17545static void
17546aarch64_test_loading_full_dump ()
17547{
17548 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));
17549
17550 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
17551
17552 rtx_insn *insn_1 = get_insn_by_uid (1);
17553 ASSERT_EQ (NOTE, GET_CODE (insn_1));
17554
17555 rtx_insn *insn_15 = get_insn_by_uid (15);
17556 ASSERT_EQ (INSN, GET_CODE (insn_15));
17557 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
17558
17559 /* Verify crtl->return_rtx. */
17560 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
17561 ASSERT_EQ (0, REGNO (crtl->return_rtx));
17562 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
17563}
17564
17565/* Run all target-specific selftests. */
17566
17567static void
17568aarch64_run_selftests (void)
17569{
17570 aarch64_test_loading_full_dump ();
17571}
17572
17573} // namespace selftest
17574
17575#endif /* #if CHECKING_P */
17576
43e9d192
IB
17577#undef TARGET_ADDRESS_COST
17578#define TARGET_ADDRESS_COST aarch64_address_cost
17579
17580/* This hook determines whether unnamed bitfields affect the alignment
17581 of the containing structure. The hook returns true if the structure
17582 should inherit the alignment requirements of an unnamed bitfield's
17583 type. */
17584#undef TARGET_ALIGN_ANON_BITFIELD
17585#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
17586
17587#undef TARGET_ASM_ALIGNED_DI_OP
17588#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
17589
17590#undef TARGET_ASM_ALIGNED_HI_OP
17591#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
17592
17593#undef TARGET_ASM_ALIGNED_SI_OP
17594#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
17595
17596#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
17597#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
17598 hook_bool_const_tree_hwi_hwi_const_tree_true
17599
e1c1ecb0
KT
17600#undef TARGET_ASM_FILE_START
17601#define TARGET_ASM_FILE_START aarch64_start_file
17602
43e9d192
IB
17603#undef TARGET_ASM_OUTPUT_MI_THUNK
17604#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
17605
17606#undef TARGET_ASM_SELECT_RTX_SECTION
17607#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
17608
17609#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
17610#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
17611
17612#undef TARGET_BUILD_BUILTIN_VA_LIST
17613#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
17614
17615#undef TARGET_CALLEE_COPIES
17616#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
17617
17618#undef TARGET_CAN_ELIMINATE
17619#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
17620
1fd8d40c
KT
17621#undef TARGET_CAN_INLINE_P
17622#define TARGET_CAN_INLINE_P aarch64_can_inline_p
17623
43e9d192
IB
17624#undef TARGET_CANNOT_FORCE_CONST_MEM
17625#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
17626
50487d79
EM
17627#undef TARGET_CASE_VALUES_THRESHOLD
17628#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold
17629
43e9d192
IB
17630#undef TARGET_CONDITIONAL_REGISTER_USAGE
17631#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
17632
17633/* Only the least significant bit is used for initialization guard
17634 variables. */
17635#undef TARGET_CXX_GUARD_MASK_BIT
17636#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
17637
17638#undef TARGET_C_MODE_FOR_SUFFIX
17639#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
17640
17641#ifdef TARGET_BIG_ENDIAN_DEFAULT
17642#undef TARGET_DEFAULT_TARGET_FLAGS
17643#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
17644#endif
17645
17646#undef TARGET_CLASS_MAX_NREGS
17647#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
17648
119103ca
JG
17649#undef TARGET_BUILTIN_DECL
17650#define TARGET_BUILTIN_DECL aarch64_builtin_decl
17651
a6fc00da
BH
17652#undef TARGET_BUILTIN_RECIPROCAL
17653#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
17654
11e554b3
JG
17655#undef TARGET_C_EXCESS_PRECISION
17656#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
17657
43e9d192
IB
17658#undef TARGET_EXPAND_BUILTIN
17659#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
17660
17661#undef TARGET_EXPAND_BUILTIN_VA_START
17662#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
17663
9697e620
JG
17664#undef TARGET_FOLD_BUILTIN
17665#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
17666
43e9d192
IB
17667#undef TARGET_FUNCTION_ARG
17668#define TARGET_FUNCTION_ARG aarch64_function_arg
17669
17670#undef TARGET_FUNCTION_ARG_ADVANCE
17671#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
17672
17673#undef TARGET_FUNCTION_ARG_BOUNDARY
17674#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
17675
76b0cbf8
RS
17676#undef TARGET_FUNCTION_ARG_PADDING
17677#define TARGET_FUNCTION_ARG_PADDING aarch64_function_arg_padding
17678
43cacb12
RS
17679#undef TARGET_GET_RAW_RESULT_MODE
17680#define TARGET_GET_RAW_RESULT_MODE aarch64_get_reg_raw_mode
17681#undef TARGET_GET_RAW_ARG_MODE
17682#define TARGET_GET_RAW_ARG_MODE aarch64_get_reg_raw_mode
17683
43e9d192
IB
17684#undef TARGET_FUNCTION_OK_FOR_SIBCALL
17685#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
17686
17687#undef TARGET_FUNCTION_VALUE
17688#define TARGET_FUNCTION_VALUE aarch64_function_value
17689
17690#undef TARGET_FUNCTION_VALUE_REGNO_P
17691#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
17692
fc72cba7
AL
17693#undef TARGET_GIMPLE_FOLD_BUILTIN
17694#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
0ac198d3 17695
43e9d192
IB
17696#undef TARGET_GIMPLIFY_VA_ARG_EXPR
17697#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
17698
17699#undef TARGET_INIT_BUILTINS
17700#define TARGET_INIT_BUILTINS aarch64_init_builtins
17701
c64f7d37
WD
17702#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
17703#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
17704 aarch64_ira_change_pseudo_allocno_class
17705
43e9d192
IB
17706#undef TARGET_LEGITIMATE_ADDRESS_P
17707#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
17708
17709#undef TARGET_LEGITIMATE_CONSTANT_P
17710#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
17711
491ec060
WD
17712#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
17713#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
17714 aarch64_legitimize_address_displacement
17715
43e9d192
IB
17716#undef TARGET_LIBGCC_CMP_RETURN_MODE
17717#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
17718
11e554b3
JG
17719#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
17720#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
17721aarch64_libgcc_floating_mode_supported_p
17722
ac2b960f
YZ
17723#undef TARGET_MANGLE_TYPE
17724#define TARGET_MANGLE_TYPE aarch64_mangle_type
17725
43e9d192
IB
17726#undef TARGET_MEMORY_MOVE_COST
17727#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
17728
26e0ff94
WD
17729#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
17730#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
17731
43e9d192
IB
17732#undef TARGET_MUST_PASS_IN_STACK
17733#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
17734
17735/* This target hook should return true if accesses to volatile bitfields
17736 should use the narrowest mode possible. It should return false if these
17737 accesses should use the bitfield container type. */
17738#undef TARGET_NARROW_VOLATILE_BITFIELD
17739#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
17740
17741#undef TARGET_OPTION_OVERRIDE
17742#define TARGET_OPTION_OVERRIDE aarch64_override_options
17743
17744#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
17745#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
17746 aarch64_override_options_after_change
17747
361fb3ee
KT
17748#undef TARGET_OPTION_SAVE
17749#define TARGET_OPTION_SAVE aarch64_option_save
17750
17751#undef TARGET_OPTION_RESTORE
17752#define TARGET_OPTION_RESTORE aarch64_option_restore
17753
17754#undef TARGET_OPTION_PRINT
17755#define TARGET_OPTION_PRINT aarch64_option_print
17756
5a2c8331
KT
17757#undef TARGET_OPTION_VALID_ATTRIBUTE_P
17758#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p
17759
d78006d9
KT
17760#undef TARGET_SET_CURRENT_FUNCTION
17761#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function
17762
43e9d192
IB
17763#undef TARGET_PASS_BY_REFERENCE
17764#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
17765
17766#undef TARGET_PREFERRED_RELOAD_CLASS
17767#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
17768
cee66c68
WD
17769#undef TARGET_SCHED_REASSOCIATION_WIDTH
17770#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
17771
c2ec330c
AL
17772#undef TARGET_PROMOTED_TYPE
17773#define TARGET_PROMOTED_TYPE aarch64_promoted_type
17774
43e9d192
IB
17775#undef TARGET_SECONDARY_RELOAD
17776#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
17777
17778#undef TARGET_SHIFT_TRUNCATION_MASK
17779#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
17780
17781#undef TARGET_SETUP_INCOMING_VARARGS
17782#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
17783
17784#undef TARGET_STRUCT_VALUE_RTX
17785#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
17786
17787#undef TARGET_REGISTER_MOVE_COST
17788#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
17789
17790#undef TARGET_RETURN_IN_MEMORY
17791#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
17792
17793#undef TARGET_RETURN_IN_MSB
17794#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
17795
17796#undef TARGET_RTX_COSTS
7cc2145f 17797#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
43e9d192 17798
2e5f8203
JG
17799#undef TARGET_SCALAR_MODE_SUPPORTED_P
17800#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p
17801
d126a4ae
AP
17802#undef TARGET_SCHED_ISSUE_RATE
17803#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
17804
d03f7e44
MK
17805#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
17806#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
17807 aarch64_sched_first_cycle_multipass_dfa_lookahead
17808
2d6bc7fa
KT
17809#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
17810#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
17811 aarch64_first_cycle_multipass_dfa_lookahead_guard
17812
827ab47a
KT
17813#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
17814#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
17815 aarch64_get_separate_components
17816
17817#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
17818#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
17819 aarch64_components_for_bb
17820
17821#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
17822#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
17823 aarch64_disqualify_components
17824
17825#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
17826#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
17827 aarch64_emit_prologue_components
17828
17829#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
17830#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
17831 aarch64_emit_epilogue_components
17832
17833#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
17834#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
17835 aarch64_set_handled_components
17836
43e9d192
IB
17837#undef TARGET_TRAMPOLINE_INIT
17838#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
17839
17840#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
17841#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
17842
17843#undef TARGET_VECTOR_MODE_SUPPORTED_P
17844#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
17845
7df76747
N
17846#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
17847#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
17848 aarch64_builtin_support_vector_misalignment
17849
9f4cbab8
RS
17850#undef TARGET_ARRAY_MODE
17851#define TARGET_ARRAY_MODE aarch64_array_mode
17852
43e9d192
IB
17853#undef TARGET_ARRAY_MODE_SUPPORTED_P
17854#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
17855
8990e73a
TB
17856#undef TARGET_VECTORIZE_ADD_STMT_COST
17857#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
17858
17859#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
17860#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
17861 aarch64_builtin_vectorization_cost
17862
43e9d192
IB
17863#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
17864#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
17865
42fc9a7f
JG
17866#undef TARGET_VECTORIZE_BUILTINS
17867#define TARGET_VECTORIZE_BUILTINS
17868
17869#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
17870#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
17871 aarch64_builtin_vectorized_function
17872
3b357264
JG
17873#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
17874#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
17875 aarch64_autovectorize_vector_sizes
17876
aa87aced
KV
17877#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
17878#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
17879 aarch64_atomic_assign_expand_fenv
17880
43e9d192
IB
17881/* Section anchor support. */
17882
17883#undef TARGET_MIN_ANCHOR_OFFSET
17884#define TARGET_MIN_ANCHOR_OFFSET -256
17885
17886/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
17887 byte offset; we can do much more for larger data types, but have no way
17888 to determine the size of the access. We assume accesses are aligned. */
17889#undef TARGET_MAX_ANCHOR_OFFSET
17890#define TARGET_MAX_ANCHOR_OFFSET 4095
17891
db0253a4
TB
17892#undef TARGET_VECTOR_ALIGNMENT
17893#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
17894
43cacb12
RS
17895#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
17896#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
17897 aarch64_vectorize_preferred_vector_alignment
db0253a4
TB
17898#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
17899#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
17900 aarch64_simd_vector_alignment_reachable
17901
88b08073
JG
17902/* vec_perm support. */
17903
f151c9e1
RS
17904#undef TARGET_VECTORIZE_VEC_PERM_CONST
17905#define TARGET_VECTORIZE_VEC_PERM_CONST \
17906 aarch64_vectorize_vec_perm_const
88b08073 17907
43cacb12
RS
17908#undef TARGET_VECTORIZE_GET_MASK_MODE
17909#define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
76a34e3f
RS
17910#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
17911#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \
17912 aarch64_empty_mask_is_expensive
43cacb12 17913
c2ec330c
AL
17914#undef TARGET_INIT_LIBFUNCS
17915#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
70f09188 17916
706b2314 17917#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
17918#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
17919
5cb74e90
RR
17920#undef TARGET_FLAGS_REGNUM
17921#define TARGET_FLAGS_REGNUM CC_REGNUM
17922
78607708
TV
17923#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
17924#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
17925
a3125fc2
CL
17926#undef TARGET_ASAN_SHADOW_OFFSET
17927#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
17928
0c4ec427
RE
17929#undef TARGET_LEGITIMIZE_ADDRESS
17930#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
17931
b48d6421
KT
17932#undef TARGET_SCHED_CAN_SPECULATE_INSN
17933#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
17934
594bdd53
FY
17935#undef TARGET_CAN_USE_DOLOOP_P
17936#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
17937
9bca63d4
WD
17938#undef TARGET_SCHED_ADJUST_PRIORITY
17939#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
17940
6a569cdd
KT
17941#undef TARGET_SCHED_MACRO_FUSION_P
17942#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
17943
17944#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
17945#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
17946
350013bc
BC
17947#undef TARGET_SCHED_FUSION_PRIORITY
17948#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
17949
7b841a12
JW
17950#undef TARGET_UNSPEC_MAY_TRAP_P
17951#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p
17952
1b1e81f8
JW
17953#undef TARGET_USE_PSEUDO_PIC_REG
17954#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
17955
cc8ca59e
JB
17956#undef TARGET_PRINT_OPERAND
17957#define TARGET_PRINT_OPERAND aarch64_print_operand
17958
17959#undef TARGET_PRINT_OPERAND_ADDRESS
17960#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
17961
ee62a5a6
RS
17962#undef TARGET_OPTAB_SUPPORTED_P
17963#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
17964
43203dea
RR
17965#undef TARGET_OMIT_STRUCT_RETURN_REG
17966#define TARGET_OMIT_STRUCT_RETURN_REG true
17967
43cacb12
RS
17968#undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
17969#define TARGET_DWARF_POLY_INDETERMINATE_VALUE \
17970 aarch64_dwarf_poly_indeterminate_value
17971
f46fe37e
EB
17972/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors. */
17973#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
17974#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4
17975
c43f4279
RS
17976#undef TARGET_HARD_REGNO_NREGS
17977#define TARGET_HARD_REGNO_NREGS aarch64_hard_regno_nregs
f939c3e6
RS
17978#undef TARGET_HARD_REGNO_MODE_OK
17979#define TARGET_HARD_REGNO_MODE_OK aarch64_hard_regno_mode_ok
17980
99e1629f
RS
17981#undef TARGET_MODES_TIEABLE_P
17982#define TARGET_MODES_TIEABLE_P aarch64_modes_tieable_p
17983
80ec73f4
RS
17984#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
17985#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
17986 aarch64_hard_regno_call_part_clobbered
17987
58e17cf8
RS
17988#undef TARGET_CONSTANT_ALIGNMENT
17989#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
17990
43cacb12
RS
17991#undef TARGET_COMPUTE_PRESSURE_CLASSES
17992#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes
17993
17994#undef TARGET_CAN_CHANGE_MODE_CLASS
17995#define TARGET_CAN_CHANGE_MODE_CLASS aarch64_can_change_mode_class
17996
5cce8171
RS
17997#undef TARGET_SELECT_EARLY_REMAT_MODES
17998#define TARGET_SELECT_EARLY_REMAT_MODES aarch64_select_early_remat_modes
17999
51b86113
DM
18000#if CHECKING_P
18001#undef TARGET_RUN_TARGET_SELFTESTS
18002#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
18003#endif /* #if CHECKING_P */
18004
43e9d192
IB
18005struct gcc_target targetm = TARGET_INITIALIZER;
18006
18007#include "gt-aarch64.h"