1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2023 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "tm_p.h"
38 #include "stringpool.h"
39 #include "attribs.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "stor-layout.h"
49 #include "calls.h"
50 #include "varasm.h"
51 #include "output.h"
52 #include "insn-attr.h"
53 #include "flags.h"
54 #include "reload.h"
55 #include "explow.h"
56 #include "expr.h"
57 #include "cfgrtl.h"
58 #include "sched-int.h"
59 #include "common/common-target.h"
60 #include "langhooks.h"
61 #include "intl.h"
62 #include "libfuncs.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "gimple-iterator.h"
73 #include "selftest.h"
74 #include "tree-vectorizer.h"
75 #include "opts.h"
76 #include "aarch-common.h"
77 #include "aarch-common-protos.h"
78
79 /* This file should be included last. */
80 #include "target-def.h"
81
82 /* Forward definitions of types. */
83 typedef struct minipool_node Mnode;
84 typedef struct minipool_fixup Mfix;
85
86 void (*arm_lang_output_object_attributes_hook)(void);
87
88 struct four_ints
89 {
90 int i[4];
91 };
92
93 /* Forward function declarations. */
94 static bool arm_const_not_ok_for_debug_p (rtx);
95 static int arm_needs_doubleword_align (machine_mode, const_tree);
96 static int arm_compute_static_chain_stack_bytes (void);
97 static arm_stack_offsets *arm_get_frame_offsets (void);
98 static void arm_compute_frame_layout (void);
99 static void arm_add_gc_roots (void);
100 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
101 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
102 static unsigned bit_count (unsigned long);
103 static unsigned bitmap_popcount (const sbitmap);
104 static int arm_address_register_rtx_p (rtx, int);
105 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
106 static bool is_called_in_ARM_mode (tree);
107 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
108 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
109 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
110 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
111 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
112 inline static int thumb1_index_register_rtx_p (rtx, int);
113 static int thumb_far_jump_used_p (void);
114 static bool thumb_force_lr_save (void);
115 static unsigned arm_size_return_regs (void);
116 static bool arm_assemble_integer (rtx, unsigned int, int);
117 static void arm_print_operand (FILE *, rtx, int);
118 static void arm_print_operand_address (FILE *, machine_mode, rtx);
119 static bool arm_print_operand_punct_valid_p (unsigned char code);
120 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
121 static arm_cc get_arm_condition_code (rtx);
122 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
123 static const char *output_multi_immediate (rtx *, const char *, const char *,
124 int, HOST_WIDE_INT);
125 static const char *shift_op (rtx, HOST_WIDE_INT *);
126 static struct machine_function *arm_init_machine_status (void);
127 static void thumb_exit (FILE *, int);
128 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
129 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
130 static Mnode *add_minipool_forward_ref (Mfix *);
131 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
132 static Mnode *add_minipool_backward_ref (Mfix *);
133 static void assign_minipool_offsets (Mfix *);
134 static void arm_print_value (FILE *, rtx);
135 static void dump_minipool (rtx_insn *);
136 static int arm_barrier_cost (rtx_insn *);
137 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
138 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
139 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
140 machine_mode, rtx);
141 static void arm_reorg (void);
142 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
143 static unsigned long arm_compute_save_reg0_reg12_mask (void);
144 static unsigned long arm_compute_save_core_reg_mask (void);
145 static unsigned long arm_isr_value (tree);
146 static unsigned long arm_compute_func_type (void);
147 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
149 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
150 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
151 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
152 #endif
153 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
154 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
155 static void arm_output_function_epilogue (FILE *);
156 static void arm_output_function_prologue (FILE *);
157 static int arm_comp_type_attributes (const_tree, const_tree);
158 static void arm_set_default_type_attributes (tree);
159 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
160 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
161 static int optimal_immediate_sequence (enum rtx_code code,
162 unsigned HOST_WIDE_INT val,
163 struct four_ints *return_sequence);
164 static int optimal_immediate_sequence_1 (enum rtx_code code,
165 unsigned HOST_WIDE_INT val,
166 struct four_ints *return_sequence,
167 int i);
168 static int arm_get_strip_length (int);
169 static bool arm_function_ok_for_sibcall (tree, tree);
170 static machine_mode arm_promote_function_mode (const_tree,
171 machine_mode, int *,
172 const_tree, int);
173 static bool arm_return_in_memory (const_tree, const_tree);
174 static rtx arm_function_value (const_tree, const_tree, bool);
175 static rtx arm_libcall_value_1 (machine_mode);
176 static rtx arm_libcall_value (machine_mode, const_rtx);
177 static bool arm_function_value_regno_p (const unsigned int);
178 static void arm_internal_label (FILE *, const char *, unsigned long);
179 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
180 tree);
181 static bool arm_have_conditional_execution (void);
182 static bool arm_cannot_force_const_mem (machine_mode, rtx);
183 static bool arm_legitimate_constant_p (machine_mode, rtx);
184 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
185 static int arm_insn_cost (rtx_insn *, bool);
186 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
187 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
188 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
189 static void emit_constant_insn (rtx cond, rtx pattern);
190 static rtx_insn *emit_set_insn (rtx, rtx);
191 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
192 static rtx emit_multi_reg_push (unsigned long, unsigned long);
193 static void arm_emit_multi_reg_pop (unsigned long);
194 static int vfp_emit_fstmd (int, int);
195 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
196 static int arm_arg_partial_bytes (cumulative_args_t,
197 const function_arg_info &);
198 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
199 static void arm_function_arg_advance (cumulative_args_t,
200 const function_arg_info &);
201 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
202 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
203 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
204 const_tree);
205 static rtx aapcs_libcall_value (machine_mode);
206 static int aapcs_select_return_coproc (const_tree, const_tree);
207
208 #ifdef OBJECT_FORMAT_ELF
209 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
210 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
211 #endif
212 #ifndef ARM_PE
213 static void arm_encode_section_info (tree, rtx, int);
214 #endif
215
216 static void arm_file_end (void);
217 static void arm_file_start (void);
218 static void arm_insert_attributes (tree, tree *);
219
220 static void arm_setup_incoming_varargs (cumulative_args_t,
221 const function_arg_info &, int *, int);
222 static bool arm_pass_by_reference (cumulative_args_t,
223 const function_arg_info &);
224 static bool arm_promote_prototypes (const_tree);
225 static bool arm_default_short_enums (void);
226 static bool arm_align_anon_bitfield (void);
227 static bool arm_return_in_msb (const_tree);
228 static bool arm_must_pass_in_stack (const function_arg_info &);
229 static bool arm_return_in_memory (const_tree, const_tree);
230 #if ARM_UNWIND_INFO
231 static void arm_unwind_emit (FILE *, rtx_insn *);
232 static bool arm_output_ttype (rtx);
233 static void arm_asm_emit_except_personality (rtx);
234 #endif
235 static void arm_asm_init_sections (void);
236 static rtx arm_dwarf_register_span (rtx);
237
238 static tree arm_cxx_guard_type (void);
239 static bool arm_cxx_guard_mask_bit (void);
240 static tree arm_get_cookie_size (tree);
241 static bool arm_cookie_has_size (void);
242 static bool arm_cxx_cdtor_returns_this (void);
243 static bool arm_cxx_key_method_may_be_inline (void);
244 static void arm_cxx_determine_class_data_visibility (tree);
245 static bool arm_cxx_class_data_always_comdat (void);
246 static bool arm_cxx_use_aeabi_atexit (void);
247 static void arm_init_libfuncs (void);
248 static tree arm_build_builtin_va_list (void);
249 static void arm_expand_builtin_va_start (tree, rtx);
250 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
251 static void arm_option_override (void);
252 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
253 struct cl_target_option *);
254 static void arm_override_options_after_change (void);
255 static void arm_option_print (FILE *, int, struct cl_target_option *);
256 static void arm_set_current_function (tree);
257 static bool arm_can_inline_p (tree, tree);
258 static void arm_relayout_function (tree);
259 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
260 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
261 static bool arm_sched_can_speculate_insn (rtx_insn *);
262 static bool arm_macro_fusion_p (void);
263 static bool arm_cannot_copy_insn_p (rtx_insn *);
264 static int arm_issue_rate (void);
265 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
266 static int arm_first_cycle_multipass_dfa_lookahead (void);
267 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
268 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
269 static bool arm_output_addr_const_extra (FILE *, rtx);
270 static bool arm_allocate_stack_slots_for_args (void);
271 static bool arm_warn_func_return (tree);
272 static tree arm_promoted_type (const_tree t);
273 static bool arm_scalar_mode_supported_p (scalar_mode);
274 static bool arm_frame_pointer_required (void);
275 static bool arm_can_eliminate (const int, const int);
276 static void arm_asm_trampoline_template (FILE *);
277 static void arm_trampoline_init (rtx, tree, rtx);
278 static rtx arm_trampoline_adjust_address (rtx);
279 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
280 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
282 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
283 static bool arm_array_mode_supported_p (machine_mode,
284 unsigned HOST_WIDE_INT);
285 static machine_mode arm_preferred_simd_mode (scalar_mode);
286 static bool arm_class_likely_spilled_p (reg_class_t);
287 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
288 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
289 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
290 const_tree type,
291 int misalignment,
292 bool is_packed);
293 static void arm_conditional_register_usage (void);
294 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
295 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
296 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
297 static int arm_default_branch_cost (bool, bool);
298 static int arm_cortex_a5_branch_cost (bool, bool);
299 static int arm_cortex_m_branch_cost (bool, bool);
300 static int arm_cortex_m7_branch_cost (bool, bool);
301
302 static bool arm_vectorize_vec_perm_const (machine_mode, machine_mode, rtx, rtx,
303 rtx, const vec_perm_indices &);
304
305 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
306
307 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
308 tree vectype,
309 int misalign ATTRIBUTE_UNUSED);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
329 vec<machine_mode> &,
330 vec<const char *> &, vec<rtx> &,
331 vec<rtx> &, HARD_REG_SET &, location_t);
332 static const char *arm_identify_fpu_from_isa (sbitmap);
333 \f
334 /* Table of machine attributes. */
335 static const attribute_spec arm_gnu_attributes[] =
336 {
337 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
338 affects_type_identity, handler, exclude } */
339 /* Function calls made to this symbol must be done indirectly, because
340 it may lie outside of the 26 bit addressing range of a normal function
341 call. */
342 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
343 /* Whereas these functions are always known to reside within the 26 bit
344 addressing range. */
345 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
346 /* Specify the procedure call conventions for a function. */
347 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
348 NULL },
349 /* Interrupt Service Routines have special prologue and epilogue requirements. */
350 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
351 NULL },
352 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
353 NULL },
354 { "naked", 0, 0, true, false, false, false,
355 arm_handle_fndecl_attribute, NULL },
356 #ifdef ARM_PE
357 /* ARM/PE has three new attributes:
358 interfacearm - ?
359 dllexport - for exporting a function/variable that will live in a dll
360 dllimport - for importing a function/variable from a dll
361
362 Microsoft allows multiple declspecs in one __declspec, separating
363 them with spaces. We do NOT support this. Instead, use __declspec
364 multiple times.
365 */
366 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
367 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
368 { "interfacearm", 0, 0, true, false, false, false,
369 arm_handle_fndecl_attribute, NULL },
370 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
371 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
372 NULL },
373 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
374 NULL },
375 { "notshared", 0, 0, false, true, false, false,
376 arm_handle_notshared_attribute, NULL },
377 #endif
378 /* ARMv8-M Security Extensions support. */
379 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
380 arm_handle_cmse_nonsecure_entry, NULL },
381 { "cmse_nonsecure_call", 0, 0, false, false, false, true,
382 arm_handle_cmse_nonsecure_call, NULL },
383 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }
384 };
385
386 static const scoped_attribute_specs arm_gnu_attribute_table =
387 {
388 "gnu", { arm_gnu_attributes }
389 };
390
391 static const scoped_attribute_specs *const arm_attribute_table[] =
392 {
393 &arm_gnu_attribute_table
394 };
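/* Illustrative usage (not part of the original source; the function
   names are hypothetical): user code selects the attributes registered
   above in the usual GNU way, e.g.

     extern void far_helper (void) __attribute__ ((long_call));
     extern void near_helper (void) __attribute__ ((short_call));
     void uart_isr (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_fn (double) __attribute__ ((pcs ("aapcs-vfp")));

   Calls to far_helper are then emitted as indirect calls, while
   near_helper may be reached with a plain BL.  */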
395 \f
396 /* Initialize the GCC target structure. */
397 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
398 #undef TARGET_MERGE_DECL_ATTRIBUTES
399 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
400 #endif
401
402 #undef TARGET_CHECK_BUILTIN_CALL
403 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
404
405 #undef TARGET_LEGITIMIZE_ADDRESS
406 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
407
408 #undef TARGET_ATTRIBUTE_TABLE
409 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
410
411 #undef TARGET_INSERT_ATTRIBUTES
412 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
413
414 #undef TARGET_ASM_FILE_START
415 #define TARGET_ASM_FILE_START arm_file_start
416 #undef TARGET_ASM_FILE_END
417 #define TARGET_ASM_FILE_END arm_file_end
418
419 #undef TARGET_ASM_ALIGNED_SI_OP
420 #define TARGET_ASM_ALIGNED_SI_OP NULL
421 #undef TARGET_ASM_INTEGER
422 #define TARGET_ASM_INTEGER arm_assemble_integer
423
424 #undef TARGET_PRINT_OPERAND
425 #define TARGET_PRINT_OPERAND arm_print_operand
426 #undef TARGET_PRINT_OPERAND_ADDRESS
427 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
428 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
429 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
430
431 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
432 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
433
434 #undef TARGET_ASM_FUNCTION_PROLOGUE
435 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
436
437 #undef TARGET_ASM_FUNCTION_EPILOGUE
438 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
439
440 #undef TARGET_CAN_INLINE_P
441 #define TARGET_CAN_INLINE_P arm_can_inline_p
442
443 #undef TARGET_RELAYOUT_FUNCTION
444 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
445
446 #undef TARGET_OPTION_OVERRIDE
447 #define TARGET_OPTION_OVERRIDE arm_option_override
448
449 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
450 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
451
452 #undef TARGET_OPTION_RESTORE
453 #define TARGET_OPTION_RESTORE arm_option_restore
454
455 #undef TARGET_OPTION_PRINT
456 #define TARGET_OPTION_PRINT arm_option_print
457
458 #undef TARGET_COMP_TYPE_ATTRIBUTES
459 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
460
461 #undef TARGET_SCHED_CAN_SPECULATE_INSN
462 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
463
464 #undef TARGET_SCHED_MACRO_FUSION_P
465 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
466
467 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
468 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
469
470 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
471 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
472
473 #undef TARGET_SCHED_ADJUST_COST
474 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
475
476 #undef TARGET_SET_CURRENT_FUNCTION
477 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
478
479 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
480 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
481
482 #undef TARGET_SCHED_REORDER
483 #define TARGET_SCHED_REORDER arm_sched_reorder
484
485 #undef TARGET_REGISTER_MOVE_COST
486 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
487
488 #undef TARGET_MEMORY_MOVE_COST
489 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
490
491 #undef TARGET_ENCODE_SECTION_INFO
492 #ifdef ARM_PE
493 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
494 #else
495 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
496 #endif
497
498 #undef TARGET_STRIP_NAME_ENCODING
499 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
500
501 #undef TARGET_ASM_INTERNAL_LABEL
502 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
503
504 #undef TARGET_FLOATN_MODE
505 #define TARGET_FLOATN_MODE arm_floatn_mode
506
507 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
508 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
509
510 #undef TARGET_FUNCTION_VALUE
511 #define TARGET_FUNCTION_VALUE arm_function_value
512
513 #undef TARGET_LIBCALL_VALUE
514 #define TARGET_LIBCALL_VALUE arm_libcall_value
515
516 #undef TARGET_FUNCTION_VALUE_REGNO_P
517 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
518
519 #undef TARGET_GIMPLE_FOLD_BUILTIN
520 #define TARGET_GIMPLE_FOLD_BUILTIN arm_gimple_fold_builtin
521
522 #undef TARGET_ASM_OUTPUT_MI_THUNK
523 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
524 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
525 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
526
527 #undef TARGET_RTX_COSTS
528 #define TARGET_RTX_COSTS arm_rtx_costs
529 #undef TARGET_ADDRESS_COST
530 #define TARGET_ADDRESS_COST arm_address_cost
531 #undef TARGET_INSN_COST
532 #define TARGET_INSN_COST arm_insn_cost
533
534 #undef TARGET_SHIFT_TRUNCATION_MASK
535 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
536 #undef TARGET_VECTOR_MODE_SUPPORTED_P
537 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
538 #undef TARGET_ARRAY_MODE_SUPPORTED_P
539 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
540 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
541 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
542 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
543 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
544 arm_autovectorize_vector_modes
545
546 #undef TARGET_MACHINE_DEPENDENT_REORG
547 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
548
549 #undef TARGET_INIT_BUILTINS
550 #define TARGET_INIT_BUILTINS arm_init_builtins
551 #undef TARGET_EXPAND_BUILTIN
552 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
553 #undef TARGET_BUILTIN_DECL
554 #define TARGET_BUILTIN_DECL arm_builtin_decl
555
556 #undef TARGET_INIT_LIBFUNCS
557 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
558
559 #undef TARGET_PROMOTE_FUNCTION_MODE
560 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
561 #undef TARGET_PROMOTE_PROTOTYPES
562 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
563 #undef TARGET_PASS_BY_REFERENCE
564 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
565 #undef TARGET_ARG_PARTIAL_BYTES
566 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
567 #undef TARGET_FUNCTION_ARG
568 #define TARGET_FUNCTION_ARG arm_function_arg
569 #undef TARGET_FUNCTION_ARG_ADVANCE
570 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
571 #undef TARGET_FUNCTION_ARG_PADDING
572 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
573 #undef TARGET_FUNCTION_ARG_BOUNDARY
574 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
575
576 #undef TARGET_SETUP_INCOMING_VARARGS
577 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
578
579 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
580 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
581
582 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
583 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
584 #undef TARGET_TRAMPOLINE_INIT
585 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
586 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
587 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
588
589 #undef TARGET_WARN_FUNC_RETURN
590 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
591
592 #undef TARGET_DEFAULT_SHORT_ENUMS
593 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
594
595 #undef TARGET_ALIGN_ANON_BITFIELD
596 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
597
598 #undef TARGET_NARROW_VOLATILE_BITFIELD
599 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
600
601 #undef TARGET_CXX_GUARD_TYPE
602 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
603
604 #undef TARGET_CXX_GUARD_MASK_BIT
605 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
606
607 #undef TARGET_CXX_GET_COOKIE_SIZE
608 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
609
610 #undef TARGET_CXX_COOKIE_HAS_SIZE
611 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
612
613 #undef TARGET_CXX_CDTOR_RETURNS_THIS
614 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
615
616 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
617 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
618
619 #undef TARGET_CXX_USE_AEABI_ATEXIT
620 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
621
622 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
623 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
624 arm_cxx_determine_class_data_visibility
625
626 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
627 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
628
629 #undef TARGET_RETURN_IN_MSB
630 #define TARGET_RETURN_IN_MSB arm_return_in_msb
631
632 #undef TARGET_RETURN_IN_MEMORY
633 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
634
635 #undef TARGET_MUST_PASS_IN_STACK
636 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
637
638 #if ARM_UNWIND_INFO
639 #undef TARGET_ASM_UNWIND_EMIT
640 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
641
642 /* EABI unwinding tables use a different format for the typeinfo tables. */
643 #undef TARGET_ASM_TTYPE
644 #define TARGET_ASM_TTYPE arm_output_ttype
645
646 #undef TARGET_ARM_EABI_UNWINDER
647 #define TARGET_ARM_EABI_UNWINDER true
648
649 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
650 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
651
652 #endif /* ARM_UNWIND_INFO */
653
654 #undef TARGET_ASM_INIT_SECTIONS
655 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
656
657 #undef TARGET_DWARF_REGISTER_SPAN
658 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
659
660 #undef TARGET_CANNOT_COPY_INSN_P
661 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
662
663 #ifdef HAVE_AS_TLS
664 #undef TARGET_HAVE_TLS
665 #define TARGET_HAVE_TLS true
666 #endif
667
668 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
669 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
670
671 #undef TARGET_LEGITIMATE_CONSTANT_P
672 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
673
674 #undef TARGET_CANNOT_FORCE_CONST_MEM
675 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
676
677 #undef TARGET_MAX_ANCHOR_OFFSET
678 #define TARGET_MAX_ANCHOR_OFFSET 4095
679
680 /* The minimum is set such that the total size of the block
681 for a particular anchor is -4088 + 1 + 4095 bytes, which is
682 divisible by eight, ensuring natural spacing of anchors. */
683 #undef TARGET_MIN_ANCHOR_OFFSET
684 #define TARGET_MIN_ANCHOR_OFFSET -4088
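/* Worked example (not part of the original source): the anchored range
   spans -4088 .. +4095 inclusive, i.e. 4088 + 1 + 4095 = 8184 bytes,
   and 8184 = 8 * 1023, which gives the eight-byte spacing of anchors
   described above.  */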
685
686 #undef TARGET_SCHED_ISSUE_RATE
687 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
688
689 #undef TARGET_SCHED_VARIABLE_ISSUE
690 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
691
692 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
693 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
694 arm_first_cycle_multipass_dfa_lookahead
695
696 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
697 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
698 arm_first_cycle_multipass_dfa_lookahead_guard
699
700 #undef TARGET_MANGLE_TYPE
701 #define TARGET_MANGLE_TYPE arm_mangle_type
702
703 #undef TARGET_INVALID_CONVERSION
704 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
705
706 #undef TARGET_INVALID_UNARY_OP
707 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
708
709 #undef TARGET_INVALID_BINARY_OP
710 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
711
712 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
713 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
714
715 #undef TARGET_BUILD_BUILTIN_VA_LIST
716 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
717 #undef TARGET_EXPAND_BUILTIN_VA_START
718 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
719 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
720 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
721
722 #ifdef HAVE_AS_TLS
723 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
724 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
725 #endif
726
727 #undef TARGET_LEGITIMATE_ADDRESS_P
728 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
729
730 #undef TARGET_PREFERRED_RELOAD_CLASS
731 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
732
733 #undef TARGET_PROMOTED_TYPE
734 #define TARGET_PROMOTED_TYPE arm_promoted_type
735
736 #undef TARGET_SCALAR_MODE_SUPPORTED_P
737 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
738
739 #undef TARGET_COMPUTE_FRAME_LAYOUT
740 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
741
742 #undef TARGET_FRAME_POINTER_REQUIRED
743 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
744
745 #undef TARGET_CAN_ELIMINATE
746 #define TARGET_CAN_ELIMINATE arm_can_eliminate
747
748 #undef TARGET_CONDITIONAL_REGISTER_USAGE
749 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
750
751 #undef TARGET_CLASS_LIKELY_SPILLED_P
752 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
753
754 #undef TARGET_VECTORIZE_BUILTINS
755 #define TARGET_VECTORIZE_BUILTINS
756
757 #undef TARGET_VECTOR_ALIGNMENT
758 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
759
760 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
761 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
762 arm_vector_alignment_reachable
763
764 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
765 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
766 arm_builtin_support_vector_misalignment
767
768 #undef TARGET_PREFERRED_RENAME_CLASS
769 #define TARGET_PREFERRED_RENAME_CLASS \
770 arm_preferred_rename_class
771
772 #undef TARGET_VECTORIZE_VEC_PERM_CONST
773 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
774
775 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
776 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
777 arm_builtin_vectorization_cost
778
779 #undef TARGET_CANONICALIZE_COMPARISON
780 #define TARGET_CANONICALIZE_COMPARISON \
781 arm_canonicalize_comparison
782
783 #undef TARGET_ASAN_SHADOW_OFFSET
784 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
785
786 #undef MAX_INSN_PER_IT_BLOCK
787 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
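/* Note (not part of the original source): with -mrestrict-it each IT
   block is limited to a single conditional instruction, e.g.

     it     eq
     moveq  r0, #1

   whereas the unrestricted form may conditionalise up to four
   instructions under one ITTTT.  */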
788
789 #undef TARGET_CAN_USE_DOLOOP_P
790 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
791
792 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
793 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
794
795 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
796 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
797
798 #undef TARGET_SCHED_FUSION_PRIORITY
799 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
800
801 #undef TARGET_ASM_FUNCTION_SECTION
802 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
803
804 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
805 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
806
807 #undef TARGET_SECTION_TYPE_FLAGS
808 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
809
810 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
811 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
812
813 #undef TARGET_C_EXCESS_PRECISION
814 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
815
816 /* Although the architecture reserves bits 0 and 1, only the former is
817 used for ARM/Thumb ISA selection in v7 and earlier versions. */
818 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
819 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
820
821 #undef TARGET_FIXED_CONDITION_CODE_REGS
822 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
823
824 #undef TARGET_HARD_REGNO_NREGS
825 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
826 #undef TARGET_HARD_REGNO_MODE_OK
827 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
828
829 #undef TARGET_MODES_TIEABLE_P
830 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
831
832 #undef TARGET_CAN_CHANGE_MODE_CLASS
833 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
834
835 #undef TARGET_CONSTANT_ALIGNMENT
836 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
837
838 #undef TARGET_INVALID_WITHIN_DOLOOP
839 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
840
841 #undef TARGET_MD_ASM_ADJUST
842 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
843
844 #undef TARGET_STACK_PROTECT_GUARD
845 #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
846
847 #undef TARGET_VECTORIZE_GET_MASK_MODE
848 #define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode
849 \f
850 /* Obstack for minipool constant handling. */
851 static struct obstack minipool_obstack;
852 static char * minipool_startobj;
853
854 /* The maximum number of insns skipped which
855 will be conditionalised if possible. */
856 static int max_insns_skipped = 5;
857
858 /* True if we are currently building a constant table. */
859 int making_const_table;
860
861 /* The processor for which instructions should be scheduled. */
862 enum processor_type arm_tune = TARGET_CPU_arm_none;
863
864 /* The current tuning set. */
865 const struct tune_params *current_tune;
866
867 /* Which floating point hardware to schedule for. */
868 int arm_fpu_attr;
869
870 /* Used for Thumb call_via trampolines. */
871 rtx thumb_call_via_label[14];
872 static int thumb_call_reg_needed;
873
874 /* The bits in this mask specify which instruction scheduling options should
875 be used. */
876 unsigned int tune_flags = 0;
877
878 /* The highest ARM architecture version supported by the
879 target. */
880 enum base_architecture arm_base_arch = BASE_ARCH_0;
881
882 /* Active target architecture and tuning. */
883
884 struct arm_build_target arm_active_target;
885
886 /* The following are used in the arm.md file as equivalents to bits
887 in the above two flag variables. */
888
889 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
890 int arm_arch4 = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
893 int arm_arch4t = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
896 int arm_arch5t = 0;
897
898 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
899 int arm_arch5te = 0;
900
901 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
902 int arm_arch6 = 0;
903
904 /* Nonzero if this chip supports the ARM 6K extensions. */
905 int arm_arch6k = 0;
906
907 /* Nonzero if this chip supports the ARM 6KZ extensions. */
908 int arm_arch6kz = 0;
909
910 /* Nonzero if instructions present in ARMv6-M can be used. */
911 int arm_arch6m = 0;
912
913 /* Nonzero if this chip supports the ARM 7 extensions. */
914 int arm_arch7 = 0;
915
916 /* Nonzero if this chip supports the Large Physical Address Extension. */
917 int arm_arch_lpae = 0;
918
919 /* Nonzero if instructions not present in the 'M' profile can be used. */
920 int arm_arch_notm = 0;
921
922 /* Nonzero if instructions present in ARMv7E-M can be used. */
923 int arm_arch7em = 0;
924
925 /* Nonzero if instructions present in ARMv8 can be used. */
926 int arm_arch8 = 0;
927
928 /* Nonzero if this chip supports the ARMv8.1 extensions. */
929 int arm_arch8_1 = 0;
930
931 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
932 int arm_arch8_2 = 0;
933
934 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
935 int arm_arch8_3 = 0;
936
937 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
938 int arm_arch8_4 = 0;
939
940 /* Nonzero if this chip supports the ARM Architecture 8-M Mainline
941 extensions. */
942 int arm_arch8m_main = 0;
943
944 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
945 extensions. */
946 int arm_arch8_1m_main = 0;
947
948 /* Nonzero if this chip supports the FP16 instructions extension of ARM
949 Architecture 8.2. */
950 int arm_fp16_inst = 0;
951
952 /* Nonzero if this chip can benefit from load scheduling. */
953 int arm_ld_sched = 0;
954
955 /* Nonzero if this chip is a StrongARM. */
956 int arm_tune_strongarm = 0;
957
958 /* Nonzero if this chip supports Intel Wireless MMX technology. */
959 int arm_arch_iwmmxt = 0;
960
961 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
962 int arm_arch_iwmmxt2 = 0;
963
964 /* Nonzero if this chip is an XScale. */
965 int arm_arch_xscale = 0;
966
967 /* Nonzero if tuning for XScale.  */
968 int arm_tune_xscale = 0;
969
970 /* Nonzero if we want to tune for stores that access the write-buffer.
971 This typically means an ARM6 or ARM7 with MMU or MPU. */
972 int arm_tune_wbuf = 0;
973
974 /* Nonzero if tuning for Cortex-A9. */
975 int arm_tune_cortex_a9 = 0;
976
977 /* Nonzero if we should define __THUMB_INTERWORK__ in the
978 preprocessor.
979 XXX This is a bit of a hack, it's intended to help work around
980 problems in GLD which doesn't understand that armv5t code is
981 interworking clean. */
982 int arm_cpp_interwork = 0;
983
984 /* Nonzero if chip supports Thumb 1. */
985 int arm_arch_thumb1;
986
987 /* Nonzero if chip supports Thumb 2. */
988 int arm_arch_thumb2;
989
990 /* Nonzero if chip supports integer division instruction. */
991 int arm_arch_arm_hwdiv;
992 int arm_arch_thumb_hwdiv;
993
994 /* Nonzero if chip disallows volatile memory access in IT block. */
995 int arm_arch_no_volatile_ce;
996
997 /* Nonzero if we shouldn't use literal pools. */
998 bool arm_disable_literal_pool = false;
999
1000 /* The register number to be used for the PIC offset register. */
1001 unsigned arm_pic_register = INVALID_REGNUM;
1002
1003 enum arm_pcs arm_pcs_default;
1004
1005 /* For an explanation of these variables, see final_prescan_insn below. */
1006 int arm_ccfsm_state;
1007 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
1008 enum arm_cond_code arm_current_cc;
1009
1010 rtx arm_target_insn;
1011 int arm_target_label;
1012 /* The number of conditionally executed insns, including the current insn. */
1013 int arm_condexec_count = 0;
1014 /* A bitmask specifying the patterns for the IT block.
1015 Zero means do not output an IT block before this insn. */
1016 int arm_condexec_mask = 0;
1017 /* The number of bits used in arm_condexec_mask. */
1018 int arm_condexec_masklen = 0;
1019
1020 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1021 int arm_arch_crc = 0;
1022
1023 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1024 int arm_arch_dotprod = 0;
1025
1026 /* Nonzero if chip supports the ARMv8-M security extensions. */
1027 int arm_arch_cmse = 0;
1028
1029 /* Nonzero if the core has a very small, high-latency, multiply unit. */
1030 int arm_m_profile_small_mul = 0;
1031
1032 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1033 int arm_arch_i8mm = 0;
1034
1035 /* Nonzero if chip supports the BFloat16 instructions. */
1036 int arm_arch_bf16 = 0;
1037
1038 /* Nonzero if chip supports the Custom Datapath Extension. */
1039 int arm_arch_cde = 0;
1040 int arm_arch_cde_coproc = 0;
1041 const int arm_arch_cde_coproc_bits[] = {
1042 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1043 };
1044
1045 /* The condition codes of the ARM, and the inverse function. */
1046 static const char * const arm_condition_codes[] =
1047 {
1048 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1049 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1050 };
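/* Note (not part of the original source): the codes are laid out so that
   the inverse of a condition is the entry whose index differs in the low
   bit ("eq"/"ne", "cs"/"cc", "ge"/"lt", ...), so inversion reduces to
   XOR-ing the index with 1 (assuming the usual ARM_INVERSE_CONDITION_CODE
   definition in arm.h).  */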
1051
1052 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1053 int arm_regs_in_sequence[] =
1054 {
1055 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1056 };
1057
1058 #define DEF_FP_SYSREG(reg) #reg,
1059 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1060 FP_SYSREGS
1061 };
1062 #undef DEF_FP_SYSREG
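/* Illustration (not part of the original source): this is the X-macro
   idiom.  Assuming FP_SYSREGS is defined elsewhere roughly as

     #define FP_SYSREGS \
       DEF_FP_SYSREG (FPSCR) \
       DEF_FP_SYSREG (FPCXTNS) \
       ...

   the temporary DEF_FP_SYSREG above stringizes each name, so the array
   expands to { "FPSCR", "FPCXTNS", ... } with NB_FP_SYSREGS entries.  */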
1063
1064 #define ARM_LSL_NAME "lsl"
1065 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1066
1067 #define THUMB2_WORK_REGS \
1068 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1069 | (1 << SP_REGNUM) \
1070 | (1 << PC_REGNUM) \
1071 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1072 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1073 : 0)))
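/* Note (not part of the original source): 0xff selects the low registers
   r0-r7; the mask then clears the Thumb frame pointer (conventionally
   r7), SP and PC (whose bits fall outside 0xff anyway, so this is purely
   defensive), and the PIC register when one is in use.  */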
1074 \f
1075 /* Initialization code. */
1076
1077 struct cpu_tune
1078 {
1079 enum processor_type scheduler;
1080 unsigned int tune_flags;
1081 const struct tune_params *tune;
1082 };
1083
1084 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1085 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1086 { \
1087 num_slots, \
1088 l1_size, \
1089 l1_line_size \
1090 }
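/* Example (not part of the original source; the numbers are
   illustrative): a tuning structure would use these as
   ARM_PREFETCH_BENEFICIAL (4, -1, 64), which expands to the initializer
   { 4, -1, 64 } (four prefetch slots, L1 size unknown, 64-byte cache
   lines), while ARM_PREFETCH_NOT_BENEFICIAL yields { 0, -1, -1 }.  */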
1091
1092 /* arm generic vectorizer costs. */
1093 static const
1094 struct cpu_vec_costs arm_default_vec_cost = {
1095 1, /* scalar_stmt_cost. */
1096 1, /* scalar load_cost. */
1097 1, /* scalar_store_cost. */
1098 1, /* vec_stmt_cost. */
1099 1, /* vec_to_scalar_cost. */
1100 1, /* scalar_to_vec_cost. */
1101 1, /* vec_align_load_cost. */
1102 1, /* vec_unalign_load_cost. */
1103 1, /* vec_unalign_store_cost. */
1104 1, /* vec_store_cost. */
1105 3, /* cond_taken_branch_cost. */
1106 1, /* cond_not_taken_branch_cost. */
1107 };
1108
1109 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1110 #include "aarch-cost-tables.h"
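/* Note (not part of the original source): the extra-cost tables below are
   expressed in units of COSTS_N_INSNS: COSTS_N_INSNS (N) is the cost of N
   typical instructions (rtl.h defines it as (N) * 4, so COSTS_N_INSNS (2)
   is 8), and a plain 0 means no cost beyond the base instruction cost
   added elsewhere.  */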
1111
1112
1113
1114 const struct cpu_cost_table cortexa9_extra_costs =
1115 {
1116 /* ALU */
1117 {
1118 0, /* arith. */
1119 0, /* logical. */
1120 0, /* shift. */
1121 COSTS_N_INSNS (1), /* shift_reg. */
1122 COSTS_N_INSNS (1), /* arith_shift. */
1123 COSTS_N_INSNS (2), /* arith_shift_reg. */
1124 0, /* log_shift. */
1125 COSTS_N_INSNS (1), /* log_shift_reg. */
1126 COSTS_N_INSNS (1), /* extend. */
1127 COSTS_N_INSNS (2), /* extend_arith. */
1128 COSTS_N_INSNS (1), /* bfi. */
1129 COSTS_N_INSNS (1), /* bfx. */
1130 0, /* clz. */
1131 0, /* rev. */
1132 0, /* non_exec. */
1133 true /* non_exec_costs_exec. */
1134 },
1135 {
1136 /* MULT SImode */
1137 {
1138 COSTS_N_INSNS (3), /* simple. */
1139 COSTS_N_INSNS (3), /* flag_setting. */
1140 COSTS_N_INSNS (2), /* extend. */
1141 COSTS_N_INSNS (3), /* add. */
1142 COSTS_N_INSNS (2), /* extend_add. */
1143 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1144 },
1145 /* MULT DImode */
1146 {
1147 0, /* simple (N/A). */
1148 0, /* flag_setting (N/A). */
1149 COSTS_N_INSNS (4), /* extend. */
1150 0, /* add (N/A). */
1151 COSTS_N_INSNS (4), /* extend_add. */
1152 0 /* idiv (N/A). */
1153 }
1154 },
1155 /* LD/ST */
1156 {
1157 COSTS_N_INSNS (2), /* load. */
1158 COSTS_N_INSNS (2), /* load_sign_extend. */
1159 COSTS_N_INSNS (2), /* ldrd. */
1160 COSTS_N_INSNS (2), /* ldm_1st. */
1161 1, /* ldm_regs_per_insn_1st. */
1162 2, /* ldm_regs_per_insn_subsequent. */
1163 COSTS_N_INSNS (5), /* loadf. */
1164 COSTS_N_INSNS (5), /* loadd. */
1165 COSTS_N_INSNS (1), /* load_unaligned. */
1166 COSTS_N_INSNS (2), /* store. */
1167 COSTS_N_INSNS (2), /* strd. */
1168 COSTS_N_INSNS (2), /* stm_1st. */
1169 1, /* stm_regs_per_insn_1st. */
1170 2, /* stm_regs_per_insn_subsequent. */
1171 COSTS_N_INSNS (1), /* storef. */
1172 COSTS_N_INSNS (1), /* stored. */
1173 COSTS_N_INSNS (1), /* store_unaligned. */
1174 COSTS_N_INSNS (1), /* loadv. */
1175 COSTS_N_INSNS (1) /* storev. */
1176 },
1177 {
1178 /* FP SFmode */
1179 {
1180 COSTS_N_INSNS (14), /* div. */
1181 COSTS_N_INSNS (4), /* mult. */
1182 COSTS_N_INSNS (7), /* mult_addsub. */
1183 COSTS_N_INSNS (30), /* fma. */
1184 COSTS_N_INSNS (3), /* addsub. */
1185 COSTS_N_INSNS (1), /* fpconst. */
1186 COSTS_N_INSNS (1), /* neg. */
1187 COSTS_N_INSNS (3), /* compare. */
1188 COSTS_N_INSNS (3), /* widen. */
1189 COSTS_N_INSNS (3), /* narrow. */
1190 COSTS_N_INSNS (3), /* toint. */
1191 COSTS_N_INSNS (3), /* fromint. */
1192 COSTS_N_INSNS (3) /* roundint. */
1193 },
1194 /* FP DFmode */
1195 {
1196 COSTS_N_INSNS (24), /* div. */
1197 COSTS_N_INSNS (5), /* mult. */
1198 COSTS_N_INSNS (8), /* mult_addsub. */
1199 COSTS_N_INSNS (30), /* fma. */
1200 COSTS_N_INSNS (3), /* addsub. */
1201 COSTS_N_INSNS (1), /* fpconst. */
1202 COSTS_N_INSNS (1), /* neg. */
1203 COSTS_N_INSNS (3), /* compare. */
1204 COSTS_N_INSNS (3), /* widen. */
1205 COSTS_N_INSNS (3), /* narrow. */
1206 COSTS_N_INSNS (3), /* toint. */
1207 COSTS_N_INSNS (3), /* fromint. */
1208 COSTS_N_INSNS (3) /* roundint. */
1209 }
1210 },
1211 /* Vector */
1212 {
1213 COSTS_N_INSNS (1), /* alu. */
1214 COSTS_N_INSNS (4), /* mult. */
1215 COSTS_N_INSNS (1), /* movi. */
1216 COSTS_N_INSNS (2), /* dup. */
1217 COSTS_N_INSNS (2) /* extract. */
1218 }
1219 };
1220
1221 const struct cpu_cost_table cortexa8_extra_costs =
1222 {
1223 /* ALU */
1224 {
1225 0, /* arith. */
1226 0, /* logical. */
1227 COSTS_N_INSNS (1), /* shift. */
1228 0, /* shift_reg. */
1229 COSTS_N_INSNS (1), /* arith_shift. */
1230 0, /* arith_shift_reg. */
1231 COSTS_N_INSNS (1), /* log_shift. */
1232 0, /* log_shift_reg. */
1233 0, /* extend. */
1234 0, /* extend_arith. */
1235 0, /* bfi. */
1236 0, /* bfx. */
1237 0, /* clz. */
1238 0, /* rev. */
1239 0, /* non_exec. */
1240 true /* non_exec_costs_exec. */
1241 },
1242 {
1243 /* MULT SImode */
1244 {
1245 COSTS_N_INSNS (1), /* simple. */
1246 COSTS_N_INSNS (1), /* flag_setting. */
1247 COSTS_N_INSNS (1), /* extend. */
1248 COSTS_N_INSNS (1), /* add. */
1249 COSTS_N_INSNS (1), /* extend_add. */
1250 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1251 },
1252 /* MULT DImode */
1253 {
1254 0, /* simple (N/A). */
1255 0, /* flag_setting (N/A). */
1256 COSTS_N_INSNS (2), /* extend. */
1257 0, /* add (N/A). */
1258 COSTS_N_INSNS (2), /* extend_add. */
1259 0 /* idiv (N/A). */
1260 }
1261 },
1262 /* LD/ST */
1263 {
1264 COSTS_N_INSNS (1), /* load. */
1265 COSTS_N_INSNS (1), /* load_sign_extend. */
1266 COSTS_N_INSNS (1), /* ldrd. */
1267 COSTS_N_INSNS (1), /* ldm_1st. */
1268 1, /* ldm_regs_per_insn_1st. */
1269 2, /* ldm_regs_per_insn_subsequent. */
1270 COSTS_N_INSNS (1), /* loadf. */
1271 COSTS_N_INSNS (1), /* loadd. */
1272 COSTS_N_INSNS (1), /* load_unaligned. */
1273 COSTS_N_INSNS (1), /* store. */
1274 COSTS_N_INSNS (1), /* strd. */
1275 COSTS_N_INSNS (1), /* stm_1st. */
1276 1, /* stm_regs_per_insn_1st. */
1277 2, /* stm_regs_per_insn_subsequent. */
1278 COSTS_N_INSNS (1), /* storef. */
1279 COSTS_N_INSNS (1), /* stored. */
1280 COSTS_N_INSNS (1), /* store_unaligned. */
1281 COSTS_N_INSNS (1), /* loadv. */
1282 COSTS_N_INSNS (1) /* storev. */
1283 },
1284 {
1285 /* FP SFmode */
1286 {
1287 COSTS_N_INSNS (36), /* div. */
1288 COSTS_N_INSNS (11), /* mult. */
1289 COSTS_N_INSNS (20), /* mult_addsub. */
1290 COSTS_N_INSNS (30), /* fma. */
1291 COSTS_N_INSNS (9), /* addsub. */
1292 COSTS_N_INSNS (3), /* fpconst. */
1293 COSTS_N_INSNS (3), /* neg. */
1294 COSTS_N_INSNS (6), /* compare. */
1295 COSTS_N_INSNS (4), /* widen. */
1296 COSTS_N_INSNS (4), /* narrow. */
1297 COSTS_N_INSNS (8), /* toint. */
1298 COSTS_N_INSNS (8), /* fromint. */
1299 COSTS_N_INSNS (8) /* roundint. */
1300 },
1301 /* FP DFmode */
1302 {
1303 COSTS_N_INSNS (64), /* div. */
1304 COSTS_N_INSNS (16), /* mult. */
1305 COSTS_N_INSNS (25), /* mult_addsub. */
1306 COSTS_N_INSNS (30), /* fma. */
1307 COSTS_N_INSNS (9), /* addsub. */
1308 COSTS_N_INSNS (3), /* fpconst. */
1309 COSTS_N_INSNS (3), /* neg. */
1310 COSTS_N_INSNS (6), /* compare. */
1311 COSTS_N_INSNS (6), /* widen. */
1312 COSTS_N_INSNS (6), /* narrow. */
1313 COSTS_N_INSNS (8), /* toint. */
1314 COSTS_N_INSNS (8), /* fromint. */
1315 COSTS_N_INSNS (8) /* roundint. */
1316 }
1317 },
1318 /* Vector */
1319 {
1320 COSTS_N_INSNS (1), /* alu. */
1321 COSTS_N_INSNS (4), /* mult. */
1322 COSTS_N_INSNS (1), /* movi. */
1323 COSTS_N_INSNS (2), /* dup. */
1324 COSTS_N_INSNS (2) /* extract. */
1325 }
1326 };
1327
1328 const struct cpu_cost_table cortexa5_extra_costs =
1329 {
1330 /* ALU */
1331 {
1332 0, /* arith. */
1333 0, /* logical. */
1334 COSTS_N_INSNS (1), /* shift. */
1335 COSTS_N_INSNS (1), /* shift_reg. */
1336 COSTS_N_INSNS (1), /* arith_shift. */
1337 COSTS_N_INSNS (1), /* arith_shift_reg. */
1338 COSTS_N_INSNS (1), /* log_shift. */
1339 COSTS_N_INSNS (1), /* log_shift_reg. */
1340 COSTS_N_INSNS (1), /* extend. */
1341 COSTS_N_INSNS (1), /* extend_arith. */
1342 COSTS_N_INSNS (1), /* bfi. */
1343 COSTS_N_INSNS (1), /* bfx. */
1344 COSTS_N_INSNS (1), /* clz. */
1345 COSTS_N_INSNS (1), /* rev. */
1346 0, /* non_exec. */
1347 true /* non_exec_costs_exec. */
1348 },
1349
1350 {
1351 /* MULT SImode */
1352 {
1353 0, /* simple. */
1354 COSTS_N_INSNS (1), /* flag_setting. */
1355 COSTS_N_INSNS (1), /* extend. */
1356 COSTS_N_INSNS (1), /* add. */
1357 COSTS_N_INSNS (1), /* extend_add. */
1358 COSTS_N_INSNS (7) /* idiv. */
1359 },
1360 /* MULT DImode */
1361 {
1362 0, /* simple (N/A). */
1363 0, /* flag_setting (N/A). */
1364 COSTS_N_INSNS (1), /* extend. */
1365 0, /* add. */
1366 COSTS_N_INSNS (2), /* extend_add. */
1367 0 /* idiv (N/A). */
1368 }
1369 },
1370 /* LD/ST */
1371 {
1372 COSTS_N_INSNS (1), /* load. */
1373 COSTS_N_INSNS (1), /* load_sign_extend. */
1374 COSTS_N_INSNS (6), /* ldrd. */
1375 COSTS_N_INSNS (1), /* ldm_1st. */
1376 1, /* ldm_regs_per_insn_1st. */
1377 2, /* ldm_regs_per_insn_subsequent. */
1378 COSTS_N_INSNS (2), /* loadf. */
1379 COSTS_N_INSNS (4), /* loadd. */
1380 COSTS_N_INSNS (1), /* load_unaligned. */
1381 COSTS_N_INSNS (1), /* store. */
1382 COSTS_N_INSNS (3), /* strd. */
1383 COSTS_N_INSNS (1), /* stm_1st. */
1384 1, /* stm_regs_per_insn_1st. */
1385 2, /* stm_regs_per_insn_subsequent. */
1386 COSTS_N_INSNS (2), /* storef. */
1387 COSTS_N_INSNS (2), /* stored. */
1388 COSTS_N_INSNS (1), /* store_unaligned. */
1389 COSTS_N_INSNS (1), /* loadv. */
1390 COSTS_N_INSNS (1) /* storev. */
1391 },
1392 {
1393 /* FP SFmode */
1394 {
1395 COSTS_N_INSNS (15), /* div. */
1396 COSTS_N_INSNS (3), /* mult. */
1397 COSTS_N_INSNS (7), /* mult_addsub. */
1398 COSTS_N_INSNS (7), /* fma. */
1399 COSTS_N_INSNS (3), /* addsub. */
1400 COSTS_N_INSNS (3), /* fpconst. */
1401 COSTS_N_INSNS (3), /* neg. */
1402 COSTS_N_INSNS (3), /* compare. */
1403 COSTS_N_INSNS (3), /* widen. */
1404 COSTS_N_INSNS (3), /* narrow. */
1405 COSTS_N_INSNS (3), /* toint. */
1406 COSTS_N_INSNS (3), /* fromint. */
1407 COSTS_N_INSNS (3) /* roundint. */
1408 },
1409 /* FP DFmode */
1410 {
1411 COSTS_N_INSNS (30), /* div. */
1412 COSTS_N_INSNS (6), /* mult. */
1413 COSTS_N_INSNS (10), /* mult_addsub. */
1414 COSTS_N_INSNS (7), /* fma. */
1415 COSTS_N_INSNS (3), /* addsub. */
1416 COSTS_N_INSNS (3), /* fpconst. */
1417 COSTS_N_INSNS (3), /* neg. */
1418 COSTS_N_INSNS (3), /* compare. */
1419 COSTS_N_INSNS (3), /* widen. */
1420 COSTS_N_INSNS (3), /* narrow. */
1421 COSTS_N_INSNS (3), /* toint. */
1422 COSTS_N_INSNS (3), /* fromint. */
1423 COSTS_N_INSNS (3) /* roundint. */
1424 }
1425 },
1426 /* Vector */
1427 {
1428 COSTS_N_INSNS (1), /* alu. */
1429 COSTS_N_INSNS (4), /* mult. */
1430 COSTS_N_INSNS (1), /* movi. */
1431 COSTS_N_INSNS (2), /* dup. */
1432 COSTS_N_INSNS (2) /* extract. */
1433 }
1434 };
1435
1436
1437 const struct cpu_cost_table cortexa7_extra_costs =
1438 {
1439 /* ALU */
1440 {
1441 0, /* arith. */
1442 0, /* logical. */
1443 COSTS_N_INSNS (1), /* shift. */
1444 COSTS_N_INSNS (1), /* shift_reg. */
1445 COSTS_N_INSNS (1), /* arith_shift. */
1446 COSTS_N_INSNS (1), /* arith_shift_reg. */
1447 COSTS_N_INSNS (1), /* log_shift. */
1448 COSTS_N_INSNS (1), /* log_shift_reg. */
1449 COSTS_N_INSNS (1), /* extend. */
1450 COSTS_N_INSNS (1), /* extend_arith. */
1451 COSTS_N_INSNS (1), /* bfi. */
1452 COSTS_N_INSNS (1), /* bfx. */
1453 COSTS_N_INSNS (1), /* clz. */
1454 COSTS_N_INSNS (1), /* rev. */
1455 0, /* non_exec. */
1456 true /* non_exec_costs_exec. */
1457 },
1458
1459 {
1460 /* MULT SImode */
1461 {
1462 0, /* simple. */
1463 COSTS_N_INSNS (1), /* flag_setting. */
1464 COSTS_N_INSNS (1), /* extend. */
1465 COSTS_N_INSNS (1), /* add. */
1466 COSTS_N_INSNS (1), /* extend_add. */
1467 COSTS_N_INSNS (7) /* idiv. */
1468 },
1469 /* MULT DImode */
1470 {
1471 0, /* simple (N/A). */
1472 0, /* flag_setting (N/A). */
1473 COSTS_N_INSNS (1), /* extend. */
1474 0, /* add. */
1475 COSTS_N_INSNS (2), /* extend_add. */
1476 0 /* idiv (N/A). */
1477 }
1478 },
1479 /* LD/ST */
1480 {
1481 COSTS_N_INSNS (1), /* load. */
1482 COSTS_N_INSNS (1), /* load_sign_extend. */
1483 COSTS_N_INSNS (3), /* ldrd. */
1484 COSTS_N_INSNS (1), /* ldm_1st. */
1485 1, /* ldm_regs_per_insn_1st. */
1486 2, /* ldm_regs_per_insn_subsequent. */
1487 COSTS_N_INSNS (2), /* loadf. */
1488 COSTS_N_INSNS (2), /* loadd. */
1489 COSTS_N_INSNS (1), /* load_unaligned. */
1490 COSTS_N_INSNS (1), /* store. */
1491 COSTS_N_INSNS (3), /* strd. */
1492 COSTS_N_INSNS (1), /* stm_1st. */
1493 1, /* stm_regs_per_insn_1st. */
1494 2, /* stm_regs_per_insn_subsequent. */
1495 COSTS_N_INSNS (2), /* storef. */
1496 COSTS_N_INSNS (2), /* stored. */
1497 COSTS_N_INSNS (1), /* store_unaligned. */
1498 COSTS_N_INSNS (1), /* loadv. */
1499 COSTS_N_INSNS (1) /* storev. */
1500 },
1501 {
1502 /* FP SFmode */
1503 {
1504 COSTS_N_INSNS (15), /* div. */
1505 COSTS_N_INSNS (3), /* mult. */
1506 COSTS_N_INSNS (7), /* mult_addsub. */
1507 COSTS_N_INSNS (7), /* fma. */
1508 COSTS_N_INSNS (3), /* addsub. */
1509 COSTS_N_INSNS (3), /* fpconst. */
1510 COSTS_N_INSNS (3), /* neg. */
1511 COSTS_N_INSNS (3), /* compare. */
1512 COSTS_N_INSNS (3), /* widen. */
1513 COSTS_N_INSNS (3), /* narrow. */
1514 COSTS_N_INSNS (3), /* toint. */
1515 COSTS_N_INSNS (3), /* fromint. */
1516 COSTS_N_INSNS (3) /* roundint. */
1517 },
1518 /* FP DFmode */
1519 {
1520 COSTS_N_INSNS (30), /* div. */
1521 COSTS_N_INSNS (6), /* mult. */
1522 COSTS_N_INSNS (10), /* mult_addsub. */
1523 COSTS_N_INSNS (7), /* fma. */
1524 COSTS_N_INSNS (3), /* addsub. */
1525 COSTS_N_INSNS (3), /* fpconst. */
1526 COSTS_N_INSNS (3), /* neg. */
1527 COSTS_N_INSNS (3), /* compare. */
1528 COSTS_N_INSNS (3), /* widen. */
1529 COSTS_N_INSNS (3), /* narrow. */
1530 COSTS_N_INSNS (3), /* toint. */
1531 COSTS_N_INSNS (3), /* fromint. */
1532 COSTS_N_INSNS (3) /* roundint. */
1533 }
1534 },
1535 /* Vector */
1536 {
1537 COSTS_N_INSNS (1), /* alu. */
1538 COSTS_N_INSNS (4), /* mult. */
1539 COSTS_N_INSNS (1), /* movi. */
1540 COSTS_N_INSNS (2), /* dup. */
1541 COSTS_N_INSNS (2) /* extract. */
1542 }
1543 };
1544
1545 const struct cpu_cost_table cortexa12_extra_costs =
1546 {
1547 /* ALU */
1548 {
1549 0, /* arith. */
1550 0, /* logical. */
1551 0, /* shift. */
1552 COSTS_N_INSNS (1), /* shift_reg. */
1553 COSTS_N_INSNS (1), /* arith_shift. */
1554 COSTS_N_INSNS (1), /* arith_shift_reg. */
1555 COSTS_N_INSNS (1), /* log_shift. */
1556 COSTS_N_INSNS (1), /* log_shift_reg. */
1557 0, /* extend. */
1558 COSTS_N_INSNS (1), /* extend_arith. */
1559 0, /* bfi. */
1560 COSTS_N_INSNS (1), /* bfx. */
1561 COSTS_N_INSNS (1), /* clz. */
1562 COSTS_N_INSNS (1), /* rev. */
1563 0, /* non_exec. */
1564 true /* non_exec_costs_exec. */
1565 },
1566 /* MULT SImode */
1567 {
1568 {
1569 COSTS_N_INSNS (2), /* simple. */
1570 COSTS_N_INSNS (3), /* flag_setting. */
1571 COSTS_N_INSNS (2), /* extend. */
1572 COSTS_N_INSNS (3), /* add. */
1573 COSTS_N_INSNS (2), /* extend_add. */
1574 COSTS_N_INSNS (18) /* idiv. */
1575 },
1576 /* MULT DImode */
1577 {
1578 0, /* simple (N/A). */
1579 0, /* flag_setting (N/A). */
1580 COSTS_N_INSNS (3), /* extend. */
1581 0, /* add (N/A). */
1582 COSTS_N_INSNS (3), /* extend_add. */
1583 0 /* idiv (N/A). */
1584 }
1585 },
1586 /* LD/ST */
1587 {
1588 COSTS_N_INSNS (3), /* load. */
1589 COSTS_N_INSNS (3), /* load_sign_extend. */
1590 COSTS_N_INSNS (3), /* ldrd. */
1591 COSTS_N_INSNS (3), /* ldm_1st. */
1592 1, /* ldm_regs_per_insn_1st. */
1593 2, /* ldm_regs_per_insn_subsequent. */
1594 COSTS_N_INSNS (3), /* loadf. */
1595 COSTS_N_INSNS (3), /* loadd. */
1596 0, /* load_unaligned. */
1597 0, /* store. */
1598 0, /* strd. */
1599 0, /* stm_1st. */
1600 1, /* stm_regs_per_insn_1st. */
1601 2, /* stm_regs_per_insn_subsequent. */
1602 COSTS_N_INSNS (2), /* storef. */
1603 COSTS_N_INSNS (2), /* stored. */
1604 0, /* store_unaligned. */
1605 COSTS_N_INSNS (1), /* loadv. */
1606 COSTS_N_INSNS (1) /* storev. */
1607 },
1608 {
1609 /* FP SFmode */
1610 {
1611 COSTS_N_INSNS (17), /* div. */
1612 COSTS_N_INSNS (4), /* mult. */
1613 COSTS_N_INSNS (8), /* mult_addsub. */
1614 COSTS_N_INSNS (8), /* fma. */
1615 COSTS_N_INSNS (4), /* addsub. */
1616 COSTS_N_INSNS (2), /* fpconst. */
1617 COSTS_N_INSNS (2), /* neg. */
1618 COSTS_N_INSNS (2), /* compare. */
1619 COSTS_N_INSNS (4), /* widen. */
1620 COSTS_N_INSNS (4), /* narrow. */
1621 COSTS_N_INSNS (4), /* toint. */
1622 COSTS_N_INSNS (4), /* fromint. */
1623 COSTS_N_INSNS (4) /* roundint. */
1624 },
1625 /* FP DFmode */
1626 {
1627 COSTS_N_INSNS (31), /* div. */
1628 COSTS_N_INSNS (4), /* mult. */
1629 COSTS_N_INSNS (8), /* mult_addsub. */
1630 COSTS_N_INSNS (8), /* fma. */
1631 COSTS_N_INSNS (4), /* addsub. */
1632 COSTS_N_INSNS (2), /* fpconst. */
1633 COSTS_N_INSNS (2), /* neg. */
1634 COSTS_N_INSNS (2), /* compare. */
1635 COSTS_N_INSNS (4), /* widen. */
1636 COSTS_N_INSNS (4), /* narrow. */
1637 COSTS_N_INSNS (4), /* toint. */
1638 COSTS_N_INSNS (4), /* fromint. */
1639 COSTS_N_INSNS (4) /* roundint. */
1640 }
1641 },
1642 /* Vector */
1643 {
1644 COSTS_N_INSNS (1), /* alu. */
1645 COSTS_N_INSNS (4), /* mult. */
1646 COSTS_N_INSNS (1), /* movi. */
1647 COSTS_N_INSNS (2), /* dup. */
1648 COSTS_N_INSNS (2) /* extract. */
1649 }
1650 };
1651
1652 const struct cpu_cost_table cortexa15_extra_costs =
1653 {
1654 /* ALU */
1655 {
1656 0, /* arith. */
1657 0, /* logical. */
1658 0, /* shift. */
1659 0, /* shift_reg. */
1660 COSTS_N_INSNS (1), /* arith_shift. */
1661 COSTS_N_INSNS (1), /* arith_shift_reg. */
1662 COSTS_N_INSNS (1), /* log_shift. */
1663 COSTS_N_INSNS (1), /* log_shift_reg. */
1664 0, /* extend. */
1665 COSTS_N_INSNS (1), /* extend_arith. */
1666 COSTS_N_INSNS (1), /* bfi. */
1667 0, /* bfx. */
1668 0, /* clz. */
1669 0, /* rev. */
1670 0, /* non_exec. */
1671 true /* non_exec_costs_exec. */
1672 },
1673 /* MULT SImode */
1674 {
1675 {
1676 COSTS_N_INSNS (2), /* simple. */
1677 COSTS_N_INSNS (3), /* flag_setting. */
1678 COSTS_N_INSNS (2), /* extend. */
1679 COSTS_N_INSNS (2), /* add. */
1680 COSTS_N_INSNS (2), /* extend_add. */
1681 COSTS_N_INSNS (18) /* idiv. */
1682 },
1683 /* MULT DImode */
1684 {
1685 0, /* simple (N/A). */
1686 0, /* flag_setting (N/A). */
1687 COSTS_N_INSNS (3), /* extend. */
1688 0, /* add (N/A). */
1689 COSTS_N_INSNS (3), /* extend_add. */
1690 0 /* idiv (N/A). */
1691 }
1692 },
1693 /* LD/ST */
1694 {
1695 COSTS_N_INSNS (3), /* load. */
1696 COSTS_N_INSNS (3), /* load_sign_extend. */
1697 COSTS_N_INSNS (3), /* ldrd. */
1698 COSTS_N_INSNS (4), /* ldm_1st. */
1699 1, /* ldm_regs_per_insn_1st. */
1700 2, /* ldm_regs_per_insn_subsequent. */
1701 COSTS_N_INSNS (4), /* loadf. */
1702 COSTS_N_INSNS (4), /* loadd. */
1703 0, /* load_unaligned. */
1704 0, /* store. */
1705 0, /* strd. */
1706 COSTS_N_INSNS (1), /* stm_1st. */
1707 1, /* stm_regs_per_insn_1st. */
1708 2, /* stm_regs_per_insn_subsequent. */
1709 0, /* storef. */
1710 0, /* stored. */
1711 0, /* store_unaligned. */
1712 COSTS_N_INSNS (1), /* loadv. */
1713 COSTS_N_INSNS (1) /* storev. */
1714 },
1715 {
1716 /* FP SFmode */
1717 {
1718 COSTS_N_INSNS (17), /* div. */
1719 COSTS_N_INSNS (4), /* mult. */
1720 COSTS_N_INSNS (8), /* mult_addsub. */
1721 COSTS_N_INSNS (8), /* fma. */
1722 COSTS_N_INSNS (4), /* addsub. */
1723 COSTS_N_INSNS (2), /* fpconst. */
1724 COSTS_N_INSNS (2), /* neg. */
1725 COSTS_N_INSNS (5), /* compare. */
1726 COSTS_N_INSNS (4), /* widen. */
1727 COSTS_N_INSNS (4), /* narrow. */
1728 COSTS_N_INSNS (4), /* toint. */
1729 COSTS_N_INSNS (4), /* fromint. */
1730 COSTS_N_INSNS (4) /* roundint. */
1731 },
1732 /* FP DFmode */
1733 {
1734 COSTS_N_INSNS (31), /* div. */
1735 COSTS_N_INSNS (4), /* mult. */
1736 COSTS_N_INSNS (8), /* mult_addsub. */
1737 COSTS_N_INSNS (8), /* fma. */
1738 COSTS_N_INSNS (4), /* addsub. */
1739 COSTS_N_INSNS (2), /* fpconst. */
1740 COSTS_N_INSNS (2), /* neg. */
1741 COSTS_N_INSNS (2), /* compare. */
1742 COSTS_N_INSNS (4), /* widen. */
1743 COSTS_N_INSNS (4), /* narrow. */
1744 COSTS_N_INSNS (4), /* toint. */
1745 COSTS_N_INSNS (4), /* fromint. */
1746 COSTS_N_INSNS (4) /* roundint. */
1747 }
1748 },
1749 /* Vector */
1750 {
1751 COSTS_N_INSNS (1), /* alu. */
1752 COSTS_N_INSNS (4), /* mult. */
1753 COSTS_N_INSNS (1), /* movi. */
1754 COSTS_N_INSNS (2), /* dup. */
1755 COSTS_N_INSNS (2) /* extract. */
1756 }
1757 };
1758
1759 const struct cpu_cost_table v7m_extra_costs =
1760 {
1761 /* ALU */
1762 {
1763 0, /* arith. */
1764 0, /* logical. */
1765 0, /* shift. */
1766 0, /* shift_reg. */
1767 0, /* arith_shift. */
1768 COSTS_N_INSNS (1), /* arith_shift_reg. */
1769 0, /* log_shift. */
1770 COSTS_N_INSNS (1), /* log_shift_reg. */
1771 0, /* extend. */
1772 COSTS_N_INSNS (1), /* extend_arith. */
1773 0, /* bfi. */
1774 0, /* bfx. */
1775 0, /* clz. */
1776 0, /* rev. */
1777 COSTS_N_INSNS (1), /* non_exec. */
1778 false /* non_exec_costs_exec. */
1779 },
1780 {
1781 /* MULT SImode */
1782 {
1783 COSTS_N_INSNS (1), /* simple. */
1784 COSTS_N_INSNS (1), /* flag_setting. */
1785 COSTS_N_INSNS (2), /* extend. */
1786 COSTS_N_INSNS (1), /* add. */
1787 COSTS_N_INSNS (3), /* extend_add. */
1788 COSTS_N_INSNS (8) /* idiv. */
1789 },
1790 /* MULT DImode */
1791 {
1792 0, /* simple (N/A). */
1793 0, /* flag_setting (N/A). */
1794 COSTS_N_INSNS (2), /* extend. */
1795 0, /* add (N/A). */
1796 COSTS_N_INSNS (3), /* extend_add. */
1797 0 /* idiv (N/A). */
1798 }
1799 },
1800 /* LD/ST */
1801 {
1802 COSTS_N_INSNS (2), /* load. */
1803 0, /* load_sign_extend. */
1804 COSTS_N_INSNS (3), /* ldrd. */
1805 COSTS_N_INSNS (2), /* ldm_1st. */
1806 1, /* ldm_regs_per_insn_1st. */
1807 1, /* ldm_regs_per_insn_subsequent. */
1808 COSTS_N_INSNS (2), /* loadf. */
1809 COSTS_N_INSNS (3), /* loadd. */
1810 COSTS_N_INSNS (1), /* load_unaligned. */
1811 COSTS_N_INSNS (2), /* store. */
1812 COSTS_N_INSNS (3), /* strd. */
1813 COSTS_N_INSNS (2), /* stm_1st. */
1814 1, /* stm_regs_per_insn_1st. */
1815 1, /* stm_regs_per_insn_subsequent. */
1816 COSTS_N_INSNS (2), /* storef. */
1817 COSTS_N_INSNS (3), /* stored. */
1818 COSTS_N_INSNS (1), /* store_unaligned. */
1819 COSTS_N_INSNS (1), /* loadv. */
1820 COSTS_N_INSNS (1) /* storev. */
1821 },
1822 {
1823 /* FP SFmode */
1824 {
1825 COSTS_N_INSNS (7), /* div. */
1826 COSTS_N_INSNS (2), /* mult. */
1827 COSTS_N_INSNS (5), /* mult_addsub. */
1828 COSTS_N_INSNS (3), /* fma. */
1829 COSTS_N_INSNS (1), /* addsub. */
1830 0, /* fpconst. */
1831 0, /* neg. */
1832 0, /* compare. */
1833 0, /* widen. */
1834 0, /* narrow. */
1835 0, /* toint. */
1836 0, /* fromint. */
1837 0 /* roundint. */
1838 },
1839 /* FP DFmode */
1840 {
1841 COSTS_N_INSNS (15), /* div. */
1842 COSTS_N_INSNS (5), /* mult. */
1843 COSTS_N_INSNS (7), /* mult_addsub. */
1844 COSTS_N_INSNS (7), /* fma. */
1845 COSTS_N_INSNS (3), /* addsub. */
1846 0, /* fpconst. */
1847 0, /* neg. */
1848 0, /* compare. */
1849 0, /* widen. */
1850 0, /* narrow. */
1851 0, /* toint. */
1852 0, /* fromint. */
1853 0 /* roundint. */
1854 }
1855 },
1856 /* Vector */
1857 {
1858 COSTS_N_INSNS (1), /* alu. */
1859 COSTS_N_INSNS (4), /* mult. */
1860 COSTS_N_INSNS (1), /* movi. */
1861 COSTS_N_INSNS (2), /* dup. */
1862 COSTS_N_INSNS (2) /* extract. */
1863 }
1864 };
1865
1866 const struct addr_mode_cost_table generic_addr_mode_costs =
1867 {
1868 /* int. */
1869 {
1870 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1871 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1872 COSTS_N_INSNS (0) /* AMO_WB. */
1873 },
1874 /* float. */
1875 {
1876 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1877 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1878 COSTS_N_INSNS (0) /* AMO_WB. */
1879 },
1880 /* vector. */
1881 {
1882 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1883 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1884 COSTS_N_INSNS (0) /* AMO_WB. */
1885 }
1886 };
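/* Note: every entry above is COSTS_N_INSNS (0). In the generic model an
addressing mode, with or without writeback, is assumed to add no cost on
top of the basic load/store costs in the LD/ST tables. */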
1887
1888 const struct tune_params arm_slowmul_tune =
1889 {
1890 &generic_extra_costs, /* Insn extra costs. */
1891 &generic_addr_mode_costs, /* Addressing mode costs. */
1892 NULL, /* Sched adj cost. */
1893 arm_default_branch_cost,
1894 &arm_default_vec_cost,
1895 3, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL,
1900 tune_params::PREF_CONST_POOL_TRUE,
1901 tune_params::PREF_LDRD_FALSE,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER,
1905 tune_params::PREF_NEON_STRINGOPS_FALSE,
1906 tune_params::FUSE_NOTHING,
1907 tune_params::SCHED_AUTOPREF_OFF
1908 };
1909
1910 const struct tune_params arm_fastmul_tune =
1911 {
1912 &generic_extra_costs, /* Insn extra costs. */
1913 &generic_addr_mode_costs, /* Addressing mode costs. */
1914 NULL, /* Sched adj cost. */
1915 arm_default_branch_cost,
1916 &arm_default_vec_cost,
1917 1, /* Constant limit. */
1918 5, /* Max cond insns. */
1919 8, /* Memset max inline. */
1920 1, /* Issue rate. */
1921 ARM_PREFETCH_NOT_BENEFICIAL,
1922 tune_params::PREF_CONST_POOL_TRUE,
1923 tune_params::PREF_LDRD_FALSE,
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1925 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1926 tune_params::DISPARAGE_FLAGS_NEITHER,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE,
1928 tune_params::FUSE_NOTHING,
1929 tune_params::SCHED_AUTOPREF_OFF
1930 };
1931
1932 /* StrongARM has early execution of branches, so a sequence that is worth
1933 skipping is shorter. Set max_insns_skipped to a lower value. */
1934
1935 const struct tune_params arm_strongarm_tune =
1936 {
1937 &generic_extra_costs, /* Insn extra costs. */
1938 &generic_addr_mode_costs, /* Addressing mode costs. */
1939 NULL, /* Sched adj cost. */
1940 arm_default_branch_cost,
1941 &arm_default_vec_cost,
1942 1, /* Constant limit. */
1943 3, /* Max cond insns. */
1944 8, /* Memset max inline. */
1945 1, /* Issue rate. */
1946 ARM_PREFETCH_NOT_BENEFICIAL,
1947 tune_params::PREF_CONST_POOL_TRUE,
1948 tune_params::PREF_LDRD_FALSE,
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1950 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1951 tune_params::DISPARAGE_FLAGS_NEITHER,
1952 tune_params::PREF_NEON_STRINGOPS_FALSE,
1953 tune_params::FUSE_NOTHING,
1954 tune_params::SCHED_AUTOPREF_OFF
1955 };
1956
1957 const struct tune_params arm_xscale_tune =
1958 {
1959 &generic_extra_costs, /* Insn extra costs. */
1960 &generic_addr_mode_costs, /* Addressing mode costs. */
1961 xscale_sched_adjust_cost,
1962 arm_default_branch_cost,
1963 &arm_default_vec_cost,
1964 2, /* Constant limit. */
1965 3, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 1, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 tune_params::PREF_CONST_POOL_TRUE,
1970 tune_params::PREF_LDRD_FALSE,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER,
1974 tune_params::PREF_NEON_STRINGOPS_FALSE,
1975 tune_params::FUSE_NOTHING,
1976 tune_params::SCHED_AUTOPREF_OFF
1977 };
1978
1979 const struct tune_params arm_9e_tune =
1980 {
1981 &generic_extra_costs, /* Insn extra costs. */
1982 &generic_addr_mode_costs, /* Addressing mode costs. */
1983 NULL, /* Sched adj cost. */
1984 arm_default_branch_cost,
1985 &arm_default_vec_cost,
1986 1, /* Constant limit. */
1987 5, /* Max cond insns. */
1988 8, /* Memset max inline. */
1989 1, /* Issue rate. */
1990 ARM_PREFETCH_NOT_BENEFICIAL,
1991 tune_params::PREF_CONST_POOL_TRUE,
1992 tune_params::PREF_LDRD_FALSE,
1993 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1995 tune_params::DISPARAGE_FLAGS_NEITHER,
1996 tune_params::PREF_NEON_STRINGOPS_FALSE,
1997 tune_params::FUSE_NOTHING,
1998 tune_params::SCHED_AUTOPREF_OFF
1999 };
2000
2001 const struct tune_params arm_marvell_pj4_tune =
2002 {
2003 &generic_extra_costs, /* Insn extra costs. */
2004 &generic_addr_mode_costs, /* Addressing mode costs. */
2005 NULL, /* Sched adj cost. */
2006 arm_default_branch_cost,
2007 &arm_default_vec_cost,
2008 1, /* Constant limit. */
2009 5, /* Max cond insns. */
2010 8, /* Memset max inline. */
2011 2, /* Issue rate. */
2012 ARM_PREFETCH_NOT_BENEFICIAL,
2013 tune_params::PREF_CONST_POOL_TRUE,
2014 tune_params::PREF_LDRD_FALSE,
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2016 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2017 tune_params::DISPARAGE_FLAGS_NEITHER,
2018 tune_params::PREF_NEON_STRINGOPS_FALSE,
2019 tune_params::FUSE_NOTHING,
2020 tune_params::SCHED_AUTOPREF_OFF
2021 };
2022
2023 const struct tune_params arm_v6t2_tune =
2024 {
2025 &generic_extra_costs, /* Insn extra costs. */
2026 &generic_addr_mode_costs, /* Addressing mode costs. */
2027 NULL, /* Sched adj cost. */
2028 arm_default_branch_cost,
2029 &arm_default_vec_cost,
2030 1, /* Constant limit. */
2031 5, /* Max cond insns. */
2032 8, /* Memset max inline. */
2033 1, /* Issue rate. */
2034 ARM_PREFETCH_NOT_BENEFICIAL,
2035 tune_params::PREF_CONST_POOL_FALSE,
2036 tune_params::PREF_LDRD_FALSE,
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2038 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2039 tune_params::DISPARAGE_FLAGS_NEITHER,
2040 tune_params::PREF_NEON_STRINGOPS_FALSE,
2041 tune_params::FUSE_NOTHING,
2042 tune_params::SCHED_AUTOPREF_OFF
2043 };
2044
2045
2046 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2047 const struct tune_params arm_cortex_tune =
2048 {
2049 &generic_extra_costs,
2050 &generic_addr_mode_costs, /* Addressing mode costs. */
2051 NULL, /* Sched adj cost. */
2052 arm_default_branch_cost,
2053 &arm_default_vec_cost,
2054 1, /* Constant limit. */
2055 5, /* Max cond insns. */
2056 8, /* Memset max inline. */
2057 2, /* Issue rate. */
2058 ARM_PREFETCH_NOT_BENEFICIAL,
2059 tune_params::PREF_CONST_POOL_FALSE,
2060 tune_params::PREF_LDRD_FALSE,
2061 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2063 tune_params::DISPARAGE_FLAGS_NEITHER,
2064 tune_params::PREF_NEON_STRINGOPS_FALSE,
2065 tune_params::FUSE_NOTHING,
2066 tune_params::SCHED_AUTOPREF_OFF
2067 };
2068
2069 const struct tune_params arm_cortex_a8_tune =
2070 {
2071 &cortexa8_extra_costs,
2072 &generic_addr_mode_costs, /* Addressing mode costs. */
2073 NULL, /* Sched adj cost. */
2074 arm_default_branch_cost,
2075 &arm_default_vec_cost,
2076 1, /* Constant limit. */
2077 5, /* Max cond insns. */
2078 8, /* Memset max inline. */
2079 2, /* Issue rate. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 tune_params::PREF_CONST_POOL_FALSE,
2082 tune_params::PREF_LDRD_FALSE,
2083 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2084 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2085 tune_params::DISPARAGE_FLAGS_NEITHER,
2086 tune_params::PREF_NEON_STRINGOPS_TRUE,
2087 tune_params::FUSE_NOTHING,
2088 tune_params::SCHED_AUTOPREF_OFF
2089 };
2090
2091 const struct tune_params arm_cortex_a7_tune =
2092 {
2093 &cortexa7_extra_costs,
2094 &generic_addr_mode_costs, /* Addressing mode costs. */
2095 NULL, /* Sched adj cost. */
2096 arm_default_branch_cost,
2097 &arm_default_vec_cost,
2098 1, /* Constant limit. */
2099 5, /* Max cond insns. */
2100 8, /* Memset max inline. */
2101 2, /* Issue rate. */
2102 ARM_PREFETCH_NOT_BENEFICIAL,
2103 tune_params::PREF_CONST_POOL_FALSE,
2104 tune_params::PREF_LDRD_FALSE,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_NEITHER,
2108 tune_params::PREF_NEON_STRINGOPS_TRUE,
2109 tune_params::FUSE_NOTHING,
2110 tune_params::SCHED_AUTOPREF_OFF
2111 };
2112
2113 const struct tune_params arm_cortex_a15_tune =
2114 {
2115 &cortexa15_extra_costs,
2116 &generic_addr_mode_costs, /* Addressing mode costs. */
2117 NULL, /* Sched adj cost. */
2118 arm_default_branch_cost,
2119 &arm_default_vec_cost,
2120 1, /* Constant limit. */
2121 2, /* Max cond insns. */
2122 8, /* Memset max inline. */
2123 3, /* Issue rate. */
2124 ARM_PREFETCH_NOT_BENEFICIAL,
2125 tune_params::PREF_CONST_POOL_FALSE,
2126 tune_params::PREF_LDRD_TRUE,
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2128 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2129 tune_params::DISPARAGE_FLAGS_ALL,
2130 tune_params::PREF_NEON_STRINGOPS_TRUE,
2131 tune_params::FUSE_NOTHING,
2132 tune_params::SCHED_AUTOPREF_FULL
2133 };
2134
2135 const struct tune_params arm_cortex_a35_tune =
2136 {
2137 &cortexa53_extra_costs,
2138 &generic_addr_mode_costs, /* Addressing mode costs. */
2139 NULL, /* Sched adj cost. */
2140 arm_default_branch_cost,
2141 &arm_default_vec_cost,
2142 1, /* Constant limit. */
2143 5, /* Max cond insns. */
2144 8, /* Memset max inline. */
2145 1, /* Issue rate. */
2146 ARM_PREFETCH_NOT_BENEFICIAL,
2147 tune_params::PREF_CONST_POOL_FALSE,
2148 tune_params::PREF_LDRD_FALSE,
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2150 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2151 tune_params::DISPARAGE_FLAGS_NEITHER,
2152 tune_params::PREF_NEON_STRINGOPS_TRUE,
2153 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2154 tune_params::SCHED_AUTOPREF_OFF
2155 };
2156
2157 const struct tune_params arm_cortex_a53_tune =
2158 {
2159 &cortexa53_extra_costs,
2160 &generic_addr_mode_costs, /* Addressing mode costs. */
2161 NULL, /* Sched adj cost. */
2162 arm_default_branch_cost,
2163 &arm_default_vec_cost,
2164 1, /* Constant limit. */
2165 5, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL,
2169 tune_params::PREF_CONST_POOL_FALSE,
2170 tune_params::PREF_LDRD_FALSE,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_NEITHER,
2174 tune_params::PREF_NEON_STRINGOPS_TRUE,
2175 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2176 tune_params::SCHED_AUTOPREF_OFF
2177 };
2178
2179 const struct tune_params arm_cortex_a57_tune =
2180 {
2181 &cortexa57_extra_costs,
2182 &generic_addr_mode_costs, /* addressing mode costs */
2183 NULL, /* Sched adj cost. */
2184 arm_default_branch_cost,
2185 &arm_default_vec_cost,
2186 1, /* Constant limit. */
2187 2, /* Max cond insns. */
2188 8, /* Memset max inline. */
2189 3, /* Issue rate. */
2190 ARM_PREFETCH_NOT_BENEFICIAL,
2191 tune_params::PREF_CONST_POOL_FALSE,
2192 tune_params::PREF_LDRD_TRUE,
2193 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2195 tune_params::DISPARAGE_FLAGS_ALL,
2196 tune_params::PREF_NEON_STRINGOPS_TRUE,
2197 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2198 tune_params::SCHED_AUTOPREF_FULL
2199 };
2200
2201 const struct tune_params arm_exynosm1_tune =
2202 {
2203 &exynosm1_extra_costs,
2204 &generic_addr_mode_costs, /* Addressing mode costs. */
2205 NULL, /* Sched adj cost. */
2206 arm_default_branch_cost,
2207 &arm_default_vec_cost,
2208 1, /* Constant limit. */
2209 2, /* Max cond insns. */
2210 8, /* Memset max inline. */
2211 3, /* Issue rate. */
2212 ARM_PREFETCH_NOT_BENEFICIAL,
2213 tune_params::PREF_CONST_POOL_FALSE,
2214 tune_params::PREF_LDRD_TRUE,
2215 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2216 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2217 tune_params::DISPARAGE_FLAGS_ALL,
2218 tune_params::PREF_NEON_STRINGOPS_TRUE,
2219 tune_params::FUSE_NOTHING,
2220 tune_params::SCHED_AUTOPREF_OFF
2221 };
2222
2223 const struct tune_params arm_xgene1_tune =
2224 {
2225 &xgene1_extra_costs,
2226 &generic_addr_mode_costs, /* Addressing mode costs. */
2227 NULL, /* Sched adj cost. */
2228 arm_default_branch_cost,
2229 &arm_default_vec_cost,
2230 1, /* Constant limit. */
2231 2, /* Max cond insns. */
2232 32, /* Memset max inline. */
2233 4, /* Issue rate. */
2234 ARM_PREFETCH_NOT_BENEFICIAL,
2235 tune_params::PREF_CONST_POOL_FALSE,
2236 tune_params::PREF_LDRD_TRUE,
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2238 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2239 tune_params::DISPARAGE_FLAGS_ALL,
2240 tune_params::PREF_NEON_STRINGOPS_FALSE,
2241 tune_params::FUSE_NOTHING,
2242 tune_params::SCHED_AUTOPREF_OFF
2243 };
2244
2245 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2246 less appealing. Set max_insns_skipped to a low value. */
2247
2248 const struct tune_params arm_cortex_a5_tune =
2249 {
2250 &cortexa5_extra_costs,
2251 &generic_addr_mode_costs, /* Addressing mode costs. */
2252 NULL, /* Sched adj cost. */
2253 arm_cortex_a5_branch_cost,
2254 &arm_default_vec_cost,
2255 1, /* Constant limit. */
2256 1, /* Max cond insns. */
2257 8, /* Memset max inline. */
2258 2, /* Issue rate. */
2259 ARM_PREFETCH_NOT_BENEFICIAL,
2260 tune_params::PREF_CONST_POOL_FALSE,
2261 tune_params::PREF_LDRD_FALSE,
2262 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2263 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2264 tune_params::DISPARAGE_FLAGS_NEITHER,
2265 tune_params::PREF_NEON_STRINGOPS_TRUE,
2266 tune_params::FUSE_NOTHING,
2267 tune_params::SCHED_AUTOPREF_OFF
2268 };
2269
2270 const struct tune_params arm_cortex_a9_tune =
2271 {
2272 &cortexa9_extra_costs,
2273 &generic_addr_mode_costs, /* Addressing mode costs. */
2274 cortex_a9_sched_adjust_cost,
2275 arm_default_branch_cost,
2276 &arm_default_vec_cost,
2277 1, /* Constant limit. */
2278 5, /* Max cond insns. */
2279 8, /* Memset max inline. */
2280 2, /* Issue rate. */
2281 ARM_PREFETCH_BENEFICIAL(4,32,32),
2282 tune_params::PREF_CONST_POOL_FALSE,
2283 tune_params::PREF_LDRD_FALSE,
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2285 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2286 tune_params::DISPARAGE_FLAGS_NEITHER,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE,
2288 tune_params::FUSE_NOTHING,
2289 tune_params::SCHED_AUTOPREF_OFF
2290 };
2291
2292 const struct tune_params arm_cortex_a12_tune =
2293 {
2294 &cortexa12_extra_costs,
2295 &generic_addr_mode_costs, /* Addressing mode costs. */
2296 NULL, /* Sched adj cost. */
2297 arm_default_branch_cost,
2298 &arm_default_vec_cost, /* Vectorizer costs. */
2299 1, /* Constant limit. */
2300 2, /* Max cond insns. */
2301 8, /* Memset max inline. */
2302 2, /* Issue rate. */
2303 ARM_PREFETCH_NOT_BENEFICIAL,
2304 tune_params::PREF_CONST_POOL_FALSE,
2305 tune_params::PREF_LDRD_TRUE,
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2307 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2308 tune_params::DISPARAGE_FLAGS_ALL,
2309 tune_params::PREF_NEON_STRINGOPS_TRUE,
2310 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 const struct tune_params arm_cortex_a73_tune =
2315 {
2316 &cortexa57_extra_costs,
2317 &generic_addr_mode_costs, /* Addressing mode costs. */
2318 NULL, /* Sched adj cost. */
2319 arm_default_branch_cost,
2320 &arm_default_vec_cost, /* Vectorizer costs. */
2321 1, /* Constant limit. */
2322 2, /* Max cond insns. */
2323 8, /* Memset max inline. */
2324 2, /* Issue rate. */
2325 ARM_PREFETCH_NOT_BENEFICIAL,
2326 tune_params::PREF_CONST_POOL_FALSE,
2327 tune_params::PREF_LDRD_TRUE,
2328 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2329 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2330 tune_params::DISPARAGE_FLAGS_ALL,
2331 tune_params::PREF_NEON_STRINGOPS_TRUE,
2332 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2333 tune_params::SCHED_AUTOPREF_FULL
2334 };
2335
2336 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2337 single cycle to execute, so a MOVW/MOVT pair costs two cycles. An LDR from
2338 the constant pool likewise takes two cycles to execute, but mildly increases
2339 pipelining opportunity (consecutive loads/stores can be pipelined together,
2340 saving one cycle), and may also improve icache utilisation. Hence we prefer
2341 the constant pool for such processors. */
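/* For illustration only, the two typical sequences for materializing a
32-bit constant on such cores are

movw r0, #:lower16:sym @ 1 cycle
movt r0, #:upper16:sym @ 1 cycle

and a PC-relative literal-pool load (label name is a placeholder)

ldr r0, .Lpool_entry @ 2 cycles

Both cost two cycles, so the preference above is driven by the secondary
effects (pipelining and icache behaviour) rather than raw latency. */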
2342
2343 const struct tune_params arm_v7m_tune =
2344 {
2345 &v7m_extra_costs,
2346 &generic_addr_mode_costs, /* Addressing mode costs. */
2347 NULL, /* Sched adj cost. */
2348 arm_cortex_m_branch_cost,
2349 &arm_default_vec_cost,
2350 1, /* Constant limit. */
2351 2, /* Max cond insns. */
2352 8, /* Memset max inline. */
2353 1, /* Issue rate. */
2354 ARM_PREFETCH_NOT_BENEFICIAL,
2355 tune_params::PREF_CONST_POOL_TRUE,
2356 tune_params::PREF_LDRD_FALSE,
2357 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2358 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2359 tune_params::DISPARAGE_FLAGS_NEITHER,
2360 tune_params::PREF_NEON_STRINGOPS_FALSE,
2361 tune_params::FUSE_NOTHING,
2362 tune_params::SCHED_AUTOPREF_OFF
2363 };
2364
2365 /* Cortex-M7 tuning. */
2366
2367 const struct tune_params arm_cortex_m7_tune =
2368 {
2369 &v7m_extra_costs,
2370 &generic_addr_mode_costs, /* Addressing mode costs. */
2371 NULL, /* Sched adj cost. */
2372 arm_cortex_m7_branch_cost,
2373 &arm_default_vec_cost,
2374 0, /* Constant limit. */
2375 1, /* Max cond insns. */
2376 8, /* Memset max inline. */
2377 2, /* Issue rate. */
2378 ARM_PREFETCH_NOT_BENEFICIAL,
2379 tune_params::PREF_CONST_POOL_TRUE,
2380 tune_params::PREF_LDRD_FALSE,
2381 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2382 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2383 tune_params::DISPARAGE_FLAGS_NEITHER,
2384 tune_params::PREF_NEON_STRINGOPS_FALSE,
2385 tune_params::FUSE_NOTHING,
2386 tune_params::SCHED_AUTOPREF_OFF
2387 };
2388
2389 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2390 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2391 cortex-m23. */
2392 const struct tune_params arm_v6m_tune =
2393 {
2394 &generic_extra_costs, /* Insn extra costs. */
2395 &generic_addr_mode_costs, /* Addressing mode costs. */
2396 NULL, /* Sched adj cost. */
2397 arm_default_branch_cost,
2398 &arm_default_vec_cost, /* Vectorizer costs. */
2399 1, /* Constant limit. */
2400 5, /* Max cond insns. */
2401 8, /* Memset max inline. */
2402 1, /* Issue rate. */
2403 ARM_PREFETCH_NOT_BENEFICIAL,
2404 tune_params::PREF_CONST_POOL_FALSE,
2405 tune_params::PREF_LDRD_FALSE,
2406 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2407 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2408 tune_params::DISPARAGE_FLAGS_NEITHER,
2409 tune_params::PREF_NEON_STRINGOPS_FALSE,
2410 tune_params::FUSE_NOTHING,
2411 tune_params::SCHED_AUTOPREF_OFF
2412 };
2413
2414 const struct tune_params arm_fa726te_tune =
2415 {
2416 &generic_extra_costs, /* Insn extra costs. */
2417 &generic_addr_mode_costs, /* Addressing mode costs. */
2418 fa726te_sched_adjust_cost,
2419 arm_default_branch_cost,
2420 &arm_default_vec_cost,
2421 1, /* Constant limit. */
2422 5, /* Max cond insns. */
2423 8, /* Memset max inline. */
2424 2, /* Issue rate. */
2425 ARM_PREFETCH_NOT_BENEFICIAL,
2426 tune_params::PREF_CONST_POOL_TRUE,
2427 tune_params::PREF_LDRD_FALSE,
2428 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2429 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2430 tune_params::DISPARAGE_FLAGS_NEITHER,
2431 tune_params::PREF_NEON_STRINGOPS_FALSE,
2432 tune_params::FUSE_NOTHING,
2433 tune_params::SCHED_AUTOPREF_OFF
2434 };
2435
2436 char *accepted_branch_protection_string = NULL;
2437
2438 /* Auto-generated CPU, FPU and architecture tables. */
2439 #include "arm-cpu-data.h"
2440
2441 /* The name of the preprocessor macro to define for this architecture. PROFILE
2442 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2443 is thus chosen to be big enough to hold the longest architecture name. */
2444
2445 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2446
2447 /* Supported TLS relocations. */
2448
2449 enum tls_reloc {
2450 TLS_GD32,
2451 TLS_GD32_FDPIC,
2452 TLS_LDM32,
2453 TLS_LDM32_FDPIC,
2454 TLS_LDO32,
2455 TLS_IE32,
2456 TLS_IE32_FDPIC,
2457 TLS_LE32,
2458 TLS_DESCSEQ /* GNU scheme */
2459 };
2460
2461 /* The maximum number of insns to be used when loading a constant. */
2462 inline static int
2463 arm_constant_limit (bool size_p)
2464 {
2465 return size_p ? 1 : current_tune->constant_limit;
2466 }
2467
2468 /* Emit an insn that's a simple single-set. Both the operands must be known
2469 to be valid. */
2470 inline static rtx_insn *
2471 emit_set_insn (rtx x, rtx y)
2472 {
2473 return emit_insn (gen_rtx_SET (x, y));
2474 }
2475
2476 /* Return the number of bits set in VALUE. */
2477 static unsigned
2478 bit_count (unsigned long value)
2479 {
2480 unsigned long count = 0;
2481
2482 while (value)
2483 {
2484 count++;
2485 value &= value - 1; /* Clear the least-significant set bit. */
2486 }
2487
2488 return count;
2489 }
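/* For illustration: bit_count (0x29) sees VALUE go 0x29 -> 0x28 -> 0x20 -> 0
(bits 0, 3 and 5 cleared in turn) and returns 3. */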
2490
2491 /* Return the number of bits set in BMAP. */
2492 static unsigned
2493 bitmap_popcount (const sbitmap bmap)
2494 {
2495 unsigned int count = 0;
2496 unsigned int n = 0;
2497 sbitmap_iterator sbi;
2498
2499 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2500 count++;
2501 return count;
2502 }
2503
2504 typedef struct
2505 {
2506 machine_mode mode;
2507 const char *name;
2508 } arm_fixed_mode_set;
2509
2510 /* A small helper for setting the fixed-point optab libfuncs. */
2511
2512 static void
2513 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2514 const char *funcname, const char *modename,
2515 int num_suffix)
2516 {
2517 char buffer[50];
2518
2519 if (num_suffix == 0)
2520 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2521 else
2522 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2523
2524 set_optab_libfunc (optable, mode, buffer);
2525 }
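/* For example, arm_set_fixed_optab_libfunc (add_optab, E_SQmode, "add", "sq", 3)
registers the libcall name "__gnu_addsq3", matching the __gnu_-prefixed
fixed-point routines provided by libgcc. */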
2526
2527 static void
2528 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2529 machine_mode from, const char *funcname,
2530 const char *toname, const char *fromname)
2531 {
2532 char buffer[50];
2533 const char *maybe_suffix_2 = "";
2534
2535 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2536 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2537 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2538 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2539 maybe_suffix_2 = "2";
2540
2541 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2542 maybe_suffix_2);
2543
2544 set_conv_libfunc (optable, to, from, buffer);
2545 }
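/* For example, a QQmode to HQmode conversion (both signed fract modes) is
named "__gnu_fractqqhq2", while SImode to SQmode is named "__gnu_fractsisq":
SImode is not a fixed-point mode, so no "2" suffix is appended. */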
2546
2547 static GTY(()) rtx speculation_barrier_libfunc;
2548
2549 /* Record that we have no arithmetic or comparison libfuncs for
2550 machine mode MODE. */
2551
2552 static void
2553 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2554 {
2555 /* Arithmetic. */
2556 set_optab_libfunc (add_optab, mode, NULL);
2557 set_optab_libfunc (sdiv_optab, mode, NULL);
2558 set_optab_libfunc (smul_optab, mode, NULL);
2559 set_optab_libfunc (neg_optab, mode, NULL);
2560 set_optab_libfunc (sub_optab, mode, NULL);
2561
2562 /* Comparisons. */
2563 set_optab_libfunc (eq_optab, mode, NULL);
2564 set_optab_libfunc (ne_optab, mode, NULL);
2565 set_optab_libfunc (lt_optab, mode, NULL);
2566 set_optab_libfunc (le_optab, mode, NULL);
2567 set_optab_libfunc (ge_optab, mode, NULL);
2568 set_optab_libfunc (gt_optab, mode, NULL);
2569 set_optab_libfunc (unord_optab, mode, NULL);
2570 }
2571
2572 /* Set up library functions unique to ARM. */
2573 static void
2574 arm_init_libfuncs (void)
2575 {
2576 machine_mode mode_iter;
2577
2578 /* For Linux, we have access to kernel support for atomic operations. */
2579 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2580 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2581
2582 /* There are no special library functions unless we are using the
2583 ARM BPABI. */
2584 if (!TARGET_BPABI)
2585 return;
2586
2587 /* The functions below are described in Section 4 of the "Run-Time
2588 ABI for the ARM architecture", Version 1.0. */
2589
2590 /* Double-precision floating-point arithmetic. Table 2. */
2591 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2592 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2593 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2594 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2595 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2596
2597 /* Double-precision comparisons. Table 3. */
2598 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2599 set_optab_libfunc (ne_optab, DFmode, NULL);
2600 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2601 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2602 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2603 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2604 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2605
2606 /* Single-precision floating-point arithmetic. Table 4. */
2607 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2608 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2609 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2610 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2611 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2612
2613 /* Single-precision comparisons. Table 5. */
2614 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2615 set_optab_libfunc (ne_optab, SFmode, NULL);
2616 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2617 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2618 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2619 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2620 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2621
2622 /* Floating-point to integer conversions. Table 6. */
2623 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2624 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2625 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2626 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2627 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2628 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2629 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2630 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2631
2632 /* Conversions between floating types. Table 7. */
2633 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2634 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2635
2636 /* Integer to floating-point conversions. Table 8. */
2637 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2638 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2639 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2640 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2641 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2642 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2643 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2644 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2645
2646 /* Long long. Table 9. */
2647 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2648 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2649 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2650 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2651 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2652 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2653 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2654 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2655
2656 /* Integer (32/32->32) division. \S 4.3.1. */
2657 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2658 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2659
2660 /* The divmod functions are designed so that they can be used for
2661 plain division, even though they return both the quotient and the
2662 remainder. The quotient is returned in the usual location (i.e.,
2663 r0 for SImode, {r0, r1} for DImode), just as would be expected
2664 for an ordinary division routine. Because the AAPCS calling
2665 conventions specify that all of { r0, r1, r2, r3 } are
2666 call-clobbered registers, there is no need to tell the compiler
2667 explicitly that those registers are clobbered by these
2668 routines. */
2669 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2670 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2671
2672 /* For SImode division the ABI provides div-without-mod routines,
2673 which are faster. */
2674 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2675 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2676
2677 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2678 divmod libcalls instead. */
2679 set_optab_libfunc (smod_optab, DImode, NULL);
2680 set_optab_libfunc (umod_optab, DImode, NULL);
2681 set_optab_libfunc (smod_optab, SImode, NULL);
2682 set_optab_libfunc (umod_optab, SImode, NULL);
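/* The net effect (illustrative): a SImode "a % b" is expanded through the
__aeabi_idivmod libcall, taking the remainder from its second result
register, whereas "a / b" uses the dedicated __aeabi_idiv entry point. */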
2683
2684 /* Half-precision float operations. The compiler handles all operations
2685 with NULL libfuncs by converting to SFmode. */
2686 switch (arm_fp16_format)
2687 {
2688 case ARM_FP16_FORMAT_IEEE:
2689 case ARM_FP16_FORMAT_ALTERNATIVE:
2690
2691 /* Conversions. */
2692 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2693 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2694 ? "__gnu_f2h_ieee"
2695 : "__gnu_f2h_alternative"));
2696 set_conv_libfunc (sext_optab, SFmode, HFmode,
2697 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2698 ? "__gnu_h2f_ieee"
2699 : "__gnu_h2f_alternative"));
2700
2701 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2702 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2703 ? "__gnu_d2h_ieee"
2704 : "__gnu_d2h_alternative"));
2705
2706 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2707 break;
2708
2709 default:
2710 break;
2711 }
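/* With the HFmode arithmetic libfuncs cleared above, an __fp16 operation such
as "a + b" is carried out (illustratively) by widening both operands with
the registered __gnu_h2f_* routine, operating in SFmode, and narrowing any
stored result back with the matching __gnu_f2h_* routine. */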
2712
2713 /* For all possible libcalls in BFmode, record NULL. */
2714 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2715 {
2716 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2717 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2718 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2719 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2720 }
2721 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2722
2723 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2724 {
2725 const arm_fixed_mode_set fixed_arith_modes[] =
2726 {
2727 { E_QQmode, "qq" },
2728 { E_UQQmode, "uqq" },
2729 { E_HQmode, "hq" },
2730 { E_UHQmode, "uhq" },
2731 { E_SQmode, "sq" },
2732 { E_USQmode, "usq" },
2733 { E_DQmode, "dq" },
2734 { E_UDQmode, "udq" },
2735 { E_TQmode, "tq" },
2736 { E_UTQmode, "utq" },
2737 { E_HAmode, "ha" },
2738 { E_UHAmode, "uha" },
2739 { E_SAmode, "sa" },
2740 { E_USAmode, "usa" },
2741 { E_DAmode, "da" },
2742 { E_UDAmode, "uda" },
2743 { E_TAmode, "ta" },
2744 { E_UTAmode, "uta" }
2745 };
2746 const arm_fixed_mode_set fixed_conv_modes[] =
2747 {
2748 { E_QQmode, "qq" },
2749 { E_UQQmode, "uqq" },
2750 { E_HQmode, "hq" },
2751 { E_UHQmode, "uhq" },
2752 { E_SQmode, "sq" },
2753 { E_USQmode, "usq" },
2754 { E_DQmode, "dq" },
2755 { E_UDQmode, "udq" },
2756 { E_TQmode, "tq" },
2757 { E_UTQmode, "utq" },
2758 { E_HAmode, "ha" },
2759 { E_UHAmode, "uha" },
2760 { E_SAmode, "sa" },
2761 { E_USAmode, "usa" },
2762 { E_DAmode, "da" },
2763 { E_UDAmode, "uda" },
2764 { E_TAmode, "ta" },
2765 { E_UTAmode, "uta" },
2766 { E_QImode, "qi" },
2767 { E_HImode, "hi" },
2768 { E_SImode, "si" },
2769 { E_DImode, "di" },
2770 { E_TImode, "ti" },
2771 { E_SFmode, "sf" },
2772 { E_DFmode, "df" }
2773 };
2774 unsigned int i, j;
2775
2776 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2777 {
2778 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2779 "add", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2781 "ssadd", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2783 "usadd", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2785 "sub", fixed_arith_modes[i].name, 3);
2786 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2787 "sssub", fixed_arith_modes[i].name, 3);
2788 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2789 "ussub", fixed_arith_modes[i].name, 3);
2790 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2791 "mul", fixed_arith_modes[i].name, 3);
2792 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2793 "ssmul", fixed_arith_modes[i].name, 3);
2794 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2795 "usmul", fixed_arith_modes[i].name, 3);
2796 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2797 "div", fixed_arith_modes[i].name, 3);
2798 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2799 "udiv", fixed_arith_modes[i].name, 3);
2800 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2801 "ssdiv", fixed_arith_modes[i].name, 3);
2802 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2803 "usdiv", fixed_arith_modes[i].name, 3);
2804 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2805 "neg", fixed_arith_modes[i].name, 2);
2806 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2807 "ssneg", fixed_arith_modes[i].name, 2);
2808 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2809 "usneg", fixed_arith_modes[i].name, 2);
2810 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2811 "ashl", fixed_arith_modes[i].name, 3);
2812 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2813 "ashr", fixed_arith_modes[i].name, 3);
2814 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2815 "lshr", fixed_arith_modes[i].name, 3);
2816 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2817 "ssashl", fixed_arith_modes[i].name, 3);
2818 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2819 "usashl", fixed_arith_modes[i].name, 3);
2820 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2821 "cmp", fixed_arith_modes[i].name, 2);
2822 }
2823
2824 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2825 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2826 {
2827 if (i == j
2828 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2829 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2830 continue;
2831
2832 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2833 fixed_conv_modes[j].mode, "fract",
2834 fixed_conv_modes[i].name,
2835 fixed_conv_modes[j].name);
2836 arm_set_fixed_conv_libfunc (satfract_optab,
2837 fixed_conv_modes[i].mode,
2838 fixed_conv_modes[j].mode, "satfract",
2839 fixed_conv_modes[i].name,
2840 fixed_conv_modes[j].name);
2841 arm_set_fixed_conv_libfunc (fractuns_optab,
2842 fixed_conv_modes[i].mode,
2843 fixed_conv_modes[j].mode, "fractuns",
2844 fixed_conv_modes[i].name,
2845 fixed_conv_modes[j].name);
2846 arm_set_fixed_conv_libfunc (satfractuns_optab,
2847 fixed_conv_modes[i].mode,
2848 fixed_conv_modes[j].mode, "satfractuns",
2849 fixed_conv_modes[i].name,
2850 fixed_conv_modes[j].name);
2851 }
2852 }
2853
2854 if (TARGET_AAPCS_BASED)
2855 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2856
2857 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2858 }
2859
2860 /* Implement TARGET_GIMPLE_FOLD_BUILTIN. */
2861 static bool
2862 arm_gimple_fold_builtin (gimple_stmt_iterator *gsi)
2863 {
2864 gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
2865 tree fndecl = gimple_call_fndecl (stmt);
2866 unsigned int code = DECL_MD_FUNCTION_CODE (fndecl);
2867 unsigned int subcode = code >> ARM_BUILTIN_SHIFT;
2868 gimple *new_stmt = NULL;
2869 switch (code & ARM_BUILTIN_CLASS)
2870 {
2871 case ARM_BUILTIN_GENERAL:
2872 break;
2873 case ARM_BUILTIN_MVE:
2874 new_stmt = arm_mve::gimple_fold_builtin (subcode, stmt);
2875 }
2876 if (!new_stmt)
2877 return false;
2878
2879 gsi_replace (gsi, new_stmt, true);
2880 return true;
2881 }
2882
2883 /* On AAPCS systems, this is the "struct __va_list". */
2884 static GTY(()) tree va_list_type;
2885
2886 /* Return the type to use as __builtin_va_list. */
2887 static tree
2888 arm_build_builtin_va_list (void)
2889 {
2890 tree va_list_name;
2891 tree ap_field;
2892
2893 if (!TARGET_AAPCS_BASED)
2894 return std_build_builtin_va_list ();
2895
2896 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2897 defined as:
2898
2899 struct __va_list
2900 {
2901 void *__ap;
2902 };
2903
2904 The C Library ABI further reinforces this definition in \S
2905 4.1.
2906
2907 We must follow this definition exactly. The structure tag
2908 name is visible in C++ mangled names, and thus forms a part
2909 of the ABI. The field name may be used by people who
2910 #include <stdarg.h>. */
2911 /* Create the type. */
2912 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2913 /* Give it the required name. */
2914 va_list_name = build_decl (BUILTINS_LOCATION,
2915 TYPE_DECL,
2916 get_identifier ("__va_list"),
2917 va_list_type);
2918 DECL_ARTIFICIAL (va_list_name) = 1;
2919 TYPE_NAME (va_list_type) = va_list_name;
2920 TYPE_STUB_DECL (va_list_type) = va_list_name;
2921 /* Create the __ap field. */
2922 ap_field = build_decl (BUILTINS_LOCATION,
2923 FIELD_DECL,
2924 get_identifier ("__ap"),
2925 ptr_type_node);
2926 DECL_ARTIFICIAL (ap_field) = 1;
2927 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2928 TYPE_FIELDS (va_list_type) = ap_field;
2929 /* Compute its layout. */
2930 layout_type (va_list_type);
2931
2932 return va_list_type;
2933 }
2934
2935 /* Return an expression of type "void *" pointing to the next
2936 available argument in a variable-argument list. VALIST is the
2937 user-level va_list object, of type __builtin_va_list. */
2938 static tree
2939 arm_extract_valist_ptr (tree valist)
2940 {
2941 if (TREE_TYPE (valist) == error_mark_node)
2942 return error_mark_node;
2943
2944 /* On an AAPCS target, the pointer is stored within "struct
2945 va_list". */
2946 if (TARGET_AAPCS_BASED)
2947 {
2948 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2949 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2950 valist, ap_field, NULL_TREE);
2951 }
2952
2953 return valist;
2954 }
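/* In C terms (illustrative): on AAPCS targets the tree built above denotes
"ap.__ap" for a va_list object "ap"; elsewhere VALIST is already the
underlying pointer and is returned unchanged. */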
2955
2956 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2957 static void
2958 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2959 {
2960 valist = arm_extract_valist_ptr (valist);
2961 std_expand_builtin_va_start (valist, nextarg);
2962 }
2963
2964 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2965 static tree
2966 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2967 gimple_seq *post_p)
2968 {
2969 valist = arm_extract_valist_ptr (valist);
2970 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2971 }
2972
2973 /* Check any incompatible options that the user has specified. */
2974 static void
2975 arm_option_check_internal (struct gcc_options *opts)
2976 {
2977 int flags = opts->x_target_flags;
2978
2979 /* iWMMXt and NEON are incompatible. */
2980 if (TARGET_IWMMXT
2981 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2982 error ("iWMMXt and NEON are incompatible");
2983
2984 /* Make sure that the processor choice does not conflict with any of the
2985 other command line choices. */
2986 if (TARGET_ARM_P (flags)
2987 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2988 error ("target CPU does not support ARM mode");
2989
2990 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2991 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2992 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2993
2994 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2995 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2996
2997 /* If this target is normally configured to use APCS frames, warn if they
2998 are turned off and debugging is turned on. */
2999 if (TARGET_ARM_P (flags)
3000 && write_symbols != NO_DEBUG
3001 && !TARGET_APCS_FRAME
3002 && (TARGET_DEFAULT & MASK_APCS_FRAME))
3003 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
3004 "debugging");
3005
3006 /* iWMMXt unsupported under Thumb mode. */
3007 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
3008 error ("iWMMXt unsupported under Thumb mode");
3009
3010 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
3011 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
3012
3013 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
3014 {
3015 error ("RTP PIC is incompatible with Thumb");
3016 flag_pic = 0;
3017 }
3018
3019 if (target_pure_code || target_slow_flash_data)
3020 {
3021 const char *flag = (target_pure_code ? "-mpure-code" :
3022 "-mslow-flash-data");
3023 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
3024
3025 /* We only support -mslow-flash-data on M-profile targets with
3026 MOVT. */
3027 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
3028 error ("%s only supports non-pic code on M-profile targets with the "
3029 "MOVT instruction", flag);
3030
3031 /* We only support -mpure-code on M-profile targets. */
3032 if (target_pure_code && common_unsupported_modes)
3033 error ("%s only supports non-pic code on M-profile targets", flag);
3034
3035 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
3036 -mword-relocations forbids relocation of MOVT/MOVW. */
3037 if (target_word_relocations)
3038 error ("%s incompatible with %<-mword-relocations%>", flag);
3039 }
3040 }
3041
3042 /* Recompute the global settings depending on target attribute options. */
3043
3044 static void
3045 arm_option_params_internal (void)
3046 {
3047 /* If we are not using the default (ARM mode) section anchor offset
3048 ranges, then set the correct ranges now. */
3049 if (TARGET_THUMB1)
3050 {
3051 /* Thumb-1 LDR instructions cannot have negative offsets.
3052 Permissible positive offset ranges are 5-bit (for byte loads),
3053 6-bit (for halfword loads), or 7-bit (for word loads).
3054 Empirical results suggest a 7-bit anchor range gives the best
3055 overall code size. */
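/* The 7-bit figure is in bytes: the Thumb-1 word-load encoding has a 5-bit
immediate scaled by 4, giving byte offsets 0..124, so 127 is a safe upper
bound for the anchor range. */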
3056 targetm.min_anchor_offset = 0;
3057 targetm.max_anchor_offset = 127;
3058 }
3059 else if (TARGET_THUMB2)
3060 {
3061 /* The minimum is set such that the total size of the block
3062 for a particular anchor is 248 + 1 + 4095 bytes, which is
3063 divisible by eight, ensuring natural spacing of anchors. */
3064 targetm.min_anchor_offset = -248;
3065 targetm.max_anchor_offset = 4095;
3066 }
3067 else
3068 {
3069 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3070 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3071 }
3072
3073 /* Increase the number of conditional instructions with -Os. */
3074 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3075
3076 /* For THUMB2, we limit the conditional sequence to one IT block. */
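/* An IT instruction can predicate at most four following instructions (only
one when -mrestrict-it is in effect), which is the value that
MAX_INSN_PER_IT_BLOCK supplies here. */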
3077 if (TARGET_THUMB2)
3078 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3079
3080 if (TARGET_THUMB1)
3081 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3082 else
3083 targetm.md_asm_adjust = arm_md_asm_adjust;
3084 }
3085
3086 /* True if -mflip-thumb should next add an attribute for the default
3087 mode, false if it should next add an attribute for the opposite mode. */
3088 static GTY(()) bool thumb_flipper;
3089
3090 /* Options after initial target override. */
3091 static GTY(()) tree init_optimize;
3092
3093 static void
3094 arm_override_options_after_change_1 (struct gcc_options *opts,
3095 struct gcc_options *opts_set)
3096 {
3097 /* -falign-functions without argument: supply one. */
3098 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3099 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3100 && opts->x_optimize_size ? "2" : "4";
3101 }
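/* For example (illustrative): "-mthumb -Os" with no explicit alignment
argument selects 2-byte function alignment, while any other combination
falls back to 4-byte alignment. */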
3102
3103 /* Implement targetm.override_options_after_change. */
3104
3105 static void
3106 arm_override_options_after_change (void)
3107 {
3108 arm_override_options_after_change_1 (&global_options, &global_options_set);
3109 }
3110
3111 /* Implement TARGET_OPTION_RESTORE. */
3112 static void
3113 arm_option_restore (struct gcc_options */* opts */,
3114 struct gcc_options */* opts_set */,
3115 struct cl_target_option *ptr)
3116 {
3117 arm_configure_build_target (&arm_active_target, ptr, false);
3118 arm_option_reconfigure_globals ();
3119 }
3120
3121 /* Reset options between modes that the user has specified. */
3122 static void
3123 arm_option_override_internal (struct gcc_options *opts,
3124 struct gcc_options *opts_set)
3125 {
3126 arm_override_options_after_change_1 (opts, opts_set);
3127
3128 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3129 {
3130 /* The default is to enable interworking, so this warning message would
3131 be confusing to users who have just compiled with
3132 eg, -march=armv4. */
3133 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3134 opts->x_target_flags &= ~MASK_INTERWORK;
3135 }
3136
3137 if (TARGET_THUMB_P (opts->x_target_flags)
3138 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3139 {
3140 warning (0, "target CPU does not support THUMB instructions");
3141 opts->x_target_flags &= ~MASK_THUMB;
3142 }
3143
3144 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3145 {
3146 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3147 opts->x_target_flags &= ~MASK_APCS_FRAME;
3148 }
3149
3150 /* Callee super interworking implies thumb interworking. Adding
3151 this to the flags here simplifies the logic elsewhere. */
3152 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3153 opts->x_target_flags |= MASK_INTERWORK;
3154
3155 /* Need to remember initial values so combinations of options like
3156 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3157 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3158
3159 if (! opts_set->x_arm_restrict_it)
3160 opts->x_arm_restrict_it = arm_arch8;
3161
3162 /* ARM execution state and M profile don't have [restrict] IT. */
3163 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3164 opts->x_arm_restrict_it = 0;
3165
3166 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3167 if (!opts_set->x_arm_restrict_it
3168 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3169 opts->x_arm_restrict_it = 0;
3170
3171 /* Enable -munaligned-access by default for
3172 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3173 i.e. Thumb2 and ARM state only.
3174 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3175 - ARMv8 architecture-based processors.
3176
3177 Disable -munaligned-access by default for
3178 - all pre-ARMv6 architecture-based processors
3179 - ARMv6-M architecture-based processors
3180 - ARMv8-M Baseline processors. */
3181
3182 if (! opts_set->x_unaligned_access)
3183 {
3184 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3185 && arm_arch6 && (arm_arch_notm || arm_arch7));
3186 }
3187 else if (opts->x_unaligned_access == 1
3188 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3189 {
3190 warning (0, "target CPU does not support unaligned accesses");
3191 opts->x_unaligned_access = 0;
3192 }
3193
3194 /* Don't warn since it's on by default in -O2. */
3195 if (TARGET_THUMB1_P (opts->x_target_flags))
3196 opts->x_flag_schedule_insns = 0;
3197 else
3198 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3199
3200 /* Disable shrink-wrap when optimizing function for size, since it tends to
3201 generate additional returns. */
3202 if (optimize_function_for_size_p (cfun)
3203 && TARGET_THUMB2_P (opts->x_target_flags))
3204 opts->x_flag_shrink_wrap = false;
3205 else
3206 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3207
3208 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3209 - epilogue_insns - does not accurately model the corresponding insns
3210 emitted in the asm file. In particular, see the comment in thumb_exit
3211 'Find out how many of the (return) argument registers we can corrupt'.
3212 As a consequence, the epilogue may clobber registers without fipa-ra
3213 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3214 TODO: Accurately model clobbers for epilogue_insns and reenable
3215 fipa-ra. */
3216 if (TARGET_THUMB1_P (opts->x_target_flags))
3217 opts->x_flag_ipa_ra = 0;
3218 else
3219 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3220
3221 /* Thumb2 inline assembly code should always use unified syntax.
3222 This will apply to ARM and Thumb1 eventually. */
3223 if (TARGET_THUMB2_P (opts->x_target_flags))
3224 opts->x_inline_asm_unified = true;
3225
3226 if (arm_stack_protector_guard == SSP_GLOBAL
3227 && opts->x_arm_stack_protector_guard_offset_str)
3228 {
3229 error ("incompatible options %<-mstack-protector-guard=global%> and "
3230 "%<-mstack-protector-guard-offset=%s%>",
3231 arm_stack_protector_guard_offset_str);
3232 }
3233
3234 if (opts->x_arm_stack_protector_guard_offset_str)
3235 {
3236 char *end;
3237 const char *str = arm_stack_protector_guard_offset_str;
3238 errno = 0;
3239 long offs = strtol (arm_stack_protector_guard_offset_str, &end, 0);
3240 if (!*str || *end || errno)
3241 error ("%qs is not a valid offset in %qs", str,
3242 "-mstack-protector-guard-offset=");
3243 arm_stack_protector_guard_offset = offs;
3244 }
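/* Editorial illustration (not from the original source): because strtol is
   called with base 0, the offset accepts any of the usual C bases, e.g.

     -mstack-protector-guard=tls -mstack-protector-guard-offset=32
     -mstack-protector-guard=tls -mstack-protector-guard-offset=0x20

   are equivalent, while an empty or trailing-garbage value (such as "32kB")
   is rejected by the !*str / *end / errno checks, and combining the offset
   with -mstack-protector-guard=global is diagnosed just above.  */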
3245
3246 if (arm_current_function_pac_enabled_p ())
3247 {
3248 if (!arm_arch8m_main)
3249 error ("This architecture does not support branch protection "
3250 "instructions");
3251 if (TARGET_TPCS_FRAME)
3252 sorry ("return address signing is not supported with %<-mtpcs-frame%>");
3253 }
3254
3255 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3256 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3257 #endif
3258 }
3259
3260 static sbitmap isa_all_fpubits_internal;
3261 static sbitmap isa_all_fpbits;
3262 static sbitmap isa_quirkbits;
3263
3264 /* Configure a build target TARGET from the user-specified options OPTS and
3265 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3266 architecture have been specified, but the two are not identical. */
3267 void
3268 arm_configure_build_target (struct arm_build_target *target,
3269 struct cl_target_option *opts,
3270 bool warn_compatible)
3271 {
3272 const cpu_option *arm_selected_tune = NULL;
3273 const arch_option *arm_selected_arch = NULL;
3274 const cpu_option *arm_selected_cpu = NULL;
3275 const arm_fpu_desc *arm_selected_fpu = NULL;
3276 const char *tune_opts = NULL;
3277 const char *arch_opts = NULL;
3278 const char *cpu_opts = NULL;
3279
3280 bitmap_clear (target->isa);
3281 target->core_name = NULL;
3282 target->arch_name = NULL;
3283
3284 if (opts->x_arm_arch_string)
3285 {
3286 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3287 "-march",
3288 opts->x_arm_arch_string);
3289 arch_opts = strchr (opts->x_arm_arch_string, '+');
3290 }
3291
3292 if (opts->x_arm_cpu_string)
3293 {
3294 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3295 opts->x_arm_cpu_string);
3296 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3297 arm_selected_tune = arm_selected_cpu;
3298 /* If taking the tuning from -mcpu, we don't need to rescan the
3299 options for tuning. */
3300 }
3301
3302 if (opts->x_arm_tune_string)
3303 {
3304 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3305 opts->x_arm_tune_string);
3306 tune_opts = strchr (opts->x_arm_tune_string, '+');
3307 }
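/* Editorial sketch (not part of GCC; the helper below is hypothetical): the
   strchr calls above only pick up the "+ext" feature suffix, if any, which is
   later handed to arm_parse_option_features; the base name is everything
   before the first '+'.  A standalone model of that split:  */
#if 0
#include <cstdio>
#include <cstring>

static void
split_cpu_option (const char *opt)        /* e.g. "cortex-a53+crc+nofp" */
{
  const char *exts = strchr (opt, '+');   /* "+crc+nofp", or NULL if none */
  size_t base_len = exts ? (size_t) (exts - opt) : strlen (opt);

  printf ("base: %.*s  extensions: %s\n",
          (int) base_len, opt, exts ? exts : "(none)");
}
#endif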
3308
3309 if (opts->x_arm_branch_protection_string)
3310 {
3311 aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string);
3312
3313 if (aarch_ra_sign_key != AARCH_KEY_A)
3314 {
3315 warning (0, "invalid key type for %<-mbranch-protection=%>");
3316 aarch_ra_sign_key = AARCH_KEY_A;
3317 }
3318 }
3319
3320 if (arm_selected_arch)
3321 {
3322 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3323 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3324 arch_opts);
3325
3326 if (arm_selected_cpu)
3327 {
3328 auto_sbitmap cpu_isa (isa_num_bits);
3329 auto_sbitmap isa_delta (isa_num_bits);
3330
3331 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3332 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3333 cpu_opts);
3334 bitmap_xor (isa_delta, cpu_isa, target->isa);
3335 /* Ignore any bits that are quirk bits. */
3336 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3337 /* If the user (or the default configuration) has specified a
3338 specific FPU, then ignore any bits that depend on the FPU
3339 configuration. Do similarly if using the soft-float
3340 ABI. */
3341 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3342 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3343 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3344
3345 if (!bitmap_empty_p (isa_delta))
3346 {
3347 if (warn_compatible)
3348 warning (0, "switch %<-mcpu=%s%> conflicts "
3349 "with switch %<-march=%s%>",
3350 opts->x_arm_cpu_string,
3351 opts->x_arm_arch_string);
3352
3353 /* -march wins for code generation.
3354 -mcpu wins for default tuning. */
3355 if (!arm_selected_tune)
3356 arm_selected_tune = arm_selected_cpu;
3357
3358 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3359 target->arch_name = arm_selected_arch->common.name;
3360 }
3361 else
3362 {
3363 /* Architecture and CPU are essentially the same.
3364 Prefer the CPU setting. */
3365 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3366 target->core_name = arm_selected_cpu->common.name;
3367 /* Copy the CPU's capabilities, so that we inherit the
3368 appropriate extensions and quirks. */
3369 bitmap_copy (target->isa, cpu_isa);
3370 }
3371 }
3372 else
3373 {
3374 /* Pick a CPU based on the architecture. */
3375 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3376 target->arch_name = arm_selected_arch->common.name;
3377 /* Note: target->core_name is left unset in this path. */
3378 }
3379 }
3380 else if (arm_selected_cpu)
3381 {
3382 target->core_name = arm_selected_cpu->common.name;
3383 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3384 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3385 cpu_opts);
3386 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3387 }
3388 /* If the user did not specify a processor or architecture, choose
3389 one for them. */
3390 else
3391 {
3392 const cpu_option *sel;
3393 auto_sbitmap sought_isa (isa_num_bits);
3394 bitmap_clear (sought_isa);
3395 auto_sbitmap default_isa (isa_num_bits);
3396
3397 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3398 TARGET_CPU_DEFAULT);
3399 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3400 gcc_assert (arm_selected_cpu->common.name);
3401
3402 /* RWE: All of the selection logic below (to the end of this
3403 'if' clause) looks somewhat suspect. It appears to be mostly
3404 there to support forcing thumb support when the default CPU
3405 does not have thumb (somewhat dubious in terms of what the
3406 user might be expecting). I think it should be removed once
3407 support for the pre-thumb era cores is removed. */
3408 sel = arm_selected_cpu;
3409 arm_initialize_isa (default_isa, sel->common.isa_bits);
3410 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3411 cpu_opts);
3412
3413 /* Now check to see if the user has specified any command line
3414 switches that require certain abilities from the cpu. */
3415
3416 if (TARGET_INTERWORK || TARGET_THUMB)
3417 bitmap_set_bit (sought_isa, isa_bit_thumb);
3418
3419 /* If there are such requirements and the default CPU does not
3420 satisfy them, we need to run over the complete list of
3421 cores looking for one that is satisfactory. */
3422 if (!bitmap_empty_p (sought_isa)
3423 && !bitmap_subset_p (sought_isa, default_isa))
3424 {
3425 auto_sbitmap candidate_isa (isa_num_bits);
3426 /* We're only interested in a CPU with at least the
3427 capabilities of the default CPU and the required
3428 additional features. */
3429 bitmap_ior (default_isa, default_isa, sought_isa);
3430
3431 /* Try to locate a CPU type that supports all of the abilities
3432 of the default CPU, plus the extra abilities requested by
3433 the user. */
3434 for (sel = all_cores; sel->common.name != NULL; sel++)
3435 {
3436 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3437 /* An exact match? */
3438 if (bitmap_equal_p (default_isa, candidate_isa))
3439 break;
3440 }
3441
3442 if (sel->common.name == NULL)
3443 {
3444 unsigned current_bit_count = isa_num_bits;
3445 const cpu_option *best_fit = NULL;
3446
3447 /* Ideally we would like to issue an error message here
3448 saying that it was not possible to find a CPU compatible
3449 with the default CPU, but which also supports the command
3450 line options specified by the programmer, and so they
3451 ought to use the -mcpu=<name> command line option to
3452 override the default CPU type.
3453
3454 If we cannot find a CPU that has exactly the
3455 characteristics of the default CPU and the given
3456 command line options we scan the array again looking
3457 for a best match. The best match must have at least
3458 the capabilities of the perfect match. */
3459 for (sel = all_cores; sel->common.name != NULL; sel++)
3460 {
3461 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3462
3463 if (bitmap_subset_p (default_isa, candidate_isa))
3464 {
3465 unsigned count;
3466
3467 bitmap_and_compl (candidate_isa, candidate_isa,
3468 default_isa);
3469 count = bitmap_popcount (candidate_isa);
3470
3471 if (count < current_bit_count)
3472 {
3473 best_fit = sel;
3474 current_bit_count = count;
3475 }
3476 }
3477
3478 gcc_assert (best_fit);
3479 sel = best_fit;
3480 }
3481 }
3482 arm_selected_cpu = sel;
3483 }
3484
3485 /* Now we know the CPU, we can finally initialize the target
3486 structure. */
3487 target->core_name = arm_selected_cpu->common.name;
3488 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3489 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3490 cpu_opts);
3491 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3492 }
3493
3494 gcc_assert (arm_selected_cpu);
3495 gcc_assert (arm_selected_arch);
3496
3497 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3498 {
3499 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3500 auto_sbitmap fpu_bits (isa_num_bits);
3501
3502 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3503 /* This should clear out ALL bits relating to the FPU/simd
3504 extensions, to avoid potentially invalid combinations later on
3505 that we can't match. At present we only clear out those bits
3506 that can be set by -mfpu. This should be fixed in GCC-12. */
3507 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3508 bitmap_ior (target->isa, target->isa, fpu_bits);
3509 }
3510
3511 /* If we have the soft-float ABI, clear any feature bits relating to use of
3512 floating-point operations. They'll just confuse things later on. */
3513 if (arm_float_abi == ARM_FLOAT_ABI_SOFT)
3514 bitmap_and_compl (target->isa, target->isa, isa_all_fpbits);
3515
3516 /* There may be implied bits which we still need to enable. These are
3517 non-named features which are needed to complete other sets of features,
3518 but cannot be enabled from arm-cpus.in due to being shared between
3519 multiple fgroups. Each entry in all_implied_fbits is of the form
3520 ante -> cons, meaning that if the feature "ante" is enabled, we should
3521 implicitly enable "cons". */
3522 const struct fbit_implication *impl = all_implied_fbits;
3523 while (impl->ante)
3524 {
3525 if (bitmap_bit_p (target->isa, impl->ante))
3526 bitmap_set_bit (target->isa, impl->cons);
3527 impl++;
3528 }
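/* Editorial sketch (not part of GCC; the type and feature names below are
   hypothetical): the loop above is a single forward pass over the implication
   table, turning on "cons" whenever "ante" is already set.  A standalone
   model of the same idea:  */
#if 0
#include <bitset>

struct implication { int ante, cons; };

static void
apply_implications (std::bitset<64> &isa,
                    const implication *table, int n_entries)
{
  for (int i = 0; i < n_entries; i++)
    if (isa.test (table[i].ante))    /* antecedent feature is enabled...  */
      isa.set (table[i].cons);       /* ...so enable the consequent too.  */
}
#endif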
3529
3530 if (!arm_selected_tune)
3531 arm_selected_tune = arm_selected_cpu;
3532 else /* Validate the features passed to -mtune. */
3533 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3534
3535 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3536
3537 /* Finish initializing the target structure. */
3538 if (!target->arch_name)
3539 target->arch_name = arm_selected_arch->common.name;
3540 target->arch_pp_name = arm_selected_arch->arch;
3541 target->base_arch = arm_selected_arch->base_arch;
3542 target->profile = arm_selected_arch->profile;
3543
3544 target->tune_flags = tune_data->tune_flags;
3545 target->tune = tune_data->tune;
3546 target->tune_core = tune_data->scheduler;
3547 }
3548
3549 /* Fix up any incompatible options that the user has specified. */
3550 static void
3551 arm_option_override (void)
3552 {
3553 static const enum isa_feature fpu_bitlist_internal[]
3554 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3555 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3556 static const enum isa_feature fp_bitlist[]
3557 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3558 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3559 cl_target_option opts;
3560
3561 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3562 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3563
3564 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3565 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3566 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3567 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3568
3569 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3570
3571 if (!OPTION_SET_P (arm_fpu_index))
3572 {
3573 bool ok;
3574 int fpu_index;
3575
3576 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3577 CL_TARGET);
3578 gcc_assert (ok);
3579 arm_fpu_index = (enum fpu_type) fpu_index;
3580 }
3581
3582 cl_target_option_save (&opts, &global_options, &global_options_set);
3583 arm_configure_build_target (&arm_active_target, &opts, true);
3584
3585 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3586 SUBTARGET_OVERRIDE_OPTIONS;
3587 #endif
3588
3589 /* Initialize boolean versions of the architectural flags, for use
3590 in the arm.md file and for enabling feature flags. */
3591 arm_option_reconfigure_globals ();
3592
3593 arm_tune = arm_active_target.tune_core;
3594 tune_flags = arm_active_target.tune_flags;
3595 current_tune = arm_active_target.tune;
3596
3597 /* TBD: Dwarf info for apcs frame is not handled yet. */
3598 if (TARGET_APCS_FRAME)
3599 flag_shrink_wrap = false;
3600
3601 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3602 {
3603 warning (0, "%<-mapcs-stack-check%> incompatible with "
3604 "%<-mno-apcs-frame%>");
3605 target_flags |= MASK_APCS_FRAME;
3606 }
3607
3608 if (TARGET_POKE_FUNCTION_NAME)
3609 target_flags |= MASK_APCS_FRAME;
3610
3611 if (TARGET_APCS_REENT && flag_pic)
3612 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3613
3614 if (TARGET_APCS_REENT)
3615 warning (0, "APCS reentrant code not supported. Ignored");
3616
3617 /* Set up some tuning parameters. */
3618 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3619 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3620 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3621 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3622 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3623 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3624
3625 /* For arm2/3 there is no need to do any scheduling if we are doing
3626 software floating-point. */
3627 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3628 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3629
3630 /* Override the default structure alignment for AAPCS ABI. */
3631 if (!OPTION_SET_P (arm_structure_size_boundary))
3632 {
3633 if (TARGET_AAPCS_BASED)
3634 arm_structure_size_boundary = 8;
3635 }
3636 else
3637 {
3638 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3639
3640 if (arm_structure_size_boundary != 8
3641 && arm_structure_size_boundary != 32
3642 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3643 {
3644 if (ARM_DOUBLEWORD_ALIGN)
3645 warning (0,
3646 "structure size boundary can only be set to 8, 32 or 64");
3647 else
3648 warning (0, "structure size boundary can only be set to 8 or 32");
3649 arm_structure_size_boundary
3650 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3651 }
3652 }
3653
3654 if (TARGET_VXWORKS_RTP)
3655 {
3656 if (!OPTION_SET_P (arm_pic_data_is_text_relative))
3657 arm_pic_data_is_text_relative = 0;
3658 }
3659 else if (flag_pic
3660 && !arm_pic_data_is_text_relative
3661 && !(OPTION_SET_P (target_flags) & MASK_SINGLE_PIC_BASE))
3662 /* When text & data segments don't have a fixed displacement, the
3663 intended use is with a single, read only, pic base register.
3664 Unless the user explicitly requested not to do that, set
3665 it. */
3666 target_flags |= MASK_SINGLE_PIC_BASE;
3667
3668 /* If stack checking is disabled, we can use r10 as the PIC register,
3669 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3670 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3671 {
3672 if (TARGET_VXWORKS_RTP)
3673 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3674 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3675 }
3676
3677 if (flag_pic && TARGET_VXWORKS_RTP)
3678 arm_pic_register = 9;
3679
3680 /* If in FDPIC mode then force arm_pic_register to be r9. */
3681 if (TARGET_FDPIC)
3682 {
3683 arm_pic_register = FDPIC_REGNUM;
3684 if (TARGET_THUMB1)
3685 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3686 }
3687
3688 if (arm_pic_register_string != NULL)
3689 {
3690 int pic_register = decode_reg_name (arm_pic_register_string);
3691
3692 if (!flag_pic)
3693 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3694
3695 /* Prevent the user from choosing an obviously stupid PIC register. */
3696 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3697 || pic_register == HARD_FRAME_POINTER_REGNUM
3698 || pic_register == STACK_POINTER_REGNUM
3699 || pic_register >= PC_REGNUM
3700 || (TARGET_VXWORKS_RTP
3701 && (unsigned int) pic_register != arm_pic_register))
3702 error ("unable to use %qs for PIC register", arm_pic_register_string);
3703 else
3704 arm_pic_register = pic_register;
3705 }
3706
3707 if (flag_pic)
3708 target_word_relocations = 1;
3709
3710 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3711 if (fix_cm3_ldrd == 2)
3712 {
3713 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3714 fix_cm3_ldrd = 1;
3715 else
3716 fix_cm3_ldrd = 0;
3717 }
3718
3719 /* Enable fix_vlldm by default if required. */
3720 if (fix_vlldm == 2)
3721 {
3722 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
3723 fix_vlldm = 1;
3724 else
3725 fix_vlldm = 0;
3726 }
3727
3728 /* Enable fix_aes by default if required. */
3729 if (fix_aes_erratum_1742098 == 2)
3730 {
3731 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_aes_1742098))
3732 fix_aes_erratum_1742098 = 1;
3733 else
3734 fix_aes_erratum_1742098 = 0;
3735 }
3736
3737 /* Hot/Cold partitioning is not currently supported, since we can't
3738 handle literal pool placement in that case. */
3739 if (flag_reorder_blocks_and_partition)
3740 {
3741 inform (input_location,
3742 "%<-freorder-blocks-and-partition%> not supported "
3743 "on this architecture");
3744 flag_reorder_blocks_and_partition = 0;
3745 flag_reorder_blocks = 1;
3746 }
3747
3748 if (flag_pic)
3749 /* Hoisting PIC address calculations more aggressively provides a small,
3750 but measurable, size reduction for PIC code. Therefore, we decrease
3751 the bar for unrestricted expression hoisting to the cost of PIC address
3752 calculation, which is 2 instructions. */
3753 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3754 param_gcse_unrestricted_cost, 2);
3755
3756 /* ARM EABI defaults to strict volatile bitfields. */
3757 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3758 && abi_version_at_least(2))
3759 flag_strict_volatile_bitfields = 1;
3760
3761 /* Enable software prefetching at -O3 for CPUs that have a prefetcher, when we
3762 have deemed it beneficial (signified by setting
3763 prefetch.num_slots to 1 or more). */
3764 if (flag_prefetch_loop_arrays < 0
3765 && HAVE_prefetch
3766 && optimize >= 3
3767 && current_tune->prefetch.num_slots > 0)
3768 flag_prefetch_loop_arrays = 1;
3769
3770 /* Set up parameters to be used in prefetching algorithm. Do not
3771 override the defaults unless we are tuning for a core we have
3772 researched values for. */
3773 if (current_tune->prefetch.num_slots > 0)
3774 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3775 param_simultaneous_prefetches,
3776 current_tune->prefetch.num_slots);
3777 if (current_tune->prefetch.l1_cache_line_size >= 0)
3778 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3779 param_l1_cache_line_size,
3780 current_tune->prefetch.l1_cache_line_size);
3781 if (current_tune->prefetch.l1_cache_line_size >= 0)
3782 {
3783 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3784 param_destruct_interfere_size,
3785 current_tune->prefetch.l1_cache_line_size);
3786 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3787 param_construct_interfere_size,
3788 current_tune->prefetch.l1_cache_line_size);
3789 }
3790 else
3791 {
3792 /* For a generic ARM target, JF Bastien proposed using 64 for both. */
3793 /* ??? Cortex A9 has a 32-byte cache line, so why not 32 for
3794 constructive? */
3795 /* More recent Cortex chips have a 64-byte cache line, but are marked
3796 ARM_PREFETCH_NOT_BENEFICIAL, so they get these defaults. */
3797 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3798 param_destruct_interfere_size, 64);
3799 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3800 param_construct_interfere_size, 64);
3801 }
3802
3803 if (current_tune->prefetch.l1_cache_size >= 0)
3804 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3805 param_l1_cache_size,
3806 current_tune->prefetch.l1_cache_size);
3807
3808 /* Look through ready list and all of queue for instructions
3809 relevant for L2 auto-prefetcher. */
3810 int sched_autopref_queue_depth;
3811
3812 switch (current_tune->sched_autopref)
3813 {
3814 case tune_params::SCHED_AUTOPREF_OFF:
3815 sched_autopref_queue_depth = -1;
3816 break;
3817
3818 case tune_params::SCHED_AUTOPREF_RANK:
3819 sched_autopref_queue_depth = 0;
3820 break;
3821
3822 case tune_params::SCHED_AUTOPREF_FULL:
3823 sched_autopref_queue_depth = max_insn_queue_index + 1;
3824 break;
3825
3826 default:
3827 gcc_unreachable ();
3828 }
3829
3830 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3831 param_sched_autopref_queue_depth,
3832 sched_autopref_queue_depth);
3833
3834 /* Currently, for slow flash data, we just disable literal pools. We also
3835 disable them for pure-code. */
3836 if (target_slow_flash_data || target_pure_code)
3837 arm_disable_literal_pool = true;
3838
3839 /* Disable scheduling fusion by default if the target is not an ARMv7
3840 processor or does not prefer ldrd/strd. */
3841 if (flag_schedule_fusion == 2
3842 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3843 flag_schedule_fusion = 0;
3844
3845 /* Need to remember initial options before they are overridden. */
3846 init_optimize = build_optimization_node (&global_options,
3847 &global_options_set);
3848
3849 arm_options_perform_arch_sanity_checks ();
3850 arm_option_override_internal (&global_options, &global_options_set);
3851 arm_option_check_internal (&global_options);
3852 arm_option_params_internal ();
3853
3854 /* Create the default target_options structure. */
3855 target_option_default_node = target_option_current_node
3856 = build_target_option_node (&global_options, &global_options_set);
3857
3858 /* Register global variables with the garbage collector. */
3859 arm_add_gc_roots ();
3860
3861 /* Init initial mode for testing. */
3862 thumb_flipper = TARGET_THUMB;
3863 }
3864
3865
3866 /* Reconfigure global status flags from the active_target.isa. */
3867 void
3868 arm_option_reconfigure_globals (void)
3869 {
3870 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3871 arm_base_arch = arm_active_target.base_arch;
3872
3873 /* Initialize boolean versions of the architectural flags, for use
3874 in the arm.md file. */
3875 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3876 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3877 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3878 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3879 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3880 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3881 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3882 arm_arch6m = arm_arch6 && !arm_arch_notm;
3883 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3884 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3885 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3886 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3887 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3888 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3889 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3890 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3891 isa_bit_armv8_1m_main);
3892 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3893 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3894 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3895 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3896 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3897 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3898 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3899 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3900 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3901 arm_arch8m_main = arm_arch7 && arm_arch_cmse;
3902 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3903 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3904 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3905
3906 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3907 if (arm_fp16_inst)
3908 {
3909 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3910 error ("selected fp16 options are incompatible");
3911 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3912 }
3913
3914 arm_arch_cde = 0;
3915 arm_arch_cde_coproc = 0;
3916 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3917 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3918 isa_bit_cdecp6, isa_bit_cdecp7};
3919 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3920 {
3921 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3922 if (cde_bit)
3923 {
3924 arm_arch_cde |= cde_bit;
3925 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3926 }
3927 }
3928
3929 /* And finally, set up some quirks. */
3930 arm_arch_no_volatile_ce
3931 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3932 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3933 isa_bit_quirk_armv6kz);
3934
3935 /* Use the cp15 method if it is available. */
3936 if (target_thread_pointer == TP_AUTO)
3937 {
3938 if (arm_arch6k && !TARGET_THUMB1)
3939 target_thread_pointer = TP_TPIDRURO;
3940 else
3941 target_thread_pointer = TP_SOFT;
3942 }
3943
3944 if (!TARGET_HARD_TP && arm_stack_protector_guard == SSP_TLSREG)
3945 error ("%<-mstack-protector-guard=tls%> needs a hardware TLS register");
3946 }
3947
3948 /* Perform some validation between the desired architecture and the rest of the
3949 options. */
3950 void
3951 arm_options_perform_arch_sanity_checks (void)
3952 {
3953 /* V5T code we generate is completely interworking capable, so we turn off
3954 TARGET_INTERWORK here to avoid many tests later on. */
3955
3956 /* XXX However, we must pass the right pre-processor defines to CPP
3957 or GLD can get confused. This is a hack. */
3958 if (TARGET_INTERWORK)
3959 arm_cpp_interwork = 1;
3960
3961 if (arm_arch5t)
3962 target_flags &= ~MASK_INTERWORK;
3963
3964 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3965 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3966
3967 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3968 error ("iwmmxt abi requires an iwmmxt capable cpu");
3969
3970 /* BPABI targets use linker tricks to allow interworking on cores
3971 without thumb support. */
3972 if (TARGET_INTERWORK
3973 && !TARGET_BPABI
3974 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3975 {
3976 warning (0, "target CPU does not support interworking");
3977 target_flags &= ~MASK_INTERWORK;
3978 }
3979
3980 /* If soft-float is specified then don't use FPU. */
3981 if (TARGET_SOFT_FLOAT)
3982 arm_fpu_attr = FPU_NONE;
3983 else
3984 arm_fpu_attr = FPU_VFP;
3985
3986 if (TARGET_AAPCS_BASED)
3987 {
3988 if (TARGET_CALLER_INTERWORKING)
3989 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3990 else
3991 if (TARGET_CALLEE_INTERWORKING)
3992 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3993 }
3994
3995 /* __fp16 support currently assumes the core has ldrh. */
3996 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3997 sorry ("%<__fp16%> and no ldrh");
3998
3999 if (use_cmse && !arm_arch_cmse)
4000 error ("target CPU does not support ARMv8-M Security Extensions");
4001
4002 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
4003 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
4004 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
4005 error ("ARMv8-M Security Extensions incompatible with selected FPU");
4006
4007
4008 if (TARGET_AAPCS_BASED)
4009 {
4010 if (arm_abi == ARM_ABI_IWMMXT)
4011 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
4012 else if (TARGET_HARD_FLOAT_ABI)
4013 {
4014 arm_pcs_default = ARM_PCS_AAPCS_VFP;
4015 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
4016 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
4017 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
4018 }
4019 else
4020 arm_pcs_default = ARM_PCS_AAPCS;
4021 }
4022 else
4023 {
4024 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
4025 sorry ("%<-mfloat-abi=hard%> and VFP");
4026
4027 if (arm_abi == ARM_ABI_APCS)
4028 arm_pcs_default = ARM_PCS_APCS;
4029 else
4030 arm_pcs_default = ARM_PCS_ATPCS;
4031 }
4032 }
4033
4034 /* Test whether a local function descriptor is canonical, i.e.,
4035 whether we can use GOTOFFFUNCDESC to compute the address of the
4036 function. */
4037 static bool
4038 arm_fdpic_local_funcdesc_p (rtx fnx)
4039 {
4040 tree fn;
4041 enum symbol_visibility vis;
4042 bool ret;
4043
4044 if (!TARGET_FDPIC)
4045 return true;
4046
4047 if (! SYMBOL_REF_LOCAL_P (fnx))
4048 return false;
4049
4050 fn = SYMBOL_REF_DECL (fnx);
4051
4052 if (! fn)
4053 return false;
4054
4055 vis = DECL_VISIBILITY (fn);
4056
4057 if (vis == VISIBILITY_PROTECTED)
4058 /* Private function descriptors for protected functions are not
4059 canonical. Temporarily change the visibility to global so that
4060 we can ensure uniqueness of funcdesc pointers. */
4061 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
4062
4063 ret = default_binds_local_p_1 (fn, flag_pic);
4064
4065 DECL_VISIBILITY (fn) = vis;
4066
4067 return ret;
4068 }
4069
4070 static void
4071 arm_add_gc_roots (void)
4072 {
4073 gcc_obstack_init(&minipool_obstack);
4074 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
4075 }
4076 \f
4077 /* A table of known ARM exception types.
4078 For use with the interrupt function attribute. */
4079
4080 typedef struct
4081 {
4082 const char *const arg;
4083 const unsigned long return_value;
4084 }
4085 isr_attribute_arg;
4086
4087 static const isr_attribute_arg isr_attribute_args [] =
4088 {
4089 { "IRQ", ARM_FT_ISR },
4090 { "irq", ARM_FT_ISR },
4091 { "FIQ", ARM_FT_FIQ },
4092 { "fiq", ARM_FT_FIQ },
4093 { "ABORT", ARM_FT_ISR },
4094 { "abort", ARM_FT_ISR },
4095 { "UNDEF", ARM_FT_EXCEPTION },
4096 { "undef", ARM_FT_EXCEPTION },
4097 { "SWI", ARM_FT_EXCEPTION },
4098 { "swi", ARM_FT_EXCEPTION },
4099 { NULL, ARM_FT_NORMAL }
4100 };
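/* Editorial example (not from the original source): this table is matched
   against the string argument of the GNU "isr"/"interrupt" attribute, so a
   user would typically write something like

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
     void __attribute__ ((isr ("FIQ"))) fiq_handler (void);

   A string not listed here falls through to the NULL sentinel and yields
   ARM_FT_UNKNOWN from arm_isr_value below.  */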
4101
4102 /* Returns the (interrupt) function type of the current
4103 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
4104
4105 static unsigned long
4106 arm_isr_value (tree argument)
4107 {
4108 const isr_attribute_arg * ptr;
4109 const char * arg;
4110
4111 if (!arm_arch_notm)
4112 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
4113
4114 /* No argument - default to IRQ. */
4115 if (argument == NULL_TREE)
4116 return ARM_FT_ISR;
4117
4118 /* Get the value of the argument. */
4119 if (TREE_VALUE (argument) == NULL_TREE
4120 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
4121 return ARM_FT_UNKNOWN;
4122
4123 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
4124
4125 /* Check it against the list of known arguments. */
4126 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
4127 if (streq (arg, ptr->arg))
4128 return ptr->return_value;
4129
4130 /* An unrecognized interrupt type. */
4131 return ARM_FT_UNKNOWN;
4132 }
4133
4134 /* Computes the type of the current function. */
4135
4136 static unsigned long
4137 arm_compute_func_type (void)
4138 {
4139 unsigned long type = ARM_FT_UNKNOWN;
4140 tree a;
4141 tree attr;
4142
4143 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
4144
4145 /* Decide if the current function is volatile. Such functions
4146 never return, and many memory cycles can be saved by not storing
4147 register values that will never be needed again. This optimization
4148 was added to speed up context switching in a kernel application. */
4149 if (optimize > 0
4150 && (TREE_NOTHROW (current_function_decl)
4151 || !(flag_unwind_tables
4152 || (flag_exceptions
4153 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4154 && TREE_THIS_VOLATILE (current_function_decl))
4155 type |= ARM_FT_VOLATILE;
4156
4157 if (cfun->static_chain_decl != NULL)
4158 type |= ARM_FT_NESTED;
4159
4160 attr = DECL_ATTRIBUTES (current_function_decl);
4161
4162 a = lookup_attribute ("naked", attr);
4163 if (a != NULL_TREE)
4164 type |= ARM_FT_NAKED;
4165
4166 a = lookup_attribute ("isr", attr);
4167 if (a == NULL_TREE)
4168 a = lookup_attribute ("interrupt", attr);
4169
4170 if (a == NULL_TREE)
4171 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4172 else
4173 type |= arm_isr_value (TREE_VALUE (a));
4174
4175 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4176 type |= ARM_FT_CMSE_ENTRY;
4177
4178 return type;
4179 }
4180
4181 /* Returns the type of the current function. */
4182
4183 unsigned long
4184 arm_current_func_type (void)
4185 {
4186 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4187 cfun->machine->func_type = arm_compute_func_type ();
4188
4189 return cfun->machine->func_type;
4190 }
4191
4192 bool
4193 arm_allocate_stack_slots_for_args (void)
4194 {
4195 /* Naked functions should not allocate stack slots for arguments. */
4196 return !IS_NAKED (arm_current_func_type ());
4197 }
4198
4199 static bool
4200 arm_warn_func_return (tree decl)
4201 {
4202 /* Naked functions are implemented entirely in assembly, including the
4203 return sequence, so suppress warnings about this. */
4204 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4205 }
4206
4207 \f
4208 /* Output assembler code for a block containing the constant parts
4209 of a trampoline, leaving space for the variable parts.
4210
4211 On the ARM, (if r8 is the static chain regnum, and remembering that
4212 referencing pc adds an offset of 8) the trampoline looks like:
4213 ldr r8, [pc, #0]
4214 ldr pc, [pc]
4215 .word static chain value
4216 .word function's address
4217 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4218
4219 In FDPIC mode, the trampoline looks like:
4220 .word trampoline address
4221 .word trampoline GOT address
4222 ldr r12, [pc, #8] ; #4 for Arm mode
4223 ldr r9, [pc, #8] ; #4 for Arm mode
4224 ldr pc, [pc, #8] ; #4 for Arm mode
4225 .word static chain value
4226 .word GOT address
4227 .word function's address
4228 */
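/* Editorial worked example (not from the original source) for the non-FDPIC
   ARM-state layout above: reading pc in ARM state yields the address of the
   current instruction plus 8, so with the trampoline starting at offset 0

     offset 0:  ldr r8, [pc, #0]   ; pc reads as 0 + 8 -> loads word at 8
     offset 4:  ldr pc, [pc]       ; pc reads as 4 + 8 -> loads word at 12
     offset 8:  .word static chain value
     offset 12: .word function's address

   i.e. the static chain value lands in r8 (assuming r8 is the static chain
   register, as the comment above does) and control transfers to the
   function.  */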
4229
4230 static void
4231 arm_asm_trampoline_template (FILE *f)
4232 {
4233 fprintf (f, "\t.syntax unified\n");
4234
4235 if (TARGET_FDPIC)
4236 {
4237 /* The first two words are a function descriptor pointing to the
4238 trampoline code just below. */
4239 if (TARGET_ARM)
4240 fprintf (f, "\t.arm\n");
4241 else if (TARGET_THUMB2)
4242 fprintf (f, "\t.thumb\n");
4243 else
4244 /* Only ARM and Thumb-2 are supported. */
4245 gcc_unreachable ();
4246
4247 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4248 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4249 /* Trampoline code which sets the static chain register but also
4250 PIC register before jumping into real code. */
4251 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4252 STATIC_CHAIN_REGNUM, PC_REGNUM,
4253 TARGET_THUMB2 ? 8 : 4);
4254 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4255 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4256 TARGET_THUMB2 ? 8 : 4);
4257 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4258 PC_REGNUM, PC_REGNUM,
4259 TARGET_THUMB2 ? 8 : 4);
4260 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4261 }
4262 else if (TARGET_ARM)
4263 {
4264 fprintf (f, "\t.arm\n");
4265 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4266 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4267 }
4268 else if (TARGET_THUMB2)
4269 {
4270 fprintf (f, "\t.thumb\n");
4271 /* The Thumb-2 trampoline is similar to the arm implementation.
4272 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4273 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4274 STATIC_CHAIN_REGNUM, PC_REGNUM);
4275 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4276 }
4277 else
4278 {
4279 ASM_OUTPUT_ALIGN (f, 2);
4280 fprintf (f, "\t.code\t16\n");
4281 fprintf (f, ".Ltrampoline_start:\n");
4282 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4283 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4284 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4285 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4286 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4287 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4288 }
4289 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4290 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4291 }
4292
4293 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4294
4295 static void
4296 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4297 {
4298 rtx fnaddr, mem, a_tramp;
4299
4300 emit_block_move (m_tramp, assemble_trampoline_template (),
4301 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4302
4303 if (TARGET_FDPIC)
4304 {
4305 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4306 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4307 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4308 /* The function start address is at offset 8, but in Thumb mode
4309 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4310 below. */
4311 rtx trampoline_code_start
4312 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4313
4314 /* Write initial funcdesc which points to the trampoline. */
4315 mem = adjust_address (m_tramp, SImode, 0);
4316 emit_move_insn (mem, trampoline_code_start);
4317 mem = adjust_address (m_tramp, SImode, 4);
4318 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4319 /* Setup static chain. */
4320 mem = adjust_address (m_tramp, SImode, 20);
4321 emit_move_insn (mem, chain_value);
4322 /* GOT + real function entry point. */
4323 mem = adjust_address (m_tramp, SImode, 24);
4324 emit_move_insn (mem, gotaddr);
4325 mem = adjust_address (m_tramp, SImode, 28);
4326 emit_move_insn (mem, fnaddr);
4327 }
4328 else
4329 {
4330 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4331 emit_move_insn (mem, chain_value);
4332
4333 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4334 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4335 emit_move_insn (mem, fnaddr);
4336 }
4337
4338 a_tramp = XEXP (m_tramp, 0);
4339 maybe_emit_call_builtin___clear_cache (a_tramp,
4340 plus_constant (ptr_mode,
4341 a_tramp,
4342 TRAMPOLINE_SIZE));
4343 }
4344
4345 /* Thumb trampolines should be entered in thumb mode, so set
4346 the bottom bit of the address. */
4347
4348 static rtx
4349 arm_trampoline_adjust_address (rtx addr)
4350 {
4351 /* For FDPIC don't fix trampoline address since it's a function
4352 descriptor and not a function address. */
4353 if (TARGET_THUMB && !TARGET_FDPIC)
4354 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4355 NULL, 0, OPTAB_LIB_WIDEN);
4356 return addr;
4357 }
4358 \f
4359 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4360 includes call-clobbered registers too. If this is a leaf function
4361 we can just examine the registers used by the RTL, but otherwise we
4362 have to assume that whatever function is called might clobber
4363 anything, and so we have to save all the call-clobbered registers
4364 as well. */
4365 static inline bool reg_needs_saving_p (unsigned reg)
4366 {
4367 unsigned long func_type = arm_current_func_type ();
4368
4369 if (IS_INTERRUPT (func_type))
4370 if (df_regs_ever_live_p (reg)
4371 /* Save call-clobbered core registers. */
4372 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4373 return true;
4374 else
4375 return false;
4376 else
4377 if (!df_regs_ever_live_p (reg)
4378 || call_used_or_fixed_reg_p (reg))
4379 return false;
4380 else
4381 return true;
4382 }
4383
4384 /* Return 1 if it is possible to return using a single instruction.
4385 If SIBLING is non-null, this is a test for a return before a sibling
4386 call. SIBLING is the call insn, so we can examine its register usage. */
4387
4388 int
4389 use_return_insn (int iscond, rtx sibling)
4390 {
4391 int regno;
4392 unsigned int func_type;
4393 unsigned long saved_int_regs;
4394 unsigned HOST_WIDE_INT stack_adjust;
4395 arm_stack_offsets *offsets;
4396
4397 /* Never use a return instruction before reload has run. */
4398 if (!reload_completed)
4399 return 0;
4400
4401 /* Never use a return instruction when the return address signing
4402 mechanism is enabled, as it requires more than one
4403 instruction. */
4404 if (arm_current_function_pac_enabled_p ())
4405 return 0;
4406
4407 func_type = arm_current_func_type ();
4408
4409 /* Naked, volatile and stack alignment functions need special
4410 consideration. */
4411 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4412 return 0;
4413
4414 /* So do interrupt functions that use the frame pointer and Thumb
4415 interrupt functions. */
4416 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4417 return 0;
4418
4419 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4420 && !optimize_function_for_size_p (cfun))
4421 return 0;
4422
4423 offsets = arm_get_frame_offsets ();
4424 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4425
4426 /* As do variadic functions. */
4427 if (crtl->args.pretend_args_size
4428 || cfun->machine->uses_anonymous_args
4429 /* Or if the function calls __builtin_eh_return () */
4430 || crtl->calls_eh_return
4431 /* Or if the function calls alloca */
4432 || cfun->calls_alloca
4433 /* Or if there is a stack adjustment. However, if the stack pointer
4434 is saved on the stack, we can use a pre-incrementing stack load. */
4435 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4436 && stack_adjust == 4))
4437 /* Or if the static chain register was saved above the frame, under the
4438 assumption that the stack pointer isn't saved on the stack. */
4439 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4440 && arm_compute_static_chain_stack_bytes() != 0))
4441 return 0;
4442
4443 saved_int_regs = offsets->saved_regs_mask;
4444
4445 /* Unfortunately, the insn
4446
4447 ldmib sp, {..., sp, ...}
4448
4449 triggers a bug on most SA-110 based devices, such that the stack
4450 pointer won't be correctly restored if the instruction takes a
4451 page fault. We work around this problem by popping r3 along with
4452 the other registers, since that is never slower than executing
4453 another instruction.
4454
4455 We test for !arm_arch5t here, because code for any architecture
4456 less than this could potentially be run on one of the buggy
4457 chips. */
4458 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4459 {
4460 /* Validate that r3 is a call-clobbered register (always true in
4461 the default abi) ... */
4462 if (!call_used_or_fixed_reg_p (3))
4463 return 0;
4464
4465 /* ... that it isn't being used for a return value ... */
4466 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4467 return 0;
4468
4469 /* ... or for a tail-call argument ... */
4470 if (sibling)
4471 {
4472 gcc_assert (CALL_P (sibling));
4473
4474 if (find_regno_fusage (sibling, USE, 3))
4475 return 0;
4476 }
4477
4478 /* ... and that there are no call-saved registers in r0-r2
4479 (always true in the default ABI). */
4480 if (saved_int_regs & 0x7)
4481 return 0;
4482 }
4483
4484 /* Can't be done if interworking with Thumb, and any registers have been
4485 stacked. */
4486 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4487 return 0;
4488
4489 /* On StrongARM, conditional returns are expensive if they aren't
4490 taken and multiple registers have been stacked. */
4491 if (iscond && arm_tune_strongarm)
4492 {
4493 /* Conditional return when just the LR is stored is a simple
4494 conditional-load instruction, that's not expensive. */
4495 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4496 return 0;
4497
4498 if (flag_pic
4499 && arm_pic_register != INVALID_REGNUM
4500 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4501 return 0;
4502 }
4503
4504 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4505 several instructions if anything needs to be popped. Armv8.1-M Mainline
4506 also needs several instructions to save and restore FP context. */
4507 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4508 return 0;
4509
4510 /* If there are saved registers but the LR isn't saved, then we need
4511 two instructions for the return. */
4512 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4513 return 0;
4514
4515 /* Can't be done if any of the VFP regs are pushed,
4516 since this also requires an insn. */
4517 if (TARGET_VFP_BASE)
4518 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4519 if (reg_needs_saving_p (regno))
4520 return 0;
4521
4522 if (TARGET_REALLY_IWMMXT)
4523 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4524 if (reg_needs_saving_p (regno))
4525 return 0;
4526
4527 return 1;
4528 }
4529
4530 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4531 shrink-wrapping if possible. This is the case if we need to emit a
4532 prologue, which we can test by looking at the offsets. */
4533 bool
4534 use_simple_return_p (void)
4535 {
4536 arm_stack_offsets *offsets;
4537
4538 /* Note this function can be called before or after reload. */
4539 if (!reload_completed)
4540 arm_compute_frame_layout ();
4541
4542 offsets = arm_get_frame_offsets ();
4543 return offsets->outgoing_args != 0;
4544 }
4545
4546 /* Return TRUE if int I is a valid immediate ARM constant. */
4547
4548 int
4549 const_ok_for_arm (HOST_WIDE_INT i)
4550 {
4551 int lowbit;
4552
4553 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4554 be all zero, or all one. */
4555 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4556 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4557 != ((~(unsigned HOST_WIDE_INT) 0)
4558 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4559 return FALSE;
4560
4561 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4562
4563 /* Fast return for 0 and small values. We must do this for zero, since
4564 the code below can't handle that one case. */
4565 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4566 return TRUE;
4567
4568 /* Get the number of trailing zeros. */
4569 lowbit = ffs((int) i) - 1;
4570
4571 /* Only even shifts are allowed in ARM mode so round down to the
4572 nearest even number. */
4573 if (TARGET_ARM)
4574 lowbit &= ~1;
4575
4576 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4577 return TRUE;
4578
4579 if (TARGET_ARM)
4580 {
4581 /* Allow rotated constants in ARM mode. */
4582 if (lowbit <= 4
4583 && ((i & ~0xc000003f) == 0
4584 || (i & ~0xf000000f) == 0
4585 || (i & ~0xfc000003) == 0))
4586 return TRUE;
4587 }
4588 else if (TARGET_THUMB2)
4589 {
4590 HOST_WIDE_INT v;
4591
4592 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4593 v = i & 0xff;
4594 v |= v << 16;
4595 if (i == v || i == (v | (v << 8)))
4596 return TRUE;
4597
4598 /* Allow repeated pattern 0xXY00XY00. */
4599 v = i & 0xff00;
4600 v |= v << 16;
4601 if (i == v)
4602 return TRUE;
4603 }
4604 else if (TARGET_HAVE_MOVT)
4605 {
4606 /* Thumb-1 Targets with MOVT. */
4607 if (i > 0xffff)
4608 return FALSE;
4609 else
4610 return TRUE;
4611 }
4612
4613 return FALSE;
4614 }
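/* Editorial sketch (not part of GCC; a simplified standalone model of the
   ARM-state rule above): a valid ARM data-processing immediate is an 8-bit
   value rotated right by an even amount.  For example 0x0000ff00 and
   0xf000000f are representable, while 0x000001fe (it needs an odd rotation)
   and 0x00010001 are not, even though Thumb-2 accepts both of the latter via
   its shifted and replicated immediate forms.  */
#if 0
static bool
arm_mode_immediate_p (unsigned int i)
{
  for (int rot = 0; rot < 32; rot += 2)
    {
      /* Rotate I left by ROT bits; if the result fits in 8 bits, then I is
         an 8-bit value rotated right by ROT, i.e. a valid immediate.  */
      unsigned int v = (i << rot) | (i >> ((32 - rot) & 31));
      if ((v & ~0xffu) == 0)
        return true;
    }
  return false;
}
#endif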
4615
4616 /* Return true if I is a valid constant for the operation CODE. */
4617 int
4618 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4619 {
4620 if (const_ok_for_arm (i))
4621 return 1;
4622
4623 switch (code)
4624 {
4625 case SET:
4626 /* See if we can use movw. */
4627 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4628 return 1;
4629 else
4630 /* Otherwise, try mvn. */
4631 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4632
4633 case PLUS:
4634 /* See if we can use addw or subw. */
4635 if (TARGET_THUMB2
4636 && ((i & 0xfffff000) == 0
4637 || ((-i) & 0xfffff000) == 0))
4638 return 1;
4639 /* Fall through. */
4640 case COMPARE:
4641 case EQ:
4642 case NE:
4643 case GT:
4644 case LE:
4645 case LT:
4646 case GE:
4647 case GEU:
4648 case LTU:
4649 case GTU:
4650 case LEU:
4651 case UNORDERED:
4652 case ORDERED:
4653 case UNEQ:
4654 case UNGE:
4655 case UNLT:
4656 case UNGT:
4657 case UNLE:
4658 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4659
4660 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4661 case XOR:
4662 return 0;
4663
4664 case IOR:
4665 if (TARGET_THUMB2)
4666 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4667 return 0;
4668
4669 case AND:
4670 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4671
4672 default:
4673 gcc_unreachable ();
4674 }
4675 }
4676
4677 /* Return true if I is a valid DImode constant for the operation CODE. */
4678 int
4679 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4680 {
4681 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4682 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4683 rtx hi = GEN_INT (hi_val);
4684 rtx lo = GEN_INT (lo_val);
4685
4686 if (TARGET_THUMB1)
4687 return 0;
4688
4689 switch (code)
4690 {
4691 case AND:
4692 case IOR:
4693 case XOR:
4694 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4695 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4696 case PLUS:
4697 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4698
4699 default:
4700 return 0;
4701 }
4702 }
4703
4704 /* Emit a sequence of insns to handle a large constant.
4705 CODE is the code of the operation required; it can be any of SET, PLUS,
4706 IOR, AND, XOR, MINUS;
4707 MODE is the mode in which the operation is being performed;
4708 VAL is the integer to operate on;
4709 SOURCE is the other operand (a register, or a null-pointer for SET);
4710 SUBTARGETS means it is safe to create scratch registers if that will
4711 either produce a simpler sequence, or we will want to cse the values.
4712 Return value is the number of insns emitted. */
4713
4714 /* ??? Tweak this for thumb2. */
4715 int
4716 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4717 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4718 {
4719 rtx cond;
4720
4721 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4722 cond = COND_EXEC_TEST (PATTERN (insn));
4723 else
4724 cond = NULL_RTX;
4725
4726 if (subtargets || code == SET
4727 || (REG_P (target) && REG_P (source)
4728 && REGNO (target) != REGNO (source)))
4729 {
4730 /* After arm_reorg has been called, we can't fix up expensive
4731 constants by pushing them into memory so we must synthesize
4732 them in-line, regardless of the cost. This is only likely to
4733 be more costly on chips that have load delay slots and we are
4734 compiling without running the scheduler (so no splitting
4735 occurred before the final instruction emission).
4736
4737 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4738 */
4739 if (!cfun->machine->after_arm_reorg
4740 && !cond
4741 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4742 1, 0)
4743 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4744 + (code != SET))))
4745 {
4746 if (code == SET)
4747 {
4748 /* Currently SET is the only monadic value for CODE; all
4749 the rest are dyadic. */
4750 if (TARGET_USE_MOVT)
4751 arm_emit_movpair (target, GEN_INT (val));
4752 else
4753 emit_set_insn (target, GEN_INT (val));
4754
4755 return 1;
4756 }
4757 else
4758 {
4759 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4760
4761 if (TARGET_USE_MOVT)
4762 arm_emit_movpair (temp, GEN_INT (val));
4763 else
4764 emit_set_insn (temp, GEN_INT (val));
4765
4766 /* For MINUS, the value is what SOURCE is subtracted from, since we
4767 never have subtraction of a constant. */
4768 if (code == MINUS)
4769 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4770 else
4771 emit_set_insn (target,
4772 gen_rtx_fmt_ee (code, mode, source, temp));
4773 return 2;
4774 }
4775 }
4776 }
4777
4778 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4779 1);
4780 }
4781
4782 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4783 ARM/Thumb-2 immediates and add up to VAL.
4784 The function return value gives the number of insns required. */
4785 static int
4786 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4787 struct four_ints *return_sequence)
4788 {
4789 int best_consecutive_zeros = 0;
4790 int i;
4791 int best_start = 0;
4792 int insns1, insns2;
4793 struct four_ints tmp_sequence;
4794
4795 /* If we aren't targeting ARM, the best place to start is always at
4796 the bottom; otherwise look more closely. */
4797 if (TARGET_ARM)
4798 {
4799 for (i = 0; i < 32; i += 2)
4800 {
4801 int consecutive_zeros = 0;
4802
4803 if (!(val & (3 << i)))
4804 {
4805 while ((i < 32) && !(val & (3 << i)))
4806 {
4807 consecutive_zeros += 2;
4808 i += 2;
4809 }
4810 if (consecutive_zeros > best_consecutive_zeros)
4811 {
4812 best_consecutive_zeros = consecutive_zeros;
4813 best_start = i - consecutive_zeros;
4814 }
4815 i -= 2;
4816 }
4817 }
4818 }
4819
4820 /* So long as it won't require any more insns to do so, it's
4821 desirable to emit a small constant (in bits 0...9) in the last
4822 insn. This way there is more chance that it can be combined with
4823 a later addressing insn to form a pre-indexed load or store
4824 operation. Consider:
4825
4826 *((volatile int *)0xe0000100) = 1;
4827 *((volatile int *)0xe0000110) = 2;
4828
4829 We want this to wind up as:
4830
4831 mov rA, #0xe0000000
4832 mov rB, #1
4833 str rB, [rA, #0x100]
4834 mov rB, #2
4835 str rB, [rA, #0x110]
4836
4837 rather than having to synthesize both large constants from scratch.
4838
4839 Therefore, we calculate how many insns would be required to emit
4840 the constant starting from `best_start', and also starting from
4841 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4842 yield a shorter sequence, we may as well use zero. */
4843 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4844 if (best_start != 0
4845 && ((HOST_WIDE_INT_1U << best_start) < val))
4846 {
4847 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4848 if (insns2 <= insns1)
4849 {
4850 *return_sequence = tmp_sequence;
4851 insns1 = insns2;
4852 }
4853 }
4854
4855 return insns1;
4856 }
4857
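/* As an illustration of the scan in optimal_immediate_sequence above,
   the standalone sketch below (not part of the build; the name is
   hypothetical) mirrors the ARM-mode loop that picks `best_start': it
   returns the bit position at the bottom of the largest run of zero
   bits that begins on a 2-bit boundary.  For example it yields 10 for
   0xe0000100, since the zeros in bits 10..27 form the largest such run.
   When that position is nonzero, the caller also builds a sequence
   starting from bit 31 and keeps it unless the `best_start' sequence is
   strictly shorter.  */
#if 0
static int
example_best_start (unsigned int val)
{
  int best_consecutive_zeros = 0, best_start = 0;

  for (int i = 0; i < 32; i += 2)
    if (!(val & (3u << i)))
      {
	int zeros = 0;

	while (i < 32 && !(val & (3u << i)))
	  {
	    zeros += 2;
	    i += 2;
	  }
	if (zeros > best_consecutive_zeros)
	  {
	    best_consecutive_zeros = zeros;
	    best_start = i - zeros;
	  }
	i -= 2;
      }
  return best_start;
}
#endif
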
4858 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4859 static int
4860 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4861 struct four_ints *return_sequence, int i)
4862 {
4863 int remainder = val & 0xffffffff;
4864 int insns = 0;
4865
4866 /* Try and find a way of doing the job in either two or three
4867 instructions.
4868
4869 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4870 location. We start at position I. This may be the MSB, or
4871 optimal_immediate_sequence may have positioned it at the largest block
4872 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4873 wrapping around to the top of the word when we drop off the bottom.
4874 In the worst case this code should produce no more than four insns.
4875
4876 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4877 constants, shifted to any arbitrary location. We should always start
4878 at the MSB. */
4879 do
4880 {
4881 int end;
4882 unsigned int b1, b2, b3, b4;
4883 unsigned HOST_WIDE_INT result;
4884 int loc;
4885
4886 gcc_assert (insns < 4);
4887
4888 if (i <= 0)
4889 i += 32;
4890
4891 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4892 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4893 {
4894 loc = i;
4895 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4896 /* We can use addw/subw for the last 12 bits. */
4897 result = remainder;
4898 else
4899 {
4900 /* Use an 8-bit shifted/rotated immediate. */
4901 end = i - 8;
4902 if (end < 0)
4903 end += 32;
4904 result = remainder & ((0x0ff << end)
4905 | ((i < end) ? (0xff >> (32 - end))
4906 : 0));
4907 i -= 8;
4908 }
4909 }
4910 else
4911 {
4912 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4913 arbitrary shifts. */
4914 i -= TARGET_ARM ? 2 : 1;
4915 continue;
4916 }
4917
4918 /* Next, see if we can do a better job with a thumb2 replicated
4919 constant.
4920
4921 We do it this way around to catch the cases like 0x01F001E0 where
4922 two 8-bit immediates would work, but a replicated constant would
4923 make it worse.
4924
4925 TODO: 16-bit constants that don't clear all the bits, but still win.
4926 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4927 if (TARGET_THUMB2)
4928 {
4929 b1 = (remainder & 0xff000000) >> 24;
4930 b2 = (remainder & 0x00ff0000) >> 16;
4931 b3 = (remainder & 0x0000ff00) >> 8;
4932 b4 = remainder & 0xff;
4933
4934 if (loc > 24)
4935 {
4936 /* The 8-bit immediate already found clears b1 (and maybe b2),
4937 but must leave b3 and b4 alone. */
4938
4939 /* First try to find a 32-bit replicated constant that clears
4940 almost everything. We can assume that we can't do it in one,
4941 or else we wouldn't be here. */
4942 unsigned int tmp = b1 & b2 & b3 & b4;
4943 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4944 + (tmp << 24);
4945 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4946 + (tmp == b3) + (tmp == b4);
4947 if (tmp
4948 && (matching_bytes >= 3
4949 || (matching_bytes == 2
4950 && const_ok_for_op (remainder & ~tmp2, code))))
4951 {
4952 /* At least 3 of the bytes match, and the fourth has at
4953 least as many bits set, or two of the bytes match
4954 and it will only require one more insn to finish. */
4955 result = tmp2;
4956 i = tmp != b1 ? 32
4957 : tmp != b2 ? 24
4958 : tmp != b3 ? 16
4959 : 8;
4960 }
4961
4962 /* Second, try to find a 16-bit replicated constant that can
4963 leave three of the bytes clear. If b2 or b4 is already
4964 zero, then we can. If the 8-bit from above would not
4965 clear b2 anyway, then we still win. */
4966 else if (b1 == b3 && (!b2 || !b4
4967 || (remainder & 0x00ff0000 & ~result)))
4968 {
4969 result = remainder & 0xff00ff00;
4970 i = 24;
4971 }
4972 }
4973 else if (loc > 16)
4974 {
4975 /* The 8-bit immediate already found clears b2 (and maybe b3)
4976 and we don't get here unless b1 is already clear, but it will
4977 leave b4 unchanged. */
4978
4979 /* If we can clear b2 and b4 at once, then we win, since the
4980 8-bits couldn't possibly reach that far. */
4981 if (b2 == b4)
4982 {
4983 result = remainder & 0x00ff00ff;
4984 i = 16;
4985 }
4986 }
4987 }
4988
4989 return_sequence->i[insns++] = result;
4990 remainder &= ~result;
4991
4992 if (code == SET || code == MINUS)
4993 code = PLUS;
4994 }
4995 while (remainder);
4996
4997 return insns;
4998 }
4999
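/* The chunks peeled off above are only useful because each one is by
   itself a valid ARM data-processing immediate: an 8-bit value rotated
   right by an even amount.  The standalone sketch below (not built; the
   name is hypothetical, const_ok_for_arm is the real test used in this
   file) spells that rule out.  E.g. 0xff000000 and 0x000003fc pass,
   while 0x101 cannot be encoded.  */
#if 0
static int
example_arm_immediate_p (unsigned int x)
{
  /* Rotating X left by ROT undoes a rotate-right of the 8-bit payload
     by the same (even) amount.  */
  for (int rot = 0; rot < 32; rot += 2)
    {
      unsigned int y = (x << rot) | (x >> ((32 - rot) & 31));
      if ((y & ~0xffu) == 0)
	return 1;
    }
  return 0;
}
#endif
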
5000 /* Emit an instruction with the indicated PATTERN. If COND is
5001 non-NULL, conditionalize the execution of the instruction on COND
5002 being true. */
5003
5004 static void
5005 emit_constant_insn (rtx cond, rtx pattern)
5006 {
5007 if (cond)
5008 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
5009 emit_insn (pattern);
5010 }
5011
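/* For reference, a pattern wrapped by the helper above looks
   schematically like this in the RTL stream (the condition and register
   numbers are only an illustration):

     (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0))
		(set (reg:SI 0) (const_int 42)))

   i.e. the inner SET only takes effect when the condition holds, which
   is how the constant-building instructions emitted below remain
   conditional when a COND is supplied.  */
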
5012 /* As above, but extra parameter GENERATE which, if clear, suppresses
5013 RTL generation. */
5014
5015 static int
5016 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
5017 unsigned HOST_WIDE_INT val, rtx target, rtx source,
5018 int subtargets, int generate)
5019 {
5020 int can_invert = 0;
5021 int can_negate = 0;
5022 int final_invert = 0;
5023 int i;
5024 int set_sign_bit_copies = 0;
5025 int clear_sign_bit_copies = 0;
5026 int clear_zero_bit_copies = 0;
5027 int set_zero_bit_copies = 0;
5028 int insns = 0, neg_insns, inv_insns;
5029 unsigned HOST_WIDE_INT temp1, temp2;
5030 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
5031 struct four_ints *immediates;
5032 struct four_ints pos_immediates, neg_immediates, inv_immediates;
5033
5034 /* Find out which operations are safe for a given CODE. Also do a quick
5035 check for degenerate cases; these can occur when DImode operations
5036 are split. */
5037 switch (code)
5038 {
5039 case SET:
5040 can_invert = 1;
5041 break;
5042
5043 case PLUS:
5044 can_negate = 1;
5045 break;
5046
5047 case IOR:
5048 if (remainder == 0xffffffff)
5049 {
5050 if (generate)
5051 emit_constant_insn (cond,
5052 gen_rtx_SET (target,
5053 GEN_INT (ARM_SIGN_EXTEND (val))));
5054 return 1;
5055 }
5056
5057 if (remainder == 0)
5058 {
5059 if (reload_completed && rtx_equal_p (target, source))
5060 return 0;
5061
5062 if (generate)
5063 emit_constant_insn (cond, gen_rtx_SET (target, source));
5064 return 1;
5065 }
5066 break;
5067
5068 case AND:
5069 if (remainder == 0)
5070 {
5071 if (generate)
5072 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
5073 return 1;
5074 }
5075 if (remainder == 0xffffffff)
5076 {
5077 if (reload_completed && rtx_equal_p (target, source))
5078 return 0;
5079 if (generate)
5080 emit_constant_insn (cond, gen_rtx_SET (target, source));
5081 return 1;
5082 }
5083 can_invert = 1;
5084 break;
5085
5086 case XOR:
5087 if (remainder == 0)
5088 {
5089 if (reload_completed && rtx_equal_p (target, source))
5090 return 0;
5091 if (generate)
5092 emit_constant_insn (cond, gen_rtx_SET (target, source));
5093 return 1;
5094 }
5095
5096 if (remainder == 0xffffffff)
5097 {
5098 if (generate)
5099 emit_constant_insn (cond,
5100 gen_rtx_SET (target,
5101 gen_rtx_NOT (mode, source)));
5102 return 1;
5103 }
5104 final_invert = 1;
5105 break;
5106
5107 case MINUS:
5108 /* We treat MINUS as (val - source), since (source - val) is always
5109 passed as (source + (-val)). */
5110 if (remainder == 0)
5111 {
5112 if (generate)
5113 emit_constant_insn (cond,
5114 gen_rtx_SET (target,
5115 gen_rtx_NEG (mode, source)));
5116 return 1;
5117 }
5118 if (const_ok_for_arm (val))
5119 {
5120 if (generate)
5121 emit_constant_insn (cond,
5122 gen_rtx_SET (target,
5123 gen_rtx_MINUS (mode, GEN_INT (val),
5124 source)));
5125 return 1;
5126 }
5127
5128 break;
5129
5130 default:
5131 gcc_unreachable ();
5132 }
5133
5134 /* If we can do it in one insn get out quickly. */
5135 if (const_ok_for_op (val, code))
5136 {
5137 if (generate)
5138 emit_constant_insn (cond,
5139 gen_rtx_SET (target,
5140 (source
5141 ? gen_rtx_fmt_ee (code, mode, source,
5142 GEN_INT (val))
5143 : GEN_INT (val))));
5144 return 1;
5145 }
5146
5147 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
5148 insn. */
5149 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
5150 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
5151 {
5152 if (generate)
5153 {
5154 if (mode == SImode && i == 16)
5155 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5156 smaller insn. */
5157 emit_constant_insn (cond,
5158 gen_zero_extendhisi2
5159 (target, gen_lowpart (HImode, source)));
5160 else
5161 /* extzv only supports SImode, but we can coerce the operands
5162 into that mode. */
5163 emit_constant_insn (cond,
5164 gen_extzv_t2 (gen_lowpart (SImode, target),
5165 gen_lowpart (SImode, source),
5166 GEN_INT (i), const0_rtx));
5167 }
5168
5169 return 1;
5170 }
5171
5172 /* Calculate a few attributes that may be useful for specific
5173 optimizations. */
5174 /* Count number of leading zeros. */
5175 for (i = 31; i >= 0; i--)
5176 {
5177 if ((remainder & (1 << i)) == 0)
5178 clear_sign_bit_copies++;
5179 else
5180 break;
5181 }
5182
5183 /* Count number of leading 1's. */
5184 for (i = 31; i >= 0; i--)
5185 {
5186 if ((remainder & (1 << i)) != 0)
5187 set_sign_bit_copies++;
5188 else
5189 break;
5190 }
5191
5192 /* Count number of trailing zeros. */
5193 for (i = 0; i <= 31; i++)
5194 {
5195 if ((remainder & (1 << i)) == 0)
5196 clear_zero_bit_copies++;
5197 else
5198 break;
5199 }
5200
5201 /* Count number of trailing 1's. */
5202 for (i = 0; i <= 31; i++)
5203 {
5204 if ((remainder & (1 << i)) != 0)
5205 set_zero_bit_copies++;
5206 else
5207 break;
5208 }
5209
5210 switch (code)
5211 {
5212 case SET:
5213 /* See if we can do this by sign_extending a constant that is known
5214 to be negative. This is a good way of doing it, since the shift
5215 may well merge into a subsequent insn. */
5216 if (set_sign_bit_copies > 1)
5217 {
5218 if (const_ok_for_arm
5219 (temp1 = ARM_SIGN_EXTEND (remainder
5220 << (set_sign_bit_copies - 1))))
5221 {
5222 if (generate)
5223 {
5224 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5225 emit_constant_insn (cond,
5226 gen_rtx_SET (new_src, GEN_INT (temp1)));
5227 emit_constant_insn (cond,
5228 gen_ashrsi3 (target, new_src,
5229 GEN_INT (set_sign_bit_copies - 1)));
5230 }
5231 return 2;
5232 }
5233 /* For an inverted constant, we will need to set the low bits,
5234 these will be shifted out of harm's way. */
5235 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5236 if (const_ok_for_arm (~temp1))
5237 {
5238 if (generate)
5239 {
5240 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5241 emit_constant_insn (cond,
5242 gen_rtx_SET (new_src, GEN_INT (temp1)));
5243 emit_constant_insn (cond,
5244 gen_ashrsi3 (target, new_src,
5245 GEN_INT (set_sign_bit_copies - 1)));
5246 }
5247 return 2;
5248 }
5249 }
5250
5251 /* See if we can calculate the value as the difference between two
5252 valid immediates. */
5253 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5254 {
5255 int topshift = clear_sign_bit_copies & ~1;
5256
5257 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5258 & (0xff000000 >> topshift));
5259
5260 /* If temp1 is zero, then that means the 9 most significant
5261 bits of remainder were 1 and we've caused it to overflow.
5262 When topshift is 0 we don't need to do anything since we
5263 can borrow from 'bit 32'. */
5264 if (temp1 == 0 && topshift != 0)
5265 temp1 = 0x80000000 >> (topshift - 1);
5266
5267 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5268
5269 if (const_ok_for_arm (temp2))
5270 {
5271 if (generate)
5272 {
5273 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5274 emit_constant_insn (cond,
5275 gen_rtx_SET (new_src, GEN_INT (temp1)));
5276 emit_constant_insn (cond,
5277 gen_addsi3 (target, new_src,
5278 GEN_INT (-temp2)));
5279 }
5280
5281 return 2;
5282 }
5283 }
5284
5285 /* See if we can generate this by setting the bottom (or the top)
5286 16 bits, and then shifting these into the other half of the
5287 word. We only look for the simplest cases, to do more would cost
5288 too much. Be careful, however, not to generate this when the
5289 alternative would take fewer insns. */
5290 if (val & 0xffff0000)
5291 {
5292 temp1 = remainder & 0xffff0000;
5293 temp2 = remainder & 0x0000ffff;
5294
5295 /* Overlaps outside this range are best done using other methods. */
5296 for (i = 9; i < 24; i++)
5297 {
5298 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5299 && !const_ok_for_arm (temp2))
5300 {
5301 rtx new_src = (subtargets
5302 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5303 : target);
5304 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5305 source, subtargets, generate);
5306 source = new_src;
5307 if (generate)
5308 emit_constant_insn
5309 (cond,
5310 gen_rtx_SET
5311 (target,
5312 gen_rtx_IOR (mode,
5313 gen_rtx_ASHIFT (mode, source,
5314 GEN_INT (i)),
5315 source)));
5316 return insns + 1;
5317 }
5318 }
5319
5320 /* Don't duplicate cases already considered. */
5321 for (i = 17; i < 24; i++)
5322 {
5323 if (((temp1 | (temp1 >> i)) == remainder)
5324 && !const_ok_for_arm (temp1))
5325 {
5326 rtx new_src = (subtargets
5327 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5328 : target);
5329 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5330 source, subtargets, generate);
5331 source = new_src;
5332 if (generate)
5333 emit_constant_insn
5334 (cond,
5335 gen_rtx_SET (target,
5336 gen_rtx_IOR
5337 (mode,
5338 gen_rtx_LSHIFTRT (mode, source,
5339 GEN_INT (i)),
5340 source)));
5341 return insns + 1;
5342 }
5343 }
5344 }
5345 break;
5346
5347 case IOR:
5348 case XOR:
5349 /* If we have IOR or XOR, and the constant can be loaded in a
5350 single instruction, and we can find a temporary to put it in,
5351 then this can be done in two instructions instead of 3-4. */
5352 if (subtargets
5353 /* TARGET can't be NULL if SUBTARGETS is 0. */
5354 || (reload_completed && !reg_mentioned_p (target, source)))
5355 {
5356 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5357 {
5358 if (generate)
5359 {
5360 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5361
5362 emit_constant_insn (cond,
5363 gen_rtx_SET (sub, GEN_INT (val)));
5364 emit_constant_insn (cond,
5365 gen_rtx_SET (target,
5366 gen_rtx_fmt_ee (code, mode,
5367 source, sub)));
5368 }
5369 return 2;
5370 }
5371 }
5372
5373 if (code == XOR)
5374 break;
5375
5376 /* Convert:
5377 x = y | constant (which is composed of set_sign_bit_copies leading 1s
5378 followed by 0s, e.g. 0xfff00000)
5379 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5380
5381 This can be done in 2 instructions by using shifts with mov or mvn.
5382 e.g. for
5383 x = x | 0xfff00000;
5384 we generate:
5385 mvn r0, r0, asl #12
5386 mvn r0, r0, lsr #12 */
5387 if (set_sign_bit_copies > 8
5388 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5389 {
5390 if (generate)
5391 {
5392 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5393 rtx shift = GEN_INT (set_sign_bit_copies);
5394
5395 emit_constant_insn
5396 (cond,
5397 gen_rtx_SET (sub,
5398 gen_rtx_NOT (mode,
5399 gen_rtx_ASHIFT (mode,
5400 source,
5401 shift))));
5402 emit_constant_insn
5403 (cond,
5404 gen_rtx_SET (target,
5405 gen_rtx_NOT (mode,
5406 gen_rtx_LSHIFTRT (mode, sub,
5407 shift))));
5408 }
5409 return 2;
5410 }
5411
5412 /* Convert
5413 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5414 to
5415 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5416
5417 E.g. for r0 = r0 | 0xfff we generate:
5418 mvn r0, r0, lsr #12
5419 mvn r0, r0, asl #12
5420
5421 */
5422 if (set_zero_bit_copies > 8
5423 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5424 {
5425 if (generate)
5426 {
5427 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5428 rtx shift = GEN_INT (set_zero_bit_copies);
5429
5430 emit_constant_insn
5431 (cond,
5432 gen_rtx_SET (sub,
5433 gen_rtx_NOT (mode,
5434 gen_rtx_LSHIFTRT (mode,
5435 source,
5436 shift))));
5437 emit_constant_insn
5438 (cond,
5439 gen_rtx_SET (target,
5440 gen_rtx_NOT (mode,
5441 gen_rtx_ASHIFT (mode, sub,
5442 shift))));
5443 }
5444 return 2;
5445 }
5446
5447 /* This will never be reached for Thumb2 because orn is a valid
5448 instruction. This is for Thumb-1 and the 32-bit ARM cases.
5449
5450 x = y | constant (such that ~constant is a valid constant)
5451 Transform this to
5452 x = ~(~y & ~constant).
5453 */
5454 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5455 {
5456 if (generate)
5457 {
5458 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5459 emit_constant_insn (cond,
5460 gen_rtx_SET (sub,
5461 gen_rtx_NOT (mode, source)));
5462 source = sub;
5463 if (subtargets)
5464 sub = gen_reg_rtx (mode);
5465 emit_constant_insn (cond,
5466 gen_rtx_SET (sub,
5467 gen_rtx_AND (mode, source,
5468 GEN_INT (temp1))));
5469 emit_constant_insn (cond,
5470 gen_rtx_SET (target,
5471 gen_rtx_NOT (mode, sub)));
5472 }
5473 return 3;
5474 }
5475 break;
5476
5477 case AND:
5478 /* See if two shifts will do 2 or more insns' worth of work. */
5479 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5480 {
5481 HOST_WIDE_INT shift_mask = ((0xffffffff
5482 << (32 - clear_sign_bit_copies))
5483 & 0xffffffff);
5484
5485 if ((remainder | shift_mask) != 0xffffffff)
5486 {
5487 HOST_WIDE_INT new_val
5488 = ARM_SIGN_EXTEND (remainder | shift_mask);
5489
5490 if (generate)
5491 {
5492 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5493 insns = arm_gen_constant (AND, SImode, cond, new_val,
5494 new_src, source, subtargets, 1);
5495 source = new_src;
5496 }
5497 else
5498 {
5499 rtx targ = subtargets ? NULL_RTX : target;
5500 insns = arm_gen_constant (AND, mode, cond, new_val,
5501 targ, source, subtargets, 0);
5502 }
5503 }
5504
5505 if (generate)
5506 {
5507 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5508 rtx shift = GEN_INT (clear_sign_bit_copies);
5509
5510 emit_insn (gen_ashlsi3 (new_src, source, shift));
5511 emit_insn (gen_lshrsi3 (target, new_src, shift));
5512 }
5513
5514 return insns + 2;
5515 }
5516
5517 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5518 {
5519 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5520
5521 if ((remainder | shift_mask) != 0xffffffff)
5522 {
5523 HOST_WIDE_INT new_val
5524 = ARM_SIGN_EXTEND (remainder | shift_mask);
5525 if (generate)
5526 {
5527 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5528
5529 insns = arm_gen_constant (AND, mode, cond, new_val,
5530 new_src, source, subtargets, 1);
5531 source = new_src;
5532 }
5533 else
5534 {
5535 rtx targ = subtargets ? NULL_RTX : target;
5536
5537 insns = arm_gen_constant (AND, mode, cond, new_val,
5538 targ, source, subtargets, 0);
5539 }
5540 }
5541
5542 if (generate)
5543 {
5544 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5545 rtx shift = GEN_INT (clear_zero_bit_copies);
5546
5547 emit_insn (gen_lshrsi3 (new_src, source, shift));
5548 emit_insn (gen_ashlsi3 (target, new_src, shift));
5549 }
5550
5551 return insns + 2;
5552 }
5553
5554 break;
5555
5556 default:
5557 break;
5558 }
5559
5560 /* Calculate what the instruction sequences would be if we generated it
5561 normally, negated, or inverted. */
5562 if (code == AND)
5563 /* AND cannot be split into multiple insns, so invert and use BIC. */
5564 insns = 99;
5565 else
5566 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5567
5568 if (can_negate)
5569 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5570 &neg_immediates);
5571 else
5572 neg_insns = 99;
5573
5574 if (can_invert || final_invert)
5575 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5576 &inv_immediates);
5577 else
5578 inv_insns = 99;
5579
5580 immediates = &pos_immediates;
5581
5582 /* Is the negated immediate sequence more efficient? */
5583 if (neg_insns < insns && neg_insns <= inv_insns)
5584 {
5585 insns = neg_insns;
5586 immediates = &neg_immediates;
5587 }
5588 else
5589 can_negate = 0;
5590
5591 /* Is the inverted immediate sequence more efficient?
5592 We must allow for an extra NOT instruction for XOR operations, although
5593 there is some chance that the final 'mvn' will get optimized later. */
5594 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5595 {
5596 insns = inv_insns;
5597 immediates = &inv_immediates;
5598 }
5599 else
5600 {
5601 can_invert = 0;
5602 final_invert = 0;
5603 }
5604
5605 /* Now output the chosen sequence as instructions. */
5606 if (generate)
5607 {
5608 for (i = 0; i < insns; i++)
5609 {
5610 rtx new_src, temp1_rtx;
5611
5612 temp1 = immediates->i[i];
5613
5614 if (code == SET || code == MINUS)
5615 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5616 else if ((final_invert || i < (insns - 1)) && subtargets)
5617 new_src = gen_reg_rtx (mode);
5618 else
5619 new_src = target;
5620
5621 if (can_invert)
5622 temp1 = ~temp1;
5623 else if (can_negate)
5624 temp1 = -temp1;
5625
5626 temp1 = trunc_int_for_mode (temp1, mode);
5627 temp1_rtx = GEN_INT (temp1);
5628
5629 if (code == SET)
5630 ;
5631 else if (code == MINUS)
5632 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5633 else
5634 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5635
5636 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5637 source = new_src;
5638
5639 if (code == SET)
5640 {
5641 can_negate = can_invert;
5642 can_invert = 0;
5643 code = PLUS;
5644 }
5645 else if (code == MINUS)
5646 code = PLUS;
5647 }
5648 }
5649
5650 if (final_invert)
5651 {
5652 if (generate)
5653 emit_constant_insn (cond, gen_rtx_SET (target,
5654 gen_rtx_NOT (mode, source)));
5655 insns++;
5656 }
5657
5658 return insns;
5659 }
5660
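/* A condensed view of the selection just made between the direct,
   negated and inverted immediate sequences (illustrative sketch, not
   built; the name is hypothetical and the real code above additionally
   charges one insn for the trailing MVN when a final inversion cannot
   be avoided).  For example, for AND with 0xfffffff0 the direct form is
   ruled out but the inverse 0xf is a single immediate, so the inverted
   form wins and a single BIC is emitted.  */
#if 0
static const char *
example_pick_variant (int pos_insns, int neg_insns, int inv_insns)
{
  if (neg_insns < pos_insns && neg_insns <= inv_insns)
    return "negated";		/* Synthesize via -VAL.  */
  if (inv_insns < pos_insns)
    return "inverted";		/* Synthesize via ~VAL, e.g. BIC or MVN.  */
  return "direct";
}
#endif
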
5661 /* Return TRUE if op is a constant where both the low and top words are
5662 suitable for RSB/RSC instructions. This is never true for Thumb, since
5663 we do not have RSC in that case. */
5664 static bool
5665 arm_const_double_prefer_rsbs_rsc (rtx op)
5666 {
5667 /* Thumb lacks RSC, so we never prefer that sequence. */
5668 if (TARGET_THUMB || !CONST_INT_P (op))
5669 return false;
5670 HOST_WIDE_INT hi, lo;
5671 lo = UINTVAL (op) & 0xffffffffULL;
5672 hi = UINTVAL (op) >> 32;
5673 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5674 }
5675
5676 /* Canonicalize a comparison so that we are more likely to recognize it.
5677 This can be done for a few constant compares, where we can make the
5678 immediate value easier to load. */
5679
5680 static void
5681 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5682 bool op0_preserve_value)
5683 {
5684 machine_mode mode;
5685 unsigned HOST_WIDE_INT i, maxval;
5686
5687 mode = GET_MODE (*op0);
5688 if (mode == VOIDmode)
5689 mode = GET_MODE (*op1);
5690
5691 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5692
5693 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5694 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5695 either reversed or (for constant OP1) adjusted to GE/LT.
5696 Similarly for GTU/LEU in Thumb mode. */
5697 if (mode == DImode)
5698 {
5699
5700 if (*code == GT || *code == LE
5701 || *code == GTU || *code == LEU)
5702 {
5703 /* Missing comparison. First try to use an available
5704 comparison. */
5705 if (CONST_INT_P (*op1))
5706 {
5707 i = INTVAL (*op1);
5708 switch (*code)
5709 {
5710 case GT:
5711 case LE:
5712 if (i != maxval)
5713 {
5714 /* Try to convert to GE/LT, unless that would be more
5715 expensive. */
5716 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5717 && arm_const_double_prefer_rsbs_rsc (*op1))
5718 return;
5719 *op1 = GEN_INT (i + 1);
5720 *code = *code == GT ? GE : LT;
5721 }
5722 else
5723 {
5724 /* GT maxval is always false, LE maxval is always true.
5725 We can't fold that away here as we must make a
5726 comparison, but we can fold them to comparisons
5727 with the same result that can be handled:
5728 op0 GT maxval -> op0 LT minval
5729 op0 LE maxval -> op0 GE minval
5730 where minval = (-maxval - 1). */
5731 *op1 = GEN_INT (-maxval - 1);
5732 *code = *code == GT ? LT : GE;
5733 }
5734 return;
5735
5736 case GTU:
5737 case LEU:
5738 if (i != ~((unsigned HOST_WIDE_INT) 0))
5739 {
5740 /* Try to convert to GEU/LTU, unless that would
5741 be more expensive. */
5742 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5743 && arm_const_double_prefer_rsbs_rsc (*op1))
5744 return;
5745 *op1 = GEN_INT (i + 1);
5746 *code = *code == GTU ? GEU : LTU;
5747 }
5748 else
5749 {
5750 /* GTU ~0 is always false, LEU ~0 is always true.
5751 We can't fold that away here as we must make a
5752 comparison, but we can fold them to comparisons
5753 with the same result that can be handled:
5754 op0 GTU ~0 -> op0 LTU 0
5755 op0 LEU ~0 -> op0 GEU 0. */
5756 *op1 = const0_rtx;
5757 *code = *code == GTU ? LTU : GEU;
5758 }
5759 return;
5760
5761 default:
5762 gcc_unreachable ();
5763 }
5764 }
5765
5766 if (!op0_preserve_value)
5767 {
5768 std::swap (*op0, *op1);
5769 *code = (int)swap_condition ((enum rtx_code)*code);
5770 }
5771 }
5772 return;
5773 }
5774
5775 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5776 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5777 to facilitate possible combining with a cmp into 'ands'. */
5778 if (mode == SImode
5779 && GET_CODE (*op0) == ZERO_EXTEND
5780 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5781 && GET_MODE (XEXP (*op0, 0)) == QImode
5782 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5783 && subreg_lowpart_p (XEXP (*op0, 0))
5784 && *op1 == const0_rtx)
5785 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5786 GEN_INT (255));
5787
5788 /* Comparisons smaller than DImode. Only adjust comparisons against
5789 an out-of-range constant. */
5790 if (!CONST_INT_P (*op1)
5791 || const_ok_for_arm (INTVAL (*op1))
5792 || const_ok_for_arm (- INTVAL (*op1)))
5793 return;
5794
5795 i = INTVAL (*op1);
5796
5797 switch (*code)
5798 {
5799 case EQ:
5800 case NE:
5801 return;
5802
5803 case GT:
5804 case LE:
5805 if (i != maxval
5806 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5807 {
5808 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5809 *code = *code == GT ? GE : LT;
5810 return;
5811 }
5812 break;
5813
5814 case GE:
5815 case LT:
5816 if (i != ~maxval
5817 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5818 {
5819 *op1 = GEN_INT (i - 1);
5820 *code = *code == GE ? GT : LE;
5821 return;
5822 }
5823 break;
5824
5825 case GTU:
5826 case LEU:
5827 if (i != ~((unsigned HOST_WIDE_INT) 0)
5828 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5829 {
5830 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5831 *code = *code == GTU ? GEU : LTU;
5832 return;
5833 }
5834 break;
5835
5836 case GEU:
5837 case LTU:
5838 if (i != 0
5839 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5840 {
5841 *op1 = GEN_INT (i - 1);
5842 *code = *code == GEU ? GTU : LEU;
5843 return;
5844 }
5845 break;
5846
5847 default:
5848 gcc_unreachable ();
5849 }
5850 }
5851
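/* A concrete instance of the SImode adjustment above (the constants are
   only an example and the helper name is hypothetical): 0x3fb is not a
   valid ARM immediate, but 0x3fc (0xff shifted left by two, i.e. an
   even rotation of an 8-bit value) is, so a comparison written as
   x > 0x3fb is rewritten to the equivalent x >= 0x3fc, which a single
   CMP against an encodable constant can implement.  */
#if 0
static void
example_canonicalize_gt (unsigned int *imm, int *ge_not_gt)
{
  /* GT i  ==>  GE (i + 1), valid whenever i is not the maximum value
     and i + 1 (or its negation) is an encodable immediate.  */
  *imm += 1;
  *ge_not_gt = 1;
}
#endif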
5852
5853 /* Define how to find the value returned by a function. */
5854
5855 static rtx
5856 arm_function_value(const_tree type, const_tree func,
5857 bool outgoing ATTRIBUTE_UNUSED)
5858 {
5859 machine_mode mode;
5860 int unsignedp ATTRIBUTE_UNUSED;
5861 rtx r ATTRIBUTE_UNUSED;
5862
5863 mode = TYPE_MODE (type);
5864
5865 if (TARGET_AAPCS_BASED)
5866 return aapcs_allocate_return_reg (mode, type, func);
5867
5868 /* Promote integer types. */
5869 if (INTEGRAL_TYPE_P (type))
5870 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5871
5872 /* Promote small structs returned in a register to full-word size
5873 for big-endian AAPCS. */
5874 if (arm_return_in_msb (type))
5875 {
5876 HOST_WIDE_INT size = int_size_in_bytes (type);
5877 if (size % UNITS_PER_WORD != 0)
5878 {
5879 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5880 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5881 }
5882 }
5883
5884 return arm_libcall_value_1 (mode);
5885 }
5886
5887 /* libcall hashtable helpers. */
5888
5889 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5890 {
5891 static inline hashval_t hash (const rtx_def *);
5892 static inline bool equal (const rtx_def *, const rtx_def *);
5893 static inline void remove (rtx_def *);
5894 };
5895
5896 inline bool
5897 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5898 {
5899 return rtx_equal_p (p1, p2);
5900 }
5901
5902 inline hashval_t
5903 libcall_hasher::hash (const rtx_def *p1)
5904 {
5905 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5906 }
5907
5908 typedef hash_table<libcall_hasher> libcall_table_type;
5909
5910 static void
5911 add_libcall (libcall_table_type *htab, rtx libcall)
5912 {
5913 *htab->find_slot (libcall, INSERT) = libcall;
5914 }
5915
5916 static bool
5917 arm_libcall_uses_aapcs_base (const_rtx libcall)
5918 {
5919 static bool init_done = false;
5920 static libcall_table_type *libcall_htab = NULL;
5921
5922 if (!init_done)
5923 {
5924 init_done = true;
5925
5926 libcall_htab = new libcall_table_type (31);
5927 add_libcall (libcall_htab,
5928 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5929 add_libcall (libcall_htab,
5930 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5931 add_libcall (libcall_htab,
5932 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5933 add_libcall (libcall_htab,
5934 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5935
5936 add_libcall (libcall_htab,
5937 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5938 add_libcall (libcall_htab,
5939 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5940 add_libcall (libcall_htab,
5941 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5942 add_libcall (libcall_htab,
5943 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5944
5945 add_libcall (libcall_htab,
5946 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5947 add_libcall (libcall_htab,
5948 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5949 add_libcall (libcall_htab,
5950 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5951 add_libcall (libcall_htab,
5952 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5953 add_libcall (libcall_htab,
5954 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5955 add_libcall (libcall_htab,
5956 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5957 add_libcall (libcall_htab,
5958 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5959 add_libcall (libcall_htab,
5960 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5961 add_libcall (libcall_htab,
5962 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5963 add_libcall (libcall_htab,
5964 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5965
5966 /* Values from double-precision helper functions are returned in core
5967 registers if the selected core only supports single-precision
5968 arithmetic, even if we are using the hard-float ABI. The same is
5969 true for single-precision helpers except in case of MVE, because in
5970 MVE we will be using the hard-float ABI on a CPU which doesn't support
5971 single-precision operations in hardware. In MVE the following check
5972 enables use of emulation for the single-precision arithmetic
5973 operations. */
5974 if (TARGET_HAVE_MVE)
5975 {
5976 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5977 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5978 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5979 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5980 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5981 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5982 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5983 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5984 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5985 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5986 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5987 }
5988 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5989 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5990 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5991 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5992 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5993 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5994 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5995 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5996 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5997 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5998 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5999 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
6000 SFmode));
6001 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
6002 DFmode));
6003 add_libcall (libcall_htab,
6004 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
6005 }
6006
6007 return libcall && libcall_htab->find (libcall) != NULL;
6008 }
6009
6010 static rtx
6011 arm_libcall_value_1 (machine_mode mode)
6012 {
6013 if (TARGET_AAPCS_BASED)
6014 return aapcs_libcall_value (mode);
6015 else if (TARGET_IWMMXT_ABI
6016 && arm_vector_mode_supported_p (mode))
6017 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
6018 else
6019 return gen_rtx_REG (mode, ARG_REGISTER (1));
6020 }
6021
6022 /* Define how to find the value returned by a library function
6023 assuming the value has mode MODE. */
6024
6025 static rtx
6026 arm_libcall_value (machine_mode mode, const_rtx libcall)
6027 {
6028 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
6029 && GET_MODE_CLASS (mode) == MODE_FLOAT)
6030 {
6031 /* The following libcalls return their result in integer registers,
6032 even though they return a floating point value. */
6033 if (arm_libcall_uses_aapcs_base (libcall))
6034 return gen_rtx_REG (mode, ARG_REGISTER(1));
6035
6036 }
6037
6038 return arm_libcall_value_1 (mode);
6039 }
6040
6041 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
6042
6043 static bool
6044 arm_function_value_regno_p (const unsigned int regno)
6045 {
6046 if (regno == ARG_REGISTER (1)
6047 || (TARGET_32BIT
6048 && TARGET_AAPCS_BASED
6049 && TARGET_HARD_FLOAT
6050 && regno == FIRST_VFP_REGNUM)
6051 || (TARGET_IWMMXT_ABI
6052 && regno == FIRST_IWMMXT_REGNUM))
6053 return true;
6054
6055 return false;
6056 }
6057
6058 /* Determine the amount of memory needed to store the possible return
6059 registers of an untyped call. */
6060 int
6061 arm_apply_result_size (void)
6062 {
6063 int size = 16;
6064
6065 if (TARGET_32BIT)
6066 {
6067 if (TARGET_HARD_FLOAT_ABI)
6068 size += 32;
6069 if (TARGET_IWMMXT_ABI)
6070 size += 8;
6071 }
6072
6073 return size;
6074 }
6075
6076 /* Decide whether TYPE should be returned in memory (true)
6077 or in a register (false). FNTYPE is the type of the function making
6078 the call. */
6079 static bool
6080 arm_return_in_memory (const_tree type, const_tree fntype)
6081 {
6082 HOST_WIDE_INT size;
6083
6084 size = int_size_in_bytes (type); /* Negative if not fixed size. */
6085
6086 if (TARGET_AAPCS_BASED)
6087 {
6088 /* Simple, non-aggregate types (i.e. not including vectors and
6089 complex) are always returned in a register (or registers).
6090 We don't care about which register here, so we can short-cut
6091 some of the detail. */
6092 if (!AGGREGATE_TYPE_P (type)
6093 && TREE_CODE (type) != VECTOR_TYPE
6094 && TREE_CODE (type) != COMPLEX_TYPE)
6095 return false;
6096
6097 /* Any return value that is no larger than one word can be
6098 returned in r0. */
6099 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
6100 return false;
6101
6102 /* Check any available co-processors to see if they accept the
6103 type as a register candidate (VFP, for example, can return
6104 some aggregates in consecutive registers). These aren't
6105 available if the call is variadic. */
6106 if (aapcs_select_return_coproc (type, fntype) >= 0)
6107 return false;
6108
6109 /* Vector values should be returned using ARM registers, not
6110 memory (unless they're over 16 bytes, which will break since
6111 we only have four call-clobbered registers to play with). */
6112 if (TREE_CODE (type) == VECTOR_TYPE)
6113 return (size < 0 || size > (4 * UNITS_PER_WORD));
6114
6115 /* The rest go in memory. */
6116 return true;
6117 }
6118
6119 if (TREE_CODE (type) == VECTOR_TYPE)
6120 return (size < 0 || size > (4 * UNITS_PER_WORD));
6121
6122 if (!AGGREGATE_TYPE_P (type) &&
6123 (TREE_CODE (type) != VECTOR_TYPE))
6124 /* All simple types are returned in registers. */
6125 return false;
6126
6127 if (arm_abi != ARM_ABI_APCS)
6128 {
6129 /* ATPCS and later return aggregate types in memory only if they are
6130 larger than a word (or are variable size). */
6131 return (size < 0 || size > UNITS_PER_WORD);
6132 }
6133
6134 /* For the arm-wince targets we choose to be compatible with Microsoft's
6135 ARM and Thumb compilers, which always return aggregates in memory. */
6136 #ifndef ARM_WINCE
6137 /* All structures/unions bigger than one word are returned in memory.
6138 Also catch the case where int_size_in_bytes returns -1. In this case
6139 the aggregate is either huge or of variable size, and in either case
6140 we will want to return it via memory and not in a register. */
6141 if (size < 0 || size > UNITS_PER_WORD)
6142 return true;
6143
6144 if (TREE_CODE (type) == RECORD_TYPE)
6145 {
6146 tree field;
6147
6148 /* For a struct the APCS says that we only return in a register
6149 if the type is 'integer like' and every addressable element
6150 has an offset of zero. For practical purposes this means
6151 that the structure can have at most one non bit-field element
6152 and that this element must be the first one in the structure. */
6153
6154 /* Find the first field, ignoring non FIELD_DECL things which will
6155 have been created by C++. */
6156 /* NOTE: This code is deprecated and has not been updated to handle
6157 DECL_FIELD_ABI_IGNORED. */
6158 for (field = TYPE_FIELDS (type);
6159 field && TREE_CODE (field) != FIELD_DECL;
6160 field = DECL_CHAIN (field))
6161 continue;
6162
6163 if (field == NULL)
6164 return false; /* An empty structure. Allowed by an extension to ANSI C. */
6165
6166 /* Check that the first field is valid for returning in a register. */
6167
6168 /* ... Floats are not allowed */
6169 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6170 return true;
6171
6172 /* ... Aggregates that are not themselves valid for returning in
6173 a register are not allowed. */
6174 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6175 return true;
6176
6177 /* Now check the remaining fields, if any. Only bitfields are allowed,
6178 since they are not addressable. */
6179 for (field = DECL_CHAIN (field);
6180 field;
6181 field = DECL_CHAIN (field))
6182 {
6183 if (TREE_CODE (field) != FIELD_DECL)
6184 continue;
6185
6186 if (!DECL_BIT_FIELD_TYPE (field))
6187 return true;
6188 }
6189
6190 return false;
6191 }
6192
6193 if (TREE_CODE (type) == UNION_TYPE)
6194 {
6195 tree field;
6196
6197 /* Unions can be returned in registers if every element is
6198 integral, or can be returned in an integer register. */
6199 for (field = TYPE_FIELDS (type);
6200 field;
6201 field = DECL_CHAIN (field))
6202 {
6203 if (TREE_CODE (field) != FIELD_DECL)
6204 continue;
6205
6206 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6207 return true;
6208
6209 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6210 return true;
6211 }
6212
6213 return false;
6214 }
6215 #endif /* not ARM_WINCE */
6216
6217 /* Return all other types in memory. */
6218 return true;
6219 }
6220
6221 const struct pcs_attribute_arg
6222 {
6223 const char *arg;
6224 enum arm_pcs value;
6225 } pcs_attribute_args[] =
6226 {
6227 {"aapcs", ARM_PCS_AAPCS},
6228 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6229 #if 0
6230 /* We could recognize these, but changes would be needed elsewhere
6231 * to implement them. */
6232 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6233 {"atpcs", ARM_PCS_ATPCS},
6234 {"apcs", ARM_PCS_APCS},
6235 #endif
6236 {NULL, ARM_PCS_UNKNOWN}
6237 };
6238
6239 static enum arm_pcs
6240 arm_pcs_from_attribute (tree attr)
6241 {
6242 const struct pcs_attribute_arg *ptr;
6243 const char *arg;
6244
6245 /* Get the value of the argument. */
6246 if (TREE_VALUE (attr) == NULL_TREE
6247 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6248 return ARM_PCS_UNKNOWN;
6249
6250 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6251
6252 /* Check it against the list of known arguments. */
6253 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6254 if (streq (arg, ptr->arg))
6255 return ptr->value;
6256
6257 /* An unrecognized PCS variant. */
6258 return ARM_PCS_UNKNOWN;
6259 }
6260
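/* For reference, the attribute parsed above is written in user code
   like so (illustrative declaration only; whether the VFP variant can
   actually be used depends on the float ABI in effect):  */
#if 0
double example_dot (double x, double y) __attribute__ ((pcs ("aapcs-vfp")));
#endif
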
6261 /* Get the PCS variant to use for this call. TYPE is the function's type
6262 specification, DECL is the specific declaration. DECL may be null if
6263 the call could be indirect or if this is a library call. */
6264 static enum arm_pcs
6265 arm_get_pcs_model (const_tree type, const_tree decl ATTRIBUTE_UNUSED)
6266 {
6267 bool user_convention = false;
6268 enum arm_pcs user_pcs = arm_pcs_default;
6269 tree attr;
6270
6271 gcc_assert (type);
6272
6273 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6274 if (attr)
6275 {
6276 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6277 user_convention = true;
6278 }
6279
6280 if (TARGET_AAPCS_BASED)
6281 {
6282 /* Detect varargs functions. These always use the base rules
6283 (no argument is ever a candidate for a co-processor
6284 register). */
6285 bool base_rules = stdarg_p (type);
6286
6287 if (user_convention)
6288 {
6289 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6290 sorry ("non-AAPCS derived PCS variant");
6291 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6292 error ("variadic functions must use the base AAPCS variant");
6293 }
6294
6295 if (base_rules)
6296 return ARM_PCS_AAPCS;
6297 else if (user_convention)
6298 return user_pcs;
6299 #if 0
6300 /* Unfortunately, this is not safe and can lead to wrong code
6301 being generated (PR96882). Not all calls into the back-end
6302 pass the DECL, so it is unsafe to make any PCS-changing
6303 decisions based on it. In particular the RETURN_IN_MEMORY
6304 hook is only ever passed a TYPE. This needs revisiting to
6305 see if there are any partial improvements that can be
6306 re-enabled. */
6307 else if (decl && flag_unit_at_a_time)
6308 {
6309 /* Local functions never leak outside this compilation unit,
6310 so we are free to use whatever conventions are
6311 appropriate. */
6312 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6313 cgraph_node *local_info_node
6314 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6315 if (local_info_node && local_info_node->local)
6316 return ARM_PCS_AAPCS_LOCAL;
6317 }
6318 #endif
6319 }
6320 else if (user_convention && user_pcs != arm_pcs_default)
6321 sorry ("PCS variant");
6322
6323 /* For everything else we use the target's default. */
6324 return arm_pcs_default;
6325 }
6326
6327
6328 static void
6329 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6330 const_tree fntype ATTRIBUTE_UNUSED,
6331 rtx libcall ATTRIBUTE_UNUSED,
6332 const_tree fndecl ATTRIBUTE_UNUSED)
6333 {
6334 /* Record the unallocated VFP registers. */
6335 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6336 pcum->aapcs_vfp_reg_alloc = 0;
6337 }
6338
6339 /* Bitmasks that indicate whether earlier versions of GCC would have
6340 taken a different path through the ABI logic. This should result in
6341 a -Wpsabi warning if the earlier path led to a different ABI decision.
6342
6343 WARN_PSABI_EMPTY_CXX17_BASE
6344 Indicates that the type includes an artificial empty C++17 base field
6345 that, prior to GCC 10.1, would prevent the type from being treated as
6346 a HFA or HVA. See PR94711 for details.
6347
6348 WARN_PSABI_NO_UNIQUE_ADDRESS
6349 Indicates that the type includes an empty [[no_unique_address]] field
6350 that, prior to GCC 10.1, would prevent the type from being treated as
6351 a HFA or HVA. */
6352 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6353 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6354 const unsigned int WARN_PSABI_ZERO_WIDTH_BITFIELD = 1U << 2;
6355
6356 /* Walk down the type tree of TYPE counting consecutive base elements.
6357 If *MODEP is VOIDmode, then set it to the first valid floating point
6358 type. If a non-floating point type is found, or if a floating point
6359 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6360 otherwise return the count in the sub-tree.
6361
6362 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6363 function has changed its behavior relative to earlier versions of GCC.
6364 Normally the argument should be nonnull and point to a zero-initialized
6365 variable. The function then records whether the ABI decision might
6366 be affected by a known fix to the ABI logic, setting the associated
6367 WARN_PSABI_* bits if so.
6368
6369 When the argument is instead a null pointer, the function tries to
6370 simulate the behavior of GCC before all such ABI fixes were made.
6371 This is useful to check whether the function returns something
6372 different after the ABI fixes. */
6373 static int
6374 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6375 unsigned int *warn_psabi_flags)
6376 {
6377 machine_mode mode;
6378 HOST_WIDE_INT size;
6379
6380 switch (TREE_CODE (type))
6381 {
6382 case REAL_TYPE:
6383 mode = TYPE_MODE (type);
6384 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6385 return -1;
6386
6387 if (*modep == VOIDmode)
6388 *modep = mode;
6389
6390 if (*modep == mode)
6391 return 1;
6392
6393 break;
6394
6395 case COMPLEX_TYPE:
6396 mode = TYPE_MODE (TREE_TYPE (type));
6397 if (mode != DFmode && mode != SFmode)
6398 return -1;
6399
6400 if (*modep == VOIDmode)
6401 *modep = mode;
6402
6403 if (*modep == mode)
6404 return 2;
6405
6406 break;
6407
6408 case VECTOR_TYPE:
6409 /* Use V2SImode and V4SImode as representatives of all 64-bit
6410 and 128-bit vector types, whether or not those modes are
6411 supported with the present options. */
6412 size = int_size_in_bytes (type);
6413 switch (size)
6414 {
6415 case 8:
6416 mode = V2SImode;
6417 break;
6418 case 16:
6419 mode = V4SImode;
6420 break;
6421 default:
6422 return -1;
6423 }
6424
6425 if (*modep == VOIDmode)
6426 *modep = mode;
6427
6428 /* Vector modes are considered to be opaque: two vectors are
6429 equivalent for the purposes of being homogeneous aggregates
6430 if they are the same size. */
6431 if (*modep == mode)
6432 return 1;
6433
6434 break;
6435
6436 case ARRAY_TYPE:
6437 {
6438 int count;
6439 tree index = TYPE_DOMAIN (type);
6440
6441 /* Can't handle incomplete types nor sizes that are not
6442 fixed. */
6443 if (!COMPLETE_TYPE_P (type)
6444 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6445 return -1;
6446
6447 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6448 warn_psabi_flags);
6449 if (count == -1
6450 || !index
6451 || !TYPE_MAX_VALUE (index)
6452 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6453 || !TYPE_MIN_VALUE (index)
6454 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6455 || count < 0)
6456 return -1;
6457
6458 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6459 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6460
6461 /* There must be no padding. */
6462 if (wi::to_wide (TYPE_SIZE (type))
6463 != count * GET_MODE_BITSIZE (*modep))
6464 return -1;
6465
6466 return count;
6467 }
6468
6469 case RECORD_TYPE:
6470 {
6471 int count = 0;
6472 int sub_count;
6473 tree field;
6474
6475 /* Can't handle incomplete types nor sizes that are not
6476 fixed. */
6477 if (!COMPLETE_TYPE_P (type)
6478 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6479 return -1;
6480
6481 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6482 {
6483 if (TREE_CODE (field) != FIELD_DECL)
6484 continue;
6485
6486 if (DECL_FIELD_ABI_IGNORED (field))
6487 {
6488 /* See whether this is something that earlier versions of
6489 GCC failed to ignore. */
6490 unsigned int flag;
6491 if (lookup_attribute ("no_unique_address",
6492 DECL_ATTRIBUTES (field)))
6493 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6494 else if (cxx17_empty_base_field_p (field))
6495 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6496 else
6497 /* No compatibility problem. */
6498 continue;
6499
6500 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6501 if (warn_psabi_flags)
6502 {
6503 *warn_psabi_flags |= flag;
6504 continue;
6505 }
6506 }
6507 /* A zero-width bitfield may affect layout in some
6508 circumstances, but adds no members. The determination
6509 of whether or not a type is an HFA is performed after
6510 layout is complete, so if the type still looks like an
6511 HFA afterwards, it is still classed as one. This is
6512 potentially an ABI break for the hard-float ABI. */
6513 else if (DECL_BIT_FIELD (field)
6514 && integer_zerop (DECL_SIZE (field)))
6515 {
6516 /* Prior to GCC 12 these fields were stripped early,
6517 hiding them from the back-end entirely and
6518 resulting in the correct behaviour for argument
6519 passing. Simulate that old behaviour without
6520 generating a warning. */
6521 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
6522 continue;
6523 if (warn_psabi_flags)
6524 {
6525 *warn_psabi_flags |= WARN_PSABI_ZERO_WIDTH_BITFIELD;
6526 continue;
6527 }
6528 }
6529
6530 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6531 warn_psabi_flags);
6532 if (sub_count < 0)
6533 return -1;
6534 count += sub_count;
6535 }
6536
6537 /* There must be no padding. */
6538 if (wi::to_wide (TYPE_SIZE (type))
6539 != count * GET_MODE_BITSIZE (*modep))
6540 return -1;
6541
6542 return count;
6543 }
6544
6545 case UNION_TYPE:
6546 case QUAL_UNION_TYPE:
6547 {
6548 /* These aren't very interesting except in a degenerate case. */
6549 int count = 0;
6550 int sub_count;
6551 tree field;
6552
6553 /* Can't handle incomplete types nor sizes that are not
6554 fixed. */
6555 if (!COMPLETE_TYPE_P (type)
6556 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6557 return -1;
6558
6559 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6560 {
6561 if (TREE_CODE (field) != FIELD_DECL)
6562 continue;
6563
6564 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6565 warn_psabi_flags);
6566 if (sub_count < 0)
6567 return -1;
6568 count = count > sub_count ? count : sub_count;
6569 }
6570
6571 /* There must be no padding. */
6572 if (wi::to_wide (TYPE_SIZE (type))
6573 != count * GET_MODE_BITSIZE (*modep))
6574 return -1;
6575
6576 return count;
6577 }
6578
6579 default:
6580 break;
6581 }
6582
6583 return -1;
6584 }
6585
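/* Illustrative candidates for the walk above (types only, kept out of
   the build):  */
#if 0
struct hfa3    { float a, b, c; };      /* 3 x SFmode elements: accepted.  */
struct hda2    { double d[2]; };        /* 2 x DFmode elements: accepted.  */
struct mixed   { float a; double b; };  /* Base modes differ: rejected.  */
struct tainted { float a; int b; };     /* Non-FP member: rejected.  */
#endif
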
6586 /* Return true if PCS_VARIANT should use VFP registers. */
6587 static bool
6588 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6589 {
6590 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6591 {
6592 static bool seen_thumb1_vfp = false;
6593
6594 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6595 {
6596 sorry ("Thumb-1 %<hard-float%> VFP ABI");
6597 /* sorry() is not immediately fatal, so only display this once. */
6598 seen_thumb1_vfp = true;
6599 }
6600
6601 return true;
6602 }
6603
6604 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6605 return false;
6606
6607 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6608 (TARGET_VFP_DOUBLE || !is_double));
6609 }
6610
6611 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6612 suitable for passing or returning in VFP registers for the PCS
6613 variant selected. If it is, then *BASE_MODE is updated to contain
6614 a machine mode describing each element of the argument's type and
6615 *COUNT to hold the number of such elements. */
6616 static bool
6617 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6618 machine_mode mode, const_tree type,
6619 machine_mode *base_mode, int *count)
6620 {
6621 machine_mode new_mode = VOIDmode;
6622
6623 /* If we have the type information, prefer that to working things
6624 out from the mode. */
6625 if (type)
6626 {
6627 unsigned int warn_psabi_flags = 0;
6628 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6629 &warn_psabi_flags);
6630 if (ag_count > 0 && ag_count <= 4)
6631 {
6632 static unsigned last_reported_type_uid;
6633 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6634 int alt;
6635 if (warn_psabi
6636 && warn_psabi_flags
6637 && uid != last_reported_type_uid
6638 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6639 != ag_count))
6640 {
6641 const char *url10
6642 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6643 const char *url12
6644 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
6645 gcc_assert (alt == -1);
6646 last_reported_type_uid = uid;
6647 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6648 qualification. */
6649 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6650 inform (input_location, "parameter passing for argument of "
6651 "type %qT with %<[[no_unique_address]]%> members "
6652 "changed %{in GCC 10.1%}",
6653 TYPE_MAIN_VARIANT (type), url10);
6654 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6655 inform (input_location, "parameter passing for argument of "
6656 "type %qT when C++17 is enabled changed to match "
6657 "C++14 %{in GCC 10.1%}",
6658 TYPE_MAIN_VARIANT (type), url10);
6659 else if (warn_psabi_flags & WARN_PSABI_ZERO_WIDTH_BITFIELD)
6660 inform (input_location, "parameter passing for argument of "
6661 "type %qT changed %{in GCC 12.1%}",
6662 TYPE_MAIN_VARIANT (type), url12);
6663 }
6664 *count = ag_count;
6665 }
6666 else
6667 return false;
6668 }
6669 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6670 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6671 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6672 {
6673 *count = 1;
6674 new_mode = mode;
6675 }
6676 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6677 {
6678 *count = 2;
6679 new_mode = (mode == DCmode ? DFmode : SFmode);
6680 }
6681 else
6682 return false;
6683
6684
6685 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6686 return false;
6687
6688 *base_mode = new_mode;
6689
6690 if (TARGET_GENERAL_REGS_ONLY)
6691 error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6692 type);
6693
6694 return true;
6695 }
6696
6697 static bool
6698 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6699 machine_mode mode, const_tree type)
6700 {
6701 int count ATTRIBUTE_UNUSED;
6702 machine_mode ag_mode ATTRIBUTE_UNUSED;
6703
6704 if (!use_vfp_abi (pcs_variant, false))
6705 return false;
6706 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6707 &ag_mode, &count);
6708 }
6709
6710 static bool
6711 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6712 const_tree type)
6713 {
6714 if (!use_vfp_abi (pcum->pcs_variant, false))
6715 return false;
6716
6717 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6718 &pcum->aapcs_vfp_rmode,
6719 &pcum->aapcs_vfp_rcount);
6720 }
6721
6722 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6723 for the behaviour of this function. */
6724
6725 static bool
6726 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6727 const_tree type ATTRIBUTE_UNUSED)
6728 {
6729 int rmode_size
6730 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6731 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6732 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6733 int regno;
6734
6735 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6736 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6737 {
6738 pcum->aapcs_vfp_reg_alloc = mask << regno;
6739 if (mode == BLKmode
6740 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6741 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6742 {
6743 int i;
6744 int rcount = pcum->aapcs_vfp_rcount;
6745 int rshift = shift;
6746 machine_mode rmode = pcum->aapcs_vfp_rmode;
6747 rtx par;
6748 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6749 {
6750 /* Avoid using unsupported vector modes. */
6751 if (rmode == V2SImode)
6752 rmode = DImode;
6753 else if (rmode == V4SImode)
6754 {
6755 rmode = DImode;
6756 rcount *= 2;
6757 rshift /= 2;
6758 }
6759 }
6760 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6761 for (i = 0; i < rcount; i++)
6762 {
6763 rtx tmp = gen_rtx_REG (rmode,
6764 FIRST_VFP_REGNUM + regno + i * rshift);
6765 tmp = gen_rtx_EXPR_LIST
6766 (VOIDmode, tmp,
6767 GEN_INT (i * GET_MODE_SIZE (rmode)));
6768 XVECEXP (par, 0, i) = tmp;
6769 }
6770
6771 pcum->aapcs_reg = par;
6772 }
6773 else
6774 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6775 return true;
6776 }
6777 return false;
6778 }
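
/* Editor's note on the allocation loop above (not part of the original
   sources): for the SFmode/count-3 case, SHIFT is 1 and MASK is 0x7, so
   the first three consecutive free S registers (normally s0-s2) are
   claimed and recorded in aapcs_vfp_reg_alloc.  For DFmode candidates,
   SHIFT is 2, so the scan only ever starts on an even-numbered S
   register, i.e. on a D-register boundary.  */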
6779
6780 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6781 comment there for the behaviour of this function. */
6782
6783 static rtx
6784 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6785 machine_mode mode,
6786 const_tree type ATTRIBUTE_UNUSED)
6787 {
6788 if (!use_vfp_abi (pcs_variant, false))
6789 return NULL;
6790
6791 if (mode == BLKmode
6792 || (GET_MODE_CLASS (mode) == MODE_INT
6793 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6794 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6795 {
6796 int count;
6797 machine_mode ag_mode;
6798 int i;
6799 rtx par;
6800 int shift;
6801
6802 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6803 &ag_mode, &count);
6804
6805 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6806 {
6807 if (ag_mode == V2SImode)
6808 ag_mode = DImode;
6809 else if (ag_mode == V4SImode)
6810 {
6811 ag_mode = DImode;
6812 count *= 2;
6813 }
6814 }
6815 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6816 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6817 for (i = 0; i < count; i++)
6818 {
6819 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6820 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6821 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6822 XVECEXP (par, 0, i) = tmp;
6823 }
6824
6825 return par;
6826 }
6827
6828 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6829 }
6830
6831 static void
6832 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6833 machine_mode mode ATTRIBUTE_UNUSED,
6834 const_tree type ATTRIBUTE_UNUSED)
6835 {
6836 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6837 pcum->aapcs_vfp_reg_alloc = 0;
6838 return;
6839 }
6840
6841 #define AAPCS_CP(X) \
6842 { \
6843 aapcs_ ## X ## _cum_init, \
6844 aapcs_ ## X ## _is_call_candidate, \
6845 aapcs_ ## X ## _allocate, \
6846 aapcs_ ## X ## _is_return_candidate, \
6847 aapcs_ ## X ## _allocate_return_reg, \
6848 aapcs_ ## X ## _advance \
6849 }
6850
6851 /* Table of co-processors that can be used to pass arguments in
6852 registers. Ideally no argument should be a candidate for more than
6853 one co-processor table entry, but the table is processed in order
6854 and stops after the first match. If that entry then fails to put
6855 the argument into a co-processor register, the argument will go on
6856 the stack. */
6857 static struct
6858 {
6859 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6860 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6861
6862 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6863 BLKmode) is a candidate for this co-processor's registers; this
6864 function should ignore any position-dependent state in
6865 CUMULATIVE_ARGS and only use call-type dependent information. */
6866 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6867
6868 /* Return true if the argument does get a co-processor register; it
6869 should set aapcs_reg to an RTX of the register allocated as is
6870 required for a return from FUNCTION_ARG. */
6871 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6872
6873 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6874 be returned in this co-processor's registers. */
6875 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6876
6877 /* Allocate and return an RTX element to hold the return type of a call. This
6878 routine must not fail and will only be called if is_return_candidate
6879 returned true with the same parameters. */
6880 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6881
6882 /* Finish processing this argument and prepare to start processing
6883 the next one. */
6884 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6885 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6886 {
6887 AAPCS_CP(vfp)
6888 };
6889
6890 #undef AAPCS_CP
6891
6892 static int
6893 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6894 const_tree type)
6895 {
6896 int i;
6897
6898 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6899 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6900 return i;
6901
6902 return -1;
6903 }
6904
6905 static int
6906 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6907 {
6908 /* We aren't passed a decl, so we can't check that a call is local.
6909 However, it isn't clear that that would be a win anyway, since it
6910 might limit some tail-calling opportunities. */
6911 enum arm_pcs pcs_variant;
6912
6913 if (fntype)
6914 {
6915 const_tree fndecl = NULL_TREE;
6916
6917 if (TREE_CODE (fntype) == FUNCTION_DECL)
6918 {
6919 fndecl = fntype;
6920 fntype = TREE_TYPE (fntype);
6921 }
6922
6923 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6924 }
6925 else
6926 pcs_variant = arm_pcs_default;
6927
6928 if (pcs_variant != ARM_PCS_AAPCS)
6929 {
6930 int i;
6931
6932 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6933 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6934 TYPE_MODE (type),
6935 type))
6936 return i;
6937 }
6938 return -1;
6939 }
6940
6941 static rtx
6942 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6943 const_tree fntype)
6944 {
6945 /* We aren't passed a decl, so we can't check that a call is local.
6946 However, it isn't clear that that would be a win anyway, since it
6947 might limit some tail-calling opportunities. */
6948 enum arm_pcs pcs_variant;
6949 int unsignedp ATTRIBUTE_UNUSED;
6950
6951 if (fntype)
6952 {
6953 const_tree fndecl = NULL_TREE;
6954
6955 if (TREE_CODE (fntype) == FUNCTION_DECL)
6956 {
6957 fndecl = fntype;
6958 fntype = TREE_TYPE (fntype);
6959 }
6960
6961 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6962 }
6963 else
6964 pcs_variant = arm_pcs_default;
6965
6966 /* Promote integer types. */
6967 if (type && INTEGRAL_TYPE_P (type))
6968 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6969
6970 if (pcs_variant != ARM_PCS_AAPCS)
6971 {
6972 int i;
6973
6974 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6975 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6976 type))
6977 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6978 mode, type);
6979 }
6980
6981 /* Promotes small structs returned in a register to full-word size
6982 for big-endian AAPCS. */
6983 if (type && arm_return_in_msb (type))
6984 {
6985 HOST_WIDE_INT size = int_size_in_bytes (type);
6986 if (size % UNITS_PER_WORD != 0)
6987 {
6988 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6989 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6990 }
6991 }
6992
6993 return gen_rtx_REG (mode, R0_REGNUM);
6994 }
6995
6996 static rtx
6997 aapcs_libcall_value (machine_mode mode)
6998 {
6999 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
7000 && GET_MODE_SIZE (mode) <= 4)
7001 mode = SImode;
7002
7003 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
7004 }
7005
7006 /* Lay out a function argument using the AAPCS rules. The rule
7007 numbers referred to here are those in the AAPCS. */
7008 static void
7009 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
7010 const_tree type, bool named)
7011 {
7012 int nregs, nregs2;
7013 int ncrn;
7014
7015 /* We only need to do this once per argument. */
7016 if (pcum->aapcs_arg_processed)
7017 return;
7018
7019 pcum->aapcs_arg_processed = true;
7020
7021 /* Special case: if named is false then we are handling an incoming
7022 anonymous argument which is on the stack. */
7023 if (!named)
7024 return;
7025
7026 /* Is this a potential co-processor register candidate? */
7027 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7028 {
7029 int slot = aapcs_select_call_coproc (pcum, mode, type);
7030 pcum->aapcs_cprc_slot = slot;
7031
7032 /* We don't have to apply any of the rules from part B of the
7033 preparation phase, these are handled elsewhere in the
7034 compiler. */
7035
7036 if (slot >= 0)
7037 {
7038 /* A Co-processor register candidate goes either in its own
7039 class of registers or on the stack. */
7040 if (!pcum->aapcs_cprc_failed[slot])
7041 {
7042 /* C1.cp - Try to allocate the argument to co-processor
7043 registers. */
7044 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
7045 return;
7046
7047 /* C2.cp - Put the argument on the stack and note that we
7048 can't assign any more candidates in this slot. We also
7049 need to note that we have allocated stack space, so that
7050 we won't later try to split a non-cprc candidate between
7051 core registers and the stack. */
7052 pcum->aapcs_cprc_failed[slot] = true;
7053 pcum->can_split = false;
7054 }
7055
7056 /* We didn't get a register, so this argument goes on the
7057 stack. */
7058 gcc_assert (pcum->can_split == false);
7059 return;
7060 }
7061 }
7062
7063 /* C3 - For double-word aligned arguments, round the NCRN up to the
7064 next even number. */
7065 ncrn = pcum->aapcs_ncrn;
7066 if (ncrn & 1)
7067 {
7068 int res = arm_needs_doubleword_align (mode, type);
7069 /* Only warn during RTL expansion of call stmts, otherwise we would
7070 warn e.g. during gimplification even on functions that will be
7071 always inlined, and we'd warn multiple times. Don't warn when
7072 called in expand_function_start either, as we warn instead in
7073 arm_function_arg_boundary in that case. */
7074 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
7075 inform (input_location, "parameter passing for argument of type "
7076 "%qT changed in GCC 7.1", type);
7077 else if (res > 0)
7078 ncrn++;
7079 }
7080
7081 nregs = ARM_NUM_REGS2 (mode, type);
7082
7083 /* Sigh, this test should really assert that nregs > 0, but a GCC
7084 extension allows empty structs and then gives them zero size; it
7085 then allows such a structure to be passed by value. For some of
7086 the code below we have to pretend that such an argument has
7087 non-zero size so that we 'locate' it correctly either in
7088 registers or on the stack. */
7089 gcc_assert (nregs >= 0);
7090
7091 nregs2 = nregs ? nregs : 1;
7092
7093 /* C4 - Argument fits entirely in core registers. */
7094 if (ncrn + nregs2 <= NUM_ARG_REGS)
7095 {
7096 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7097 pcum->aapcs_next_ncrn = ncrn + nregs;
7098 return;
7099 }
7100
7101 /* C5 - Some core registers left and there are no arguments already
7102 on the stack: split this argument between the remaining core
7103 registers and the stack. */
7104 if (ncrn < NUM_ARG_REGS && pcum->can_split)
7105 {
7106 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
7107 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7108 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
7109 return;
7110 }
7111
7112 /* C6 - NCRN is set to 4. */
7113 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
7114
7115 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
7116 return;
7117 }
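
/* Editor's worked example of rules C3-C7 above (not part of the original
   sources):

     void f (int a, long long b, int c);

   a is allocated to r0; b needs doubleword alignment, so C3 rounds the
   NCRN up from 1 to 2 and b occupies r2-r3 (r1 is left unused); c then
   finds the NCRN already at NUM_ARG_REGS and is placed on the stack by
   C7/C8.  */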
7118
7119 /* Initialize a variable CUM of type CUMULATIVE_ARGS
7120 for a call to a function whose data type is FNTYPE.
7121 For a library call, FNTYPE is NULL. */
7122 void
7123 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
7124 rtx libname,
7125 tree fndecl ATTRIBUTE_UNUSED)
7126 {
7127 /* Long call handling. */
7128 if (fntype)
7129 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
7130 else
7131 pcum->pcs_variant = arm_pcs_default;
7132
7133 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7134 {
7135 if (arm_libcall_uses_aapcs_base (libname))
7136 pcum->pcs_variant = ARM_PCS_AAPCS;
7137
7138 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
7139 pcum->aapcs_reg = NULL_RTX;
7140 pcum->aapcs_partial = 0;
7141 pcum->aapcs_arg_processed = false;
7142 pcum->aapcs_cprc_slot = -1;
7143 pcum->can_split = true;
7144
7145 if (pcum->pcs_variant != ARM_PCS_AAPCS)
7146 {
7147 int i;
7148
7149 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
7150 {
7151 pcum->aapcs_cprc_failed[i] = false;
7152 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
7153 }
7154 }
7155 return;
7156 }
7157
7158 /* Legacy ABIs */
7159
7160 /* On the ARM, the offset starts at 0. */
7161 pcum->nregs = 0;
7162 pcum->iwmmxt_nregs = 0;
7163 pcum->can_split = true;
7164
7165 /* Varargs vectors are treated the same as long long.
7166 named_count avoids having to change the way arm handles 'named' */
7167 pcum->named_count = 0;
7168 pcum->nargs = 0;
7169
7170 if (TARGET_REALLY_IWMMXT && fntype)
7171 {
7172 tree fn_arg;
7173
7174 for (fn_arg = TYPE_ARG_TYPES (fntype);
7175 fn_arg;
7176 fn_arg = TREE_CHAIN (fn_arg))
7177 pcum->named_count += 1;
7178
7179 if (! pcum->named_count)
7180 pcum->named_count = INT_MAX;
7181 }
7182 }
7183
7184 /* Return 2 if double word alignment is required for argument passing,
7185 but wasn't required before the fix for PR88469.
7186 Return 1 if double word alignment is required for argument passing.
7187 Return -1 if double word alignment used to be required for argument
7188 passing before the PR77728 ABI fix, but is not required anymore.
7189 Return 0 if double word alignment is not required and wasn't required
7190 before either. */
7191 static int
7192 arm_needs_doubleword_align (machine_mode mode, const_tree type)
7193 {
7194 if (!type)
7195 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
7196
7197 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
7198 if (!AGGREGATE_TYPE_P (type))
7199 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
7200
7201 /* Array types: Use member alignment of element type. */
7202 if (TREE_CODE (type) == ARRAY_TYPE)
7203 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
7204
7205 int ret = 0;
7206 int ret2 = 0;
7207 /* Record/aggregate types: Use greatest member alignment of any member.
7208
7209 Note that we explicitly consider zero-sized fields here, even though
7210 they don't map to AAPCS machine types. For example, in:
7211
7212 struct __attribute__((aligned(8))) empty {};
7213
7214 struct s {
7215 [[no_unique_address]] empty e;
7216 int x;
7217 };
7218
7219 "s" contains only one Fundamental Data Type (the int field)
7220 but gains 8-byte alignment and size thanks to "e". */
7221 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7222 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7223 {
7224 if (TREE_CODE (field) == FIELD_DECL)
7225 return 1;
7226 else
7227 /* Before PR77728 fix, we were incorrectly considering also
7228 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7229 Make sure we can warn about that with -Wpsabi. */
7230 ret = -1;
7231 }
7232 else if (TREE_CODE (field) == FIELD_DECL
7233 && DECL_BIT_FIELD_TYPE (field)
7234 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7235 ret2 = 1;
7236
7237 if (ret2)
7238 return 2;
7239
7240 return ret;
7241 }
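
/* Editor's examples for the return values documented above (not part of
   the original sources):

     struct s0 { int a, b; };                 0: 4-byte member alignment
     struct s1 { long long x; };              1: 8-byte member alignment
     struct s2 { long long a : 8; int b; };   2: bit-field of a
                                                 doubleword-aligned type
                                                 (the PR88469 case)

   The -1 case covers aggregates whose only over-aligned members are not
   FIELD_DECLs, e.g. over-aligned static data members, as described in
   the PR77728 note above.  */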
7242
7243
7244 /* Determine where to put an argument to a function.
7245 Value is zero to push the argument on the stack,
7246 or a hard register in which to store the argument.
7247
7248 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7249 the preceding args and about the function being called.
7250 ARG is a description of the argument.
7251
7252 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7253 other arguments are passed on the stack. If (NAMED == 0) (which happens
7254 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7255 defined), say it is passed on the stack (function_prologue will
7256 indeed make it be passed on the stack if necessary). */
7257
7258 static rtx
7259 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7260 {
7261 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7262 int nregs;
7263
7264 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7265 a call insn (op3 of a call_value insn). */
7266 if (arg.end_marker_p ())
7267 return const0_rtx;
7268
7269 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7270 {
7271 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7272 return pcum->aapcs_reg;
7273 }
7274
7275 /* Varargs vectors are treated the same as long long.
7276 named_count avoids having to change the way arm handles 'named' */
7277 if (TARGET_IWMMXT_ABI
7278 && arm_vector_mode_supported_p (arg.mode)
7279 && pcum->named_count > pcum->nargs + 1)
7280 {
7281 if (pcum->iwmmxt_nregs <= 9)
7282 return gen_rtx_REG (arg.mode,
7283 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7284 else
7285 {
7286 pcum->can_split = false;
7287 return NULL_RTX;
7288 }
7289 }
7290
7291 /* Put doubleword aligned quantities in even register pairs. */
7292 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7293 {
7294 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7295 if (res < 0 && warn_psabi)
7296 inform (input_location, "parameter passing for argument of type "
7297 "%qT changed in GCC 7.1", arg.type);
7298 else if (res > 0)
7299 {
7300 pcum->nregs++;
7301 if (res > 1 && warn_psabi)
7302 inform (input_location, "parameter passing for argument of type "
7303 "%qT changed in GCC 9.1", arg.type);
7304 }
7305 }
7306
7307 /* Only allow splitting an arg between regs and memory if all preceding
7308 args were allocated to regs. For args passed by reference we only count
7309 the reference pointer. */
7310 if (pcum->can_split)
7311 nregs = 1;
7312 else
7313 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7314
7315 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7316 return NULL_RTX;
7317
7318 return gen_rtx_REG (arg.mode, pcum->nregs);
7319 }
7320
7321 static unsigned int
7322 arm_function_arg_boundary (machine_mode mode, const_tree type)
7323 {
7324 if (!ARM_DOUBLEWORD_ALIGN)
7325 return PARM_BOUNDARY;
7326
7327 int res = arm_needs_doubleword_align (mode, type);
7328 if (res < 0 && warn_psabi)
7329 inform (input_location, "parameter passing for argument of type %qT "
7330 "changed in GCC 7.1", type);
7331 if (res > 1 && warn_psabi)
7332 inform (input_location, "parameter passing for argument of type "
7333 "%qT changed in GCC 9.1", type);
7334
7335 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7336 }
7337
7338 static int
7339 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7340 {
7341 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7342 int nregs = pcum->nregs;
7343
7344 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7345 {
7346 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7347 return pcum->aapcs_partial;
7348 }
7349
7350 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7351 return 0;
7352
7353 if (NUM_ARG_REGS > nregs
7354 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7355 && pcum->can_split)
7356 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7357
7358 return 0;
7359 }
7360
7361 /* Update the data in PCUM to advance over argument ARG. */
7362
7363 static void
7364 arm_function_arg_advance (cumulative_args_t pcum_v,
7365 const function_arg_info &arg)
7366 {
7367 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7368
7369 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7370 {
7371 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7372
7373 if (pcum->aapcs_cprc_slot >= 0)
7374 {
7375 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7376 arg.type);
7377 pcum->aapcs_cprc_slot = -1;
7378 }
7379
7380 /* Generic stuff. */
7381 pcum->aapcs_arg_processed = false;
7382 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7383 pcum->aapcs_reg = NULL_RTX;
7384 pcum->aapcs_partial = 0;
7385 }
7386 else
7387 {
7388 pcum->nargs += 1;
7389 if (arm_vector_mode_supported_p (arg.mode)
7390 && pcum->named_count > pcum->nargs
7391 && TARGET_IWMMXT_ABI)
7392 pcum->iwmmxt_nregs += 1;
7393 else
7394 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7395 }
7396 }
7397
7398 /* Variable sized types are passed by reference. This is a GCC
7399 extension to the ARM ABI. */
7400
7401 static bool
7402 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7403 {
7404 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7405 }
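
/* Editor's note (not part of the original sources): the typical trigger
   is a GNU C variable-sized aggregate, e.g. inside a function

     struct vs { int a[n]; };    n not a constant, so TYPE_SIZE is not
                                 an INTEGER_CST

   and such an argument is passed as a pointer to a caller-allocated
   copy rather than by value.  */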
7406 \f
7407 /* Encode the current state of the #pragma [no_]long_calls. */
7408 typedef enum
7409 {
7410 OFF, /* No #pragma [no_]long_calls is in effect. */
7411 LONG, /* #pragma long_calls is in effect. */
7412 SHORT /* #pragma no_long_calls is in effect. */
7413 } arm_pragma_enum;
7414
7415 static arm_pragma_enum arm_pragma_long_calls = OFF;
7416
7417 void
7418 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7419 {
7420 arm_pragma_long_calls = LONG;
7421 }
7422
7423 void
7424 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7425 {
7426 arm_pragma_long_calls = SHORT;
7427 }
7428
7429 void
7430 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7431 {
7432 arm_pragma_long_calls = OFF;
7433 }
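
/* Editor's usage sketch for the pragmas handled above (the spellings are
   assumed to be those registered by the ARM back end; not part of the
   original sources):

     #pragma long_calls
     void far_away (void);    gets an implicit long_call attribute
     #pragma no_long_calls
     void nearby (void);      gets an implicit short_call attribute
     #pragma long_calls_off
     void normal (void);      back to the command-line default

   The attributes themselves are attached by
   arm_set_default_type_attributes below.  */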
7434 \f
7435 /* Handle an attribute requiring a FUNCTION_DECL;
7436 arguments as in struct attribute_spec.handler. */
7437 static tree
7438 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7439 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7440 {
7441 if (TREE_CODE (*node) != FUNCTION_DECL)
7442 {
7443 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7444 name);
7445 *no_add_attrs = true;
7446 }
7447
7448 return NULL_TREE;
7449 }
7450
7451 /* Handle an "interrupt" or "isr" attribute;
7452 arguments as in struct attribute_spec.handler. */
7453 static tree
7454 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7455 bool *no_add_attrs)
7456 {
7457 if (DECL_P (*node))
7458 {
7459 if (TREE_CODE (*node) != FUNCTION_DECL)
7460 {
7461 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7462 name);
7463 *no_add_attrs = true;
7464 }
7465 else if (TARGET_VFP_BASE)
7466 {
7467 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE attribute: compile with %<-mgeneral-regs-only%>",
7468 name);
7469 }
7470 /* FIXME: the argument if any is checked for type attributes;
7471 should it be checked for decl ones? */
7472 }
7473 else
7474 {
7475 if (FUNC_OR_METHOD_TYPE_P (*node))
7476 {
7477 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7478 {
7479 warning (OPT_Wattributes, "%qE attribute ignored",
7480 name);
7481 *no_add_attrs = true;
7482 }
7483 }
7484 else if (TREE_CODE (*node) == POINTER_TYPE
7485 && FUNC_OR_METHOD_TYPE_P (TREE_TYPE (*node))
7486 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7487 {
7488 *node = build_variant_type_copy (*node);
7489 TREE_TYPE (*node) = build_type_attribute_variant
7490 (TREE_TYPE (*node),
7491 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7492 *no_add_attrs = true;
7493 }
7494 else
7495 {
7496 /* Possibly pass this attribute on from the type to a decl. */
7497 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7498 | (int) ATTR_FLAG_FUNCTION_NEXT
7499 | (int) ATTR_FLAG_ARRAY_NEXT))
7500 {
7501 *no_add_attrs = true;
7502 return tree_cons (name, args, NULL_TREE);
7503 }
7504 else
7505 {
7506 warning (OPT_Wattributes, "%qE attribute ignored",
7507 name);
7508 }
7509 }
7510 }
7511
7512 return NULL_TREE;
7513 }
7514
7515 /* Handle a "pcs" attribute; arguments as in struct
7516 attribute_spec.handler. */
7517 static tree
7518 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7519 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7520 {
7521 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7522 {
7523 warning (OPT_Wattributes, "%qE attribute ignored", name);
7524 *no_add_attrs = true;
7525 }
7526 return NULL_TREE;
7527 }
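
/* Editor's usage example for the "pcs" attribute handled above (not part
   of the original sources):

     double madd (double, double, double)
       __attribute__ ((pcs ("aapcs-vfp")));   arguments and result in
                                              VFP registers
     double madd_base (double, double, double)
       __attribute__ ((pcs ("aapcs")));       base AAPCS, core registers

   Unknown variant names make arm_pcs_from_attribute return
   ARM_PCS_UNKNOWN and the attribute is dropped with a warning.  */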
7528
7529 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7530 /* Handle the "notshared" attribute. This attribute is another way of
7531 requesting hidden visibility. ARM's compiler supports
7532 "__declspec(notshared)"; we support the same thing via an
7533 attribute. */
7534
7535 static tree
7536 arm_handle_notshared_attribute (tree *node,
7537 tree name ATTRIBUTE_UNUSED,
7538 tree args ATTRIBUTE_UNUSED,
7539 int flags ATTRIBUTE_UNUSED,
7540 bool *no_add_attrs)
7541 {
7542 tree decl = TYPE_NAME (*node);
7543
7544 if (decl)
7545 {
7546 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7547 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7548 *no_add_attrs = false;
7549 }
7550 return NULL_TREE;
7551 }
7552 #endif
7553
7554 /* This function returns true if a function with declaration FNDECL and type
7555 FNTYPE uses the stack to pass arguments or return variables and false
7556 otherwise. This is used for functions with the attributes
7557 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7558 diagnostic messages if the stack is used. NAME is the name of the attribute
7559 used. */
7560
7561 static bool
7562 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7563 {
7564 function_args_iterator args_iter;
7565 CUMULATIVE_ARGS args_so_far_v;
7566 cumulative_args_t args_so_far;
7567 bool first_param = true;
7568 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7569
7570 /* Error out if any argument is passed on the stack. */
7571 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7572 args_so_far = pack_cumulative_args (&args_so_far_v);
7573 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7574 {
7575 rtx arg_rtx;
7576
7577 prev_arg_type = arg_type;
7578 if (VOID_TYPE_P (arg_type))
7579 continue;
7580
7581 function_arg_info arg (arg_type, /*named=*/true);
7582 if (!first_param)
7583 /* ??? We should advance after processing the argument and pass
7584 the argument we're advancing past. */
7585 arm_function_arg_advance (args_so_far, arg);
7586 arg_rtx = arm_function_arg (args_so_far, arg);
7587 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7588 {
7589 error ("%qE attribute not available to functions with arguments "
7590 "passed on the stack", name);
7591 return true;
7592 }
7593 first_param = false;
7594 }
7595
7596 /* Error out for variadic functions since we cannot control how many
7597 arguments will be passed and thus the stack could be used. stdarg_p () is not
7598 used for the checking to avoid browsing arguments twice. */
7599 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7600 {
7601 error ("%qE attribute not available to functions with variable number "
7602 "of arguments", name);
7603 return true;
7604 }
7605
7606 /* Error out if return value is passed on the stack. */
7607 ret_type = TREE_TYPE (fntype);
7608 if (arm_return_in_memory (ret_type, fntype))
7609 {
7610 error ("%qE attribute not available to functions that return value on "
7611 "the stack", name);
7612 return true;
7613 }
7614 return false;
7615 }
7616
7617 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7618 function will check whether the attribute is allowed here and will add the
7619 attribute to the function declaration tree or otherwise issue a warning. */
7620
7621 static tree
7622 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7623 tree /* args */,
7624 int /* flags */,
7625 bool *no_add_attrs)
7626 {
7627 tree fndecl;
7628
7629 if (!use_cmse)
7630 {
7631 *no_add_attrs = true;
7632 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7633 "option", name);
7634 return NULL_TREE;
7635 }
7636
7637 /* Ignore attribute for function types. */
7638 if (TREE_CODE (*node) != FUNCTION_DECL)
7639 {
7640 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7641 name);
7642 *no_add_attrs = true;
7643 return NULL_TREE;
7644 }
7645
7646 fndecl = *node;
7647
7648 /* Warn for static linkage functions. */
7649 if (!TREE_PUBLIC (fndecl))
7650 {
7651 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7652 "with static linkage", name);
7653 *no_add_attrs = true;
7654 return NULL_TREE;
7655 }
7656
7657 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7658 TREE_TYPE (fndecl));
7659 return NULL_TREE;
7660 }
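
/* Editor's usage example (assumes an ARMv8-M target compiled with
   -mcmse; not part of the original sources):

     int __attribute__ ((cmse_nonsecure_entry))
     secure_service (int request);

   The function must have external linkage and must pass all arguments
   and its return value in registers, which is what
   cmse_func_args_or_return_in_stack checks above.  */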
7661
7662
7663 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7664 function will check whether the attribute is allowed here and will add the
7665 attribute to the function type tree or otherwise issue a diagnostic. The
7666 reason we check this at declaration time is to only allow the use of the
7667 attribute with declarations of function pointers and not function
7668 declarations. This function checks NODE is of the expected type and issues
7669 diagnostics otherwise using NAME. If it is not of the expected type
7670 *NO_ADD_ATTRS will be set to true. */
7671
7672 static tree
7673 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7674 tree /* args */,
7675 int /* flags */,
7676 bool *no_add_attrs)
7677 {
7678 tree decl = NULL_TREE;
7679 tree fntype, type;
7680
7681 if (!use_cmse)
7682 {
7683 *no_add_attrs = true;
7684 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7685 "option", name);
7686 return NULL_TREE;
7687 }
7688
7689 if (DECL_P (*node))
7690 {
7691 fntype = TREE_TYPE (*node);
7692
7693 if (VAR_P (*node) || TREE_CODE (*node) == TYPE_DECL)
7694 decl = *node;
7695 }
7696 else
7697 fntype = *node;
7698
7699 while (fntype && TREE_CODE (fntype) == POINTER_TYPE)
7700 fntype = TREE_TYPE (fntype);
7701
7702 if ((DECL_P (*node) && !decl) || TREE_CODE (fntype) != FUNCTION_TYPE)
7703 {
7704 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7705 "function pointer", name);
7706 *no_add_attrs = true;
7707 return NULL_TREE;
7708 }
7709
7710 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7711
7712 if (*no_add_attrs)
7713 return NULL_TREE;
7714
7715 /* Prevent trees being shared among function types with and without
7716 cmse_nonsecure_call attribute. */
7717 if (decl)
7718 {
7719 type = build_distinct_type_copy (TREE_TYPE (decl));
7720 TREE_TYPE (decl) = type;
7721 }
7722 else
7723 {
7724 type = build_distinct_type_copy (*node);
7725 *node = type;
7726 }
7727
7728 fntype = type;
7729
7730 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7731 {
7732 type = fntype;
7733 fntype = TREE_TYPE (fntype);
7734 fntype = build_distinct_type_copy (fntype);
7735 TREE_TYPE (type) = fntype;
7736 }
7737
7738 /* Construct a type attribute and add it to the function type. */
7739 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7740 TYPE_ATTRIBUTES (fntype));
7741 TYPE_ATTRIBUTES (fntype) = attrs;
7742 return NULL_TREE;
7743 }
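
/* Editor's usage example (not part of the original sources): the
   attribute belongs on the function type behind a pointer, e.g.

     void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));

   Putting it directly on a function declaration is rejected by the
   checks above.  */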
7744
7745 /* Return 0 if the attributes for two types are incompatible, 1 if they
7746 are compatible, and 2 if they are nearly compatible (which causes a
7747 warning to be generated). */
7748 static int
7749 arm_comp_type_attributes (const_tree type1, const_tree type2)
7750 {
7751 int l1, l2, s1, s2;
7752
7753 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7754 TYPE_ATTRIBUTES (type1));
7755 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7756 TYPE_ATTRIBUTES (type2));
7757 if (bool (attrs1) != bool (attrs2))
7758 return 0;
7759 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7760 return 0;
7761
7762 /* Check for mismatch of non-default calling convention. */
7763 if (TREE_CODE (type1) != FUNCTION_TYPE)
7764 return 1;
7765
7766 /* Check for mismatched call attributes. */
7767 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7768 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7769 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7770 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7771
7772 /* Only bother to check if an attribute is defined. */
7773 if (l1 | l2 | s1 | s2)
7774 {
7775 /* If one type has an attribute, the other must have the same attribute. */
7776 if ((l1 != l2) || (s1 != s2))
7777 return 0;
7778
7779 /* Disallow mixed attributes. */
7780 if ((l1 & s2) || (l2 & s1))
7781 return 0;
7782 }
7783
7784 /* Check for mismatched ISR attribute. */
7785 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7786 if (! l1)
7787 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7788 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7789 if (! l2)
7790 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7791 if (l1 != l2)
7792 return 0;
7793
7794 l1 = lookup_attribute ("cmse_nonsecure_call",
7795 TYPE_ATTRIBUTES (type1)) != NULL;
7796 l2 = lookup_attribute ("cmse_nonsecure_call",
7797 TYPE_ATTRIBUTES (type2)) != NULL;
7798
7799 if (l1 != l2)
7800 return 0;
7801
7802 return 1;
7803 }
7804
7805 /* Assigns default attributes to newly defined type. This is used to
7806 set short_call/long_call attributes for function types of
7807 functions defined inside corresponding #pragma scopes. */
7808 static void
7809 arm_set_default_type_attributes (tree type)
7810 {
7811 /* Add __attribute__ ((long_call)) to all functions, when
7812 inside #pragma long_calls or __attribute__ ((short_call)),
7813 when inside #pragma no_long_calls. */
7814 if (FUNC_OR_METHOD_TYPE_P (type))
7815 {
7816 tree type_attr_list, attr_name;
7817 type_attr_list = TYPE_ATTRIBUTES (type);
7818
7819 if (arm_pragma_long_calls == LONG)
7820 attr_name = get_identifier ("long_call");
7821 else if (arm_pragma_long_calls == SHORT)
7822 attr_name = get_identifier ("short_call");
7823 else
7824 return;
7825
7826 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7827 TYPE_ATTRIBUTES (type) = type_attr_list;
7828 }
7829 }
7830 \f
7831 /* Return true if DECL is known to be linked into section SECTION. */
7832
7833 static bool
7834 arm_function_in_section_p (tree decl, section *section)
7835 {
7836 /* We can only be certain about the prevailing symbol definition. */
7837 if (!decl_binds_to_current_def_p (decl))
7838 return false;
7839
7840 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7841 if (!DECL_SECTION_NAME (decl))
7842 {
7843 /* Make sure that we will not create a unique section for DECL. */
7844 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7845 return false;
7846 }
7847
7848 return function_section (decl) == section;
7849 }
7850
7851 /* Return nonzero if a 32-bit "long_call" should be generated for
7852 a call from the current function to DECL. We generate a long_call
7853 if the function:
7854
7855 a. has an __attribute__ ((long_call))
7856 or b. is within the scope of a #pragma long_calls
7857 or c. the -mlong-calls command line switch has been specified
7858
7859 However we do not generate a long call if the function:
7860
7861 d. has an __attribute__ ((short_call))
7862 or e. is inside the scope of a #pragma no_long_calls
7863 or f. is defined in the same section as the current function. */
7864
7865 bool
7866 arm_is_long_call_p (tree decl)
7867 {
7868 tree attrs;
7869
7870 if (!decl)
7871 return TARGET_LONG_CALLS;
7872
7873 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7874 if (lookup_attribute ("short_call", attrs))
7875 return false;
7876
7877 /* For "f", be conservative, and only cater for cases in which the
7878 whole of the current function is placed in the same section. */
7879 if (!flag_reorder_blocks_and_partition
7880 && TREE_CODE (decl) == FUNCTION_DECL
7881 && arm_function_in_section_p (decl, current_function_section ()))
7882 return false;
7883
7884 if (lookup_attribute ("long_call", attrs))
7885 return true;
7886
7887 return TARGET_LONG_CALLS;
7888 }
7889
7890 /* Return nonzero if it is ok to make a tail-call to DECL. */
7891 static bool
7892 arm_function_ok_for_sibcall (tree decl, tree exp)
7893 {
7894 unsigned long func_type;
7895
7896 if (cfun->machine->sibcall_blocked)
7897 return false;
7898
7899 if (TARGET_FDPIC)
7900 {
7901 /* In FDPIC, never tailcall something for which we have no decl:
7902 the target function could be in a different module, requiring
7903 a different FDPIC register value. */
7904 if (decl == NULL)
7905 return false;
7906 }
7907
7908 /* Never tailcall something if we are generating code for Thumb-1. */
7909 if (TARGET_THUMB1)
7910 return false;
7911
7912 /* The PIC register is live on entry to VxWorks PLT entries, so we
7913 must make the call before restoring the PIC register. */
7914 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7915 return false;
7916
7917 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7918 may be used both as target of the call and base register for restoring
7919 the VFP registers */
7920 if (TARGET_APCS_FRAME && TARGET_ARM
7921 && TARGET_HARD_FLOAT
7922 && decl && arm_is_long_call_p (decl))
7923 return false;
7924
7925 /* If we are interworking and the function is not declared static
7926 then we can't tail-call it unless we know that it exists in this
7927 compilation unit (since it might be a Thumb routine). */
7928 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7929 && !TREE_ASM_WRITTEN (decl))
7930 return false;
7931
7932 func_type = arm_current_func_type ();
7933 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7934 if (IS_INTERRUPT (func_type))
7935 return false;
7936
7937 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7938 generated for entry functions themselves. */
7939 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7940 return false;
7941
7942 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7943 this would complicate matters for later code generation. */
7944 if (TREE_CODE (exp) == CALL_EXPR)
7945 {
7946 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7947 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7948 return false;
7949 }
7950
7951 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7952 {
7953 /* Check that the return value locations are the same. For
7954 example that we aren't returning a value from the sibling in
7955 a VFP register but then need to transfer it to a core
7956 register. */
7957 rtx a, b;
7958 tree decl_or_type = decl;
7959
7960 /* If it is an indirect function pointer, get the function type. */
7961 if (!decl)
7962 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7963
7964 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7965 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7966 cfun->decl, false);
7967 if (!rtx_equal_p (a, b))
7968 return false;
7969 }
7970
7971 /* Never tailcall if function may be called with a misaligned SP. */
7972 if (IS_STACKALIGN (func_type))
7973 return false;
7974
7975 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7976 references should become a NOP. Don't convert such calls into
7977 sibling calls. */
7978 if (TARGET_AAPCS_BASED
7979 && arm_abi == ARM_ABI_AAPCS
7980 && decl
7981 && DECL_WEAK (decl))
7982 return false;
7983
7984 /* We cannot do a tailcall for an indirect call by descriptor if all the
7985 argument registers are used because the only register left to load the
7986 address is IP and it will already contain the static chain. */
7987 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7988 {
7989 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7990 CUMULATIVE_ARGS cum;
7991 cumulative_args_t cum_v;
7992
7993 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7994 cum_v = pack_cumulative_args (&cum);
7995
7996 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7997 {
7998 tree type = TREE_VALUE (t);
7999 if (!VOID_TYPE_P (type))
8000 {
8001 function_arg_info arg (type, /*named=*/true);
8002 arm_function_arg_advance (cum_v, arg);
8003 }
8004 }
8005
8006 function_arg_info arg (integer_type_node, /*named=*/true);
8007 if (!arm_function_arg (cum_v, arg))
8008 return false;
8009 }
8010
8011 /* Everything else is ok. */
8012 return true;
8013 }
8014
8015 \f
8016 /* Addressing mode support functions. */
8017
8018 /* Return nonzero if X is a legitimate immediate operand when compiling
8019 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
8020 int
8021 legitimate_pic_operand_p (rtx x)
8022 {
8023 if (SYMBOL_REF_P (x)
8024 || (GET_CODE (x) == CONST
8025 && GET_CODE (XEXP (x, 0)) == PLUS
8026 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
8027 return 0;
8028
8029 return 1;
8030 }
8031
8032 /* Record that the current function needs a PIC register. If PIC_REG is null,
8033 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
8034 both cases cfun->machine->pic_reg is initialized if we have not already done
8035 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
8036 the PIC register is reloaded at the current position of the instruction stream
8037 regardless of whether it was loaded before. Otherwise, it is only loaded
8038 if not already done so (crtl->uses_pic_offset_table is null). Note that
8039 nonnull PIC_REG is supported only when COMPUTE_NOW is true and null PIC_REG
8040 only when COMPUTE_NOW is false. */
8041
8042 static void
8043 require_pic_register (rtx pic_reg, bool compute_now)
8044 {
8045 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8046
8047 /* A lot of the logic here is made obscure by the fact that this
8048 routine gets called as part of the rtx cost estimation process.
8049 We don't want those calls to affect any assumptions about the real
8050 function; and further, we can't call entry_of_function() until we
8051 start the real expansion process. */
8052 if (!crtl->uses_pic_offset_table || compute_now)
8053 {
8054 gcc_assert (can_create_pseudo_p ()
8055 || (pic_reg != NULL_RTX
8056 && REG_P (pic_reg)
8057 && GET_MODE (pic_reg) == Pmode));
8058 if (arm_pic_register != INVALID_REGNUM
8059 && !compute_now
8060 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
8061 {
8062 if (!cfun->machine->pic_reg)
8063 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
8064
8065 /* Play games to avoid marking the function as needing pic
8066 if we are being called as part of the cost-estimation
8067 process. */
8068 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8069 crtl->uses_pic_offset_table = 1;
8070 }
8071 else
8072 {
8073 rtx_insn *seq, *insn;
8074
8075 if (pic_reg == NULL_RTX)
8076 pic_reg = gen_reg_rtx (Pmode);
8077 if (!cfun->machine->pic_reg)
8078 cfun->machine->pic_reg = pic_reg;
8079
8080 /* Play games to avoid marking the function as needing pic
8081 if we are being called as part of the cost-estimation
8082 process. */
8083 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
8084 {
8085 crtl->uses_pic_offset_table = 1;
8086 start_sequence ();
8087
8088 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
8089 && arm_pic_register > LAST_LO_REGNUM
8090 && !compute_now)
8091 emit_move_insn (cfun->machine->pic_reg,
8092 gen_rtx_REG (Pmode, arm_pic_register));
8093 else
8094 arm_load_pic_register (0UL, pic_reg);
8095
8096 seq = get_insns ();
8097 end_sequence ();
8098
8099 for (insn = seq; insn; insn = NEXT_INSN (insn))
8100 if (INSN_P (insn))
8101 INSN_LOCATION (insn) = prologue_location;
8102
8103 /* We can be called during expansion of PHI nodes, where
8104 we can't yet emit instructions directly in the final
8105 insn stream. Queue the insns on the entry edge, they will
8106 be committed after everything else is expanded. */
8107 if (currently_expanding_to_rtl)
8108 insert_insn_on_edge (seq,
8109 single_succ_edge
8110 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
8111 else
8112 emit_insn (seq);
8113 }
8114 }
8115 }
8116 }
8117
8118 /* Generate insns to calculate the address of ORIG in pic mode. */
8119 static rtx_insn *
8120 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
8121 {
8122 rtx pat;
8123 rtx mem;
8124
8125 pat = gen_calculate_pic_address (reg, pic_reg, orig);
8126
8127 /* Make the MEM as close to a constant as possible. */
8128 mem = SET_SRC (pat);
8129 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
8130 MEM_READONLY_P (mem) = 1;
8131 MEM_NOTRAP_P (mem) = 1;
8132
8133 return emit_insn (pat);
8134 }
8135
8136 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
8137 created to hold the result of the load. If not NULL, PIC_REG indicates
8138 which register to use as PIC register, otherwise it is decided by register
8139 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
8140 location in the instruction stream, regardless of whether it was loaded
8141 previously. Note that nonnull PIC_REG is supported only when COMPUTE_NOW is
8142 true and null PIC_REG only when COMPUTE_NOW is false.
8143
8144 Returns the register REG into which the PIC load is performed. */
8145
8146 rtx
8147 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
8148 bool compute_now)
8149 {
8150 gcc_assert (compute_now == (pic_reg != NULL_RTX));
8151
8152 if (SYMBOL_REF_P (orig)
8153 || LABEL_REF_P (orig))
8154 {
8155 if (reg == 0)
8156 {
8157 gcc_assert (can_create_pseudo_p ());
8158 reg = gen_reg_rtx (Pmode);
8159 }
8160
8161 /* VxWorks does not impose a fixed gap between segments; the run-time
8162 gap can be different from the object-file gap. We therefore can't
8163 use GOTOFF unless we are absolutely sure that the symbol is in the
8164 same segment as the GOT. Unfortunately, the flexibility of linker
8165 scripts means that we can't be sure of that in general, so assume
8166 that GOTOFF is never valid on VxWorks. */
8167 /* References to weak symbols cannot be resolved locally: they
8168 may be overridden by a non-weak definition at link time. */
8169 rtx_insn *insn;
8170 if ((LABEL_REF_P (orig)
8171 || (SYMBOL_REF_P (orig)
8172 && SYMBOL_REF_LOCAL_P (orig)
8173 && (SYMBOL_REF_DECL (orig)
8174 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
8175 && (!SYMBOL_REF_FUNCTION_P (orig)
8176 || arm_fdpic_local_funcdesc_p (orig))))
8177 && NEED_GOT_RELOC
8178 && arm_pic_data_is_text_relative)
8179 insn = arm_pic_static_addr (orig, reg);
8180 else
8181 {
8182 /* If this function doesn't have a pic register, create one now. */
8183 require_pic_register (pic_reg, compute_now);
8184
8185 if (pic_reg == NULL_RTX)
8186 pic_reg = cfun->machine->pic_reg;
8187
8188 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8189 }
8190
8191 /* Put a REG_EQUAL note on this insn, so that it can be optimized
8192 by loop. */
8193 set_unique_reg_note (insn, REG_EQUAL, orig);
8194
8195 return reg;
8196 }
8197 else if (GET_CODE (orig) == CONST)
8198 {
8199 rtx base, offset;
8200
8201 if (GET_CODE (XEXP (orig, 0)) == PLUS
8202 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
8203 return orig;
8204
8205 /* Handle the case where we have: const (UNSPEC_TLS). */
8206 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
8207 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
8208 return orig;
8209
8210 /* Handle the case where we have:
8211 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
8212 CONST_INT. */
8213 if (GET_CODE (XEXP (orig, 0)) == PLUS
8214 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
8215 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
8216 {
8217 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
8218 return orig;
8219 }
8220
8221 if (reg == 0)
8222 {
8223 gcc_assert (can_create_pseudo_p ());
8224 reg = gen_reg_rtx (Pmode);
8225 }
8226
8227 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8228
8229 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8230 pic_reg, compute_now);
8231 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8232 base == reg ? 0 : reg, pic_reg,
8233 compute_now);
8234
8235 if (CONST_INT_P (offset))
8236 {
8237 /* The base register doesn't really matter, we only want to
8238 test the index for the appropriate mode. */
8239 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8240 {
8241 gcc_assert (can_create_pseudo_p ());
8242 offset = force_reg (Pmode, offset);
8243 }
8244
8245 if (CONST_INT_P (offset))
8246 return plus_constant (Pmode, base, INTVAL (offset));
8247 }
8248
8249 if (GET_MODE_SIZE (mode) > 4
8250 && (GET_MODE_CLASS (mode) == MODE_INT
8251 || TARGET_SOFT_FLOAT))
8252 {
8253 emit_insn (gen_addsi3 (reg, base, offset));
8254 return reg;
8255 }
8256
8257 return gen_rtx_PLUS (Pmode, base, offset);
8258 }
8259
8260 return orig;
8261 }
8262
8263
8264 /* Generate insns that produce the address of the stack canary */
8265 rtx
8266 arm_stack_protect_tls_canary_mem (bool reload)
8267 {
8268 rtx tp = gen_reg_rtx (SImode);
8269 if (reload)
8270 emit_insn (gen_reload_tp_hard (tp));
8271 else
8272 emit_insn (gen_load_tp_hard (tp));
8273
8274 rtx reg = gen_reg_rtx (SImode);
8275 rtx offset = GEN_INT (arm_stack_protector_guard_offset);
8276 emit_set_insn (reg, gen_rtx_PLUS (SImode, tp, offset));
8277 return gen_rtx_MEM (SImode, reg);
8278 }
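
/* Editor's note (assumes the -mstack-protector-guard=tls and
   -mstack-protector-guard-offset= options; not part of the original
   sources): compiling with

     -fstack-protector-strong -mstack-protector-guard=tls \
       -mstack-protector-guard-offset=1296

   makes the canary load go through the MEM built above, i.e. from
   [thread pointer + 1296], instead of from the default global
   __stack_chk_guard symbol.  */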
8279
8280
8281 /* Whether a register is callee saved or not. This is necessary because high
8282 registers are marked as caller saved when optimizing for size on Thumb-1
8283 targets, despite being callee saved, in order to avoid using them. */
8284 #define callee_saved_reg_p(reg) \
8285 (!call_used_or_fixed_reg_p (reg) \
8286 || (TARGET_THUMB1 && optimize_size \
8287 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8288
8289 /* Return a mask for the call-clobbered low registers that are unused
8290 at the end of the prologue. */
8291 static unsigned long
8292 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8293 {
8294 unsigned long mask = 0;
8295 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8296
8297 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8298 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8299 mask |= 1 << (reg - FIRST_LO_REGNUM);
8300 return mask;
8301 }
8302
8303 /* Similarly for the start of the epilogue. */
8304 static unsigned long
8305 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8306 {
8307 unsigned long mask = 0;
8308 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8309
8310 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8311 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8312 mask |= 1 << (reg - FIRST_LO_REGNUM);
8313 return mask;
8314 }
8315
8316 /* Find a spare register to use during the prolog of a function. */
8317
8318 static int
8319 thumb_find_work_register (unsigned long pushed_regs_mask)
8320 {
8321 int reg;
8322
8323 unsigned long unused_regs
8324 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8325
8326 /* Check the argument registers first as these are call-used. The
8327 register allocation order means that sometimes r3 might be used
8328 but earlier argument registers might not, so check them all. */
8329 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8330 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8331 return reg;
8332
8333 /* Otherwise look for a call-saved register that is going to be pushed. */
8334 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8335 if (pushed_regs_mask & (1 << reg))
8336 return reg;
8337
8338 if (TARGET_THUMB2)
8339 {
8340 /* Thumb-2 can use high regs. */
8341 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8342 if (pushed_regs_mask & (1 << reg))
8343 return reg;
8344 }
8345 /* Something went wrong - thumb_compute_save_reg_mask()
8346 should have arranged for a suitable register to be pushed. */
8347 gcc_unreachable ();
8348 }
8349
8350 static GTY(()) int pic_labelno;
8351
8352 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8353 low register. */
8354
8355 void
8356 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8357 {
8358 rtx l1, labelno, pic_tmp, pic_rtx;
8359
8360 if (crtl->uses_pic_offset_table == 0
8361 || TARGET_SINGLE_PIC_BASE
8362 || TARGET_FDPIC)
8363 return;
8364
8365 gcc_assert (flag_pic);
8366
8367 if (pic_reg == NULL_RTX)
8368 pic_reg = cfun->machine->pic_reg;
8369 if (TARGET_VXWORKS_RTP)
8370 {
8371 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8372 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8373 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8374
8375 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8376
8377 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8378 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8379 }
8380 else
8381 {
8382 /* We use an UNSPEC rather than a LABEL_REF because this label
8383 never appears in the code stream. */
8384
8385 labelno = GEN_INT (pic_labelno++);
8386 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8387 l1 = gen_rtx_CONST (VOIDmode, l1);
8388
8389 /* On the ARM the PC register contains 'dot + 8' at the time of the
8390 addition, on the Thumb it is 'dot + 4'. */
8391 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8392 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8393 UNSPEC_GOTSYM_OFF);
8394 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8395
8396 if (TARGET_32BIT)
8397 {
8398 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8399 }
8400 else /* TARGET_THUMB1 */
8401 {
8402 if (arm_pic_register != INVALID_REGNUM
8403 && REGNO (pic_reg) > LAST_LO_REGNUM)
8404 {
8405 /* We will have pushed the pic register, so we should always be
8406 able to find a work register. */
8407 pic_tmp = gen_rtx_REG (SImode,
8408 thumb_find_work_register (saved_regs));
8409 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8410 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8411 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8412 }
8413 else if (arm_pic_register != INVALID_REGNUM
8414 && arm_pic_register > LAST_LO_REGNUM
8415 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8416 {
8417 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8418 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8419 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8420 }
8421 else
8422 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8423 }
8424 }
8425
8426 /* Need to emit this whether or not we obey regdecls,
8427 since setjmp/longjmp can cause life info to screw up. */
8428 emit_use (pic_reg);
8429 }
8430
8431 /* Try to determine whether an object, referenced via ORIG, will be
8432 placed in the text or data segment. This is used in FDPIC mode, to
8433 decide which relocations to use when accessing ORIG. *IS_READONLY
8434 is set to true if ORIG is a read-only location, false otherwise.
8435 Return true if we could determine the location of ORIG, false
8436 otherwise. *IS_READONLY is valid only when we return true. */
8437 static bool
8438 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8439 {
8440 *is_readonly = false;
8441
8442 if (LABEL_REF_P (orig))
8443 {
8444 *is_readonly = true;
8445 return true;
8446 }
8447
8448 if (SYMBOL_REF_P (orig))
8449 {
8450 if (CONSTANT_POOL_ADDRESS_P (orig))
8451 {
8452 *is_readonly = true;
8453 return true;
8454 }
8455 if (SYMBOL_REF_LOCAL_P (orig)
8456 && !SYMBOL_REF_EXTERNAL_P (orig)
8457 && SYMBOL_REF_DECL (orig)
8458 && (!DECL_P (SYMBOL_REF_DECL (orig))
8459 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8460 {
8461 tree decl = SYMBOL_REF_DECL (orig);
8462 tree init = VAR_P (decl)
8463 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8464 ? decl : 0;
8465 int reloc = 0;
8466 bool named_section, readonly;
8467
8468 if (init && init != error_mark_node)
8469 reloc = compute_reloc_for_constant (init);
8470
8471 named_section = VAR_P (decl)
8472 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8473 readonly = decl_readonly_section (decl, reloc);
8474
8475 /* We don't know where the link script will put a named
8476 section, so return false in such a case. */
8477 if (named_section)
8478 return false;
8479
8480 *is_readonly = readonly;
8481 return true;
8482 }
8483
8484 /* We don't know. */
8485 return false;
8486 }
8487
8488 gcc_unreachable ();
8489 }
8490
8491 /* Generate code to load the address of a static var when flag_pic is set. */
8492 static rtx_insn *
8493 arm_pic_static_addr (rtx orig, rtx reg)
8494 {
8495 rtx l1, labelno, offset_rtx;
8496 rtx_insn *insn;
8497
8498 gcc_assert (flag_pic);
8499
8500 bool is_readonly = false;
8501 bool info_known = false;
8502
8503 if (TARGET_FDPIC
8504 && SYMBOL_REF_P (orig)
8505 && !SYMBOL_REF_FUNCTION_P (orig))
8506 info_known = arm_is_segment_info_known (orig, &is_readonly);
8507
8508 if (TARGET_FDPIC
8509 && SYMBOL_REF_P (orig)
8510 && !SYMBOL_REF_FUNCTION_P (orig)
8511 && !info_known)
8512 {
8513 /* We don't know where orig is stored, so we have to be
8514 pessimistic and use a GOT relocation. */
8515 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8516
8517 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8518 }
8519 else if (TARGET_FDPIC
8520 && SYMBOL_REF_P (orig)
8521 && (SYMBOL_REF_FUNCTION_P (orig)
8522 || !is_readonly))
8523 {
8524 /* We use the GOTOFF relocation. */
8525 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8526
8527 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8528 emit_insn (gen_movsi (reg, l1));
8529 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8530 }
8531 else
8532 {
8533 /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
8534 PC-relative access. */
8535 /* We use an UNSPEC rather than a LABEL_REF because this label
8536 never appears in the code stream. */
8537 labelno = GEN_INT (pic_labelno++);
8538 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8539 l1 = gen_rtx_CONST (VOIDmode, l1);
8540
8541 /* On the ARM the PC register contains 'dot + 8' at the time of the
8542 addition, on the Thumb it is 'dot + 4'. */
8543 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8544 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8545 UNSPEC_SYMBOL_OFFSET);
8546 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8547
8548 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8549 labelno));
8550 }
8551
8552 return insn;
8553 }
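/* Illustration of the 'dot + 8' / 'dot + 4' bias used above: if the
   pc-relative add is emitted at the label .LPICn, the ARM pipeline makes
   the PC operand read as .LPICn + 8 (.LPICn + 4 in Thumb state).  With the
   literal encoded as SYM - (.LPICn + 8), adding the PC gives

       (SYM - (.LPICn + 8)) + (.LPICn + 8) == SYM

   which is why offset_rtx above is biased by TARGET_ARM ? 8 : 4.  */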
8554
8555 /* Return nonzero if X is valid as an ARM state addressing register. */
8556 static int
8557 arm_address_register_rtx_p (rtx x, int strict_p)
8558 {
8559 int regno;
8560
8561 if (!REG_P (x))
8562 return 0;
8563
8564 regno = REGNO (x);
8565
8566 if (strict_p)
8567 return ARM_REGNO_OK_FOR_BASE_P (regno);
8568
8569 return (regno <= LAST_ARM_REGNUM
8570 || regno >= FIRST_PSEUDO_REGISTER
8571 || regno == FRAME_POINTER_REGNUM
8572 || regno == ARG_POINTER_REGNUM);
8573 }
8574
8575 /* Return TRUE if this rtx is the difference of a symbol and a label,
8576 and will reduce to a PC-relative relocation in the object file.
8577 Expressions like this can be left alone when generating PIC, rather
8578 than forced through the GOT. */
8579 static int
8580 pcrel_constant_p (rtx x)
8581 {
8582 if (GET_CODE (x) == MINUS)
8583 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8584
8585 return FALSE;
8586 }
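/* For example, pcrel_constant_p accepts

       (minus (symbol_ref "table") (label_ref L23))

   since the assembler can emit this as 'table - .L23', a link-time
   constant needing no GOT entry even under -fpic, whereas a bare
   (symbol_ref "table") is not accepted.  */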
8587
8588 /* Return true if X will surely end up in an index register after the
8589 next splitting pass. */
8590 static bool
8591 will_be_in_index_register (const_rtx x)
8592 {
8593 /* arm.md: calculate_pic_address will split this into a register. */
8594 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8595 }
8596
8597 /* Return nonzero if X is a valid ARM state address operand. */
8598 int
8599 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8600 int strict_p)
8601 {
8602 bool use_ldrd;
8603 enum rtx_code code = GET_CODE (x);
8604
8605 if (arm_address_register_rtx_p (x, strict_p))
8606 return 1;
8607
8608 use_ldrd = (TARGET_LDRD
8609 && (mode == DImode || mode == DFmode));
8610
8611 if (code == POST_INC || code == PRE_DEC
8612 || ((code == PRE_INC || code == POST_DEC)
8613 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8614 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8615
8616 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8617 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8618 && GET_CODE (XEXP (x, 1)) == PLUS
8619 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8620 {
8621 rtx addend = XEXP (XEXP (x, 1), 1);
8622
8623 /* Don't allow ldrd post-increment by register because it's hard
8624 to fix up invalid register choices. */
8625 if (use_ldrd
8626 && GET_CODE (x) == POST_MODIFY
8627 && REG_P (addend))
8628 return 0;
8629
8630 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8631 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8632 }
8633
8634 /* After reload constants split into minipools will have addresses
8635 from a LABEL_REF. */
8636 else if (reload_completed
8637 && (code == LABEL_REF
8638 || (code == CONST
8639 && GET_CODE (XEXP (x, 0)) == PLUS
8640 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8641 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8642 return 1;
8643
8644 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8645 return 0;
8646
8647 else if (code == PLUS)
8648 {
8649 rtx xop0 = XEXP (x, 0);
8650 rtx xop1 = XEXP (x, 1);
8651
8652 return ((arm_address_register_rtx_p (xop0, strict_p)
8653 && ((CONST_INT_P (xop1)
8654 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8655 || (!strict_p && will_be_in_index_register (xop1))))
8656 || (arm_address_register_rtx_p (xop1, strict_p)
8657 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8658 }
8659
8660 #if 0
8661 /* Reload currently can't handle MINUS, so disable this for now */
8662 else if (GET_CODE (x) == MINUS)
8663 {
8664 rtx xop0 = XEXP (x, 0);
8665 rtx xop1 = XEXP (x, 1);
8666
8667 return (arm_address_register_rtx_p (xop0, strict_p)
8668 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8669 }
8670 #endif
8671
8672 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8673 && code == SYMBOL_REF
8674 && CONSTANT_POOL_ADDRESS_P (x)
8675 && ! (flag_pic
8676 && symbol_mentioned_p (get_pool_constant (x))
8677 && ! pcrel_constant_p (get_pool_constant (x))))
8678 return 1;
8679
8680 return 0;
8681 }
8682
8683 /* Return true if we can avoid creating a constant pool entry for x. */
8684 static bool
8685 can_avoid_literal_pool_for_label_p (rtx x)
8686 {
8687 /* Normally we can assign constant values to target registers without
8688 the help of the constant pool. But there are cases where we have to
8689 use the constant pool, such as:
8690 1) assigning a label to a register;
8691 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8692
8693 A constant pool access of the form:
8694 (set (reg r0) (mem (symbol_ref (".LC0"))))
8695 will cause the use of the literal pool (later, in function arm_reorg).
8696 So here we mark such a form as invalid, and the compiler will then
8697 adjust it into:
8698 (set (reg r0) (symbol_ref (".LC0")))
8699 (set (reg r0) (mem (reg r0))).
8700 No extra register is required, and (mem (reg r0)) won't cause the use
8701 of literal pools.
8702 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8703 && CONSTANT_POOL_ADDRESS_P (x))
8704 return 1;
8705 return 0;
8706 }
8707
8708
8709 /* Return nonzero if X is a valid Thumb-2 address operand. */
8710 static int
8711 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8712 {
8713 bool use_ldrd;
8714 enum rtx_code code = GET_CODE (x);
8715
8716 /* If we are dealing with an MVE predicate mode, then treat it as HImode, as
8717 we can store and load it like any other 16-bit value. */
8718 if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE (mode))
8719 mode = HImode;
8720
8721 if (TARGET_HAVE_MVE && VALID_MVE_MODE (mode))
8722 return mve_vector_mem_operand (mode, x, strict_p);
8723
8724 if (arm_address_register_rtx_p (x, strict_p))
8725 return 1;
8726
8727 use_ldrd = (TARGET_LDRD
8728 && (mode == DImode || mode == DFmode));
8729
8730 if (code == POST_INC || code == PRE_DEC
8731 || ((code == PRE_INC || code == POST_DEC)
8732 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8733 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8734
8735 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8736 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8737 && GET_CODE (XEXP (x, 1)) == PLUS
8738 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8739 {
8740 /* Thumb-2 only has autoincrement by constant. */
8741 rtx addend = XEXP (XEXP (x, 1), 1);
8742 HOST_WIDE_INT offset;
8743
8744 if (!CONST_INT_P (addend))
8745 return 0;
8746
8747 offset = INTVAL (addend);
8748 if (GET_MODE_SIZE (mode) <= 4)
8749 return (offset > -256 && offset < 256);
8750
8751 return (use_ldrd && offset > -1024 && offset < 1024
8752 && (offset & 3) == 0);
8753 }
8754
8755 /* After reload constants split into minipools will have addresses
8756 from a LABEL_REF. */
8757 else if (reload_completed
8758 && (code == LABEL_REF
8759 || (code == CONST
8760 && GET_CODE (XEXP (x, 0)) == PLUS
8761 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8762 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8763 return 1;
8764
8765 else if (mode == TImode
8766 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8767 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8768 return 0;
8769
8770 else if (code == PLUS)
8771 {
8772 rtx xop0 = XEXP (x, 0);
8773 rtx xop1 = XEXP (x, 1);
8774
8775 return ((arm_address_register_rtx_p (xop0, strict_p)
8776 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8777 || (!strict_p && will_be_in_index_register (xop1))))
8778 || (arm_address_register_rtx_p (xop1, strict_p)
8779 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8780 }
8781
8782 else if (can_avoid_literal_pool_for_label_p (x))
8783 return 0;
8784
8785 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8786 && code == SYMBOL_REF
8787 && CONSTANT_POOL_ADDRESS_P (x)
8788 && ! (flag_pic
8789 && symbol_mentioned_p (get_pool_constant (x))
8790 && ! pcrel_constant_p (get_pool_constant (x))))
8791 return 1;
8792
8793 return 0;
8794 }
8795
8796 /* Return nonzero if INDEX is valid for an address index operand in
8797 ARM state. */
8798 static int
8799 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8800 int strict_p)
8801 {
8802 HOST_WIDE_INT range;
8803 enum rtx_code code = GET_CODE (index);
8804
8805 /* Standard coprocessor addressing modes. */
8806 if (TARGET_HARD_FLOAT
8807 && (mode == SFmode || mode == DFmode))
8808 return (code == CONST_INT && INTVAL (index) < 1024
8809 && INTVAL (index) > -1024
8810 && (INTVAL (index) & 3) == 0);
8811
8812 /* For quad modes, we restrict the constant offset to be slightly less
8813 than what the instruction format permits. We do this because for
8814 quad mode moves, we will actually decompose them into two separate
8815 double-mode reads or writes. INDEX must therefore be a valid
8816 (double-mode) offset, and so must INDEX+8. */
8817 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8818 return (code == CONST_INT
8819 && INTVAL (index) < 1016
8820 && INTVAL (index) > -1024
8821 && (INTVAL (index) & 3) == 0);
8822
8823 /* We have no such constraint on double mode offsets, so we permit the
8824 full range of the instruction format. */
8825 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8826 return (code == CONST_INT
8827 && INTVAL (index) < 1024
8828 && INTVAL (index) > -1024
8829 && (INTVAL (index) & 3) == 0);
8830
8831 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8832 return (code == CONST_INT
8833 && INTVAL (index) < 1024
8834 && INTVAL (index) > -1024
8835 && (INTVAL (index) & 3) == 0);
8836
8837 if (arm_address_register_rtx_p (index, strict_p)
8838 && (GET_MODE_SIZE (mode) <= 4))
8839 return 1;
8840
8841 if (mode == DImode || mode == DFmode)
8842 {
8843 if (code == CONST_INT)
8844 {
8845 HOST_WIDE_INT val = INTVAL (index);
8846
8847 /* Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
8848 If vldr is selected it uses arm_coproc_mem_operand. */
8849 if (TARGET_LDRD)
8850 return val > -256 && val < 256;
8851 else
8852 return val > -4096 && val < 4092;
8853 }
8854
8855 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8856 }
8857
8858 if (GET_MODE_SIZE (mode) <= 4
8859 && ! (arm_arch4
8860 && (mode == HImode
8861 || mode == HFmode
8862 || (mode == QImode && outer == SIGN_EXTEND))))
8863 {
8864 if (code == MULT)
8865 {
8866 rtx xiop0 = XEXP (index, 0);
8867 rtx xiop1 = XEXP (index, 1);
8868
8869 return ((arm_address_register_rtx_p (xiop0, strict_p)
8870 && power_of_two_operand (xiop1, SImode))
8871 || (arm_address_register_rtx_p (xiop1, strict_p)
8872 && power_of_two_operand (xiop0, SImode)));
8873 }
8874 else if (code == LSHIFTRT || code == ASHIFTRT
8875 || code == ASHIFT || code == ROTATERT)
8876 {
8877 rtx op = XEXP (index, 1);
8878
8879 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8880 && CONST_INT_P (op)
8881 && INTVAL (op) > 0
8882 && INTVAL (op) <= 31);
8883 }
8884 }
8885
8886 /* For ARM v4 we may be doing a sign-extend operation during the
8887 load. */
8888 if (arm_arch4)
8889 {
8890 if (mode == HImode
8891 || mode == HFmode
8892 || (outer == SIGN_EXTEND && mode == QImode))
8893 range = 256;
8894 else
8895 range = 4096;
8896 }
8897 else
8898 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8899
8900 return (code == CONST_INT
8901 && INTVAL (index) < range
8902 && INTVAL (index) > -range);
8903 }
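/* Illustrative examples for the DImode/DFmode branch above: with
   TARGET_LDRD a constant index must lie in (-256, 256), so #252 is
   accepted and #256 is not.  Without TARGET_LDRD the access is assumed to
   become two SImode loads at offsets VAL and VAL + 4, hence the asymmetric
   bound -4096 < VAL < 4092: #4088 is accepted (the second ldr uses #4092)
   while #4092 is rejected, since #4096 would not fit the 12-bit offset.  */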
8904
8905 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8906 index operand. i.e. 1, 2, 4 or 8. */
8907 static bool
8908 thumb2_index_mul_operand (rtx op)
8909 {
8910 HOST_WIDE_INT val;
8911
8912 if (!CONST_INT_P (op))
8913 return false;
8914
8915 val = INTVAL (op);
8916 return (val == 1 || val == 2 || val == 4 || val == 8);
8917 }
8918
8919 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8920 static int
8921 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8922 {
8923 enum rtx_code code = GET_CODE (index);
8924
8925 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8926 /* Standard coprocessor addressing modes. */
8927 if (TARGET_VFP_BASE
8928 && (mode == SFmode || mode == DFmode))
8929 return (code == CONST_INT && INTVAL (index) < 1024
8930 /* Thumb-2 allows only a > -256 index range for its core register
8931 loads/stores. Since we allow SF/DF in core registers, we have
8932 to use the intersection of -256..4096 (core) and -1024..1024
8933 (coprocessor). */
8934 && INTVAL (index) > -256
8935 && (INTVAL (index) & 3) == 0);
8936
8937 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8938 {
8939 /* For DImode assume values will usually live in core regs
8940 and only allow LDRD addressing modes. */
8941 if (!TARGET_LDRD || mode != DImode)
8942 return (code == CONST_INT
8943 && INTVAL (index) < 1024
8944 && INTVAL (index) > -1024
8945 && (INTVAL (index) & 3) == 0);
8946 }
8947
8948 /* For quad modes, we restrict the constant offset to be slightly less
8949 than what the instruction format permits. We do this because for
8950 quad mode moves, we will actually decompose them into two separate
8951 double-mode reads or writes. INDEX must therefore be a valid
8952 (double-mode) offset, and so must INDEX+8. */
8953 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8954 return (code == CONST_INT
8955 && INTVAL (index) < 1016
8956 && INTVAL (index) > -1024
8957 && (INTVAL (index) & 3) == 0);
8958
8959 /* We have no such constraint on double mode offsets, so we permit the
8960 full range of the instruction format. */
8961 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8962 return (code == CONST_INT
8963 && INTVAL (index) < 1024
8964 && INTVAL (index) > -1024
8965 && (INTVAL (index) & 3) == 0);
8966
8967 if (arm_address_register_rtx_p (index, strict_p)
8968 && (GET_MODE_SIZE (mode) <= 4))
8969 return 1;
8970
8971 if (mode == DImode || mode == DFmode)
8972 {
8973 if (code == CONST_INT)
8974 {
8975 HOST_WIDE_INT val = INTVAL (index);
8976 /* Thumb-2 ldrd only has reg+const addressing modes.
8977 Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
8978 If vldr is selected it uses arm_coproc_mem_operand. */
8979 if (TARGET_LDRD)
8980 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8981 else
8982 return IN_RANGE (val, -255, 4095 - 4);
8983 }
8984 else
8985 return 0;
8986 }
8987
8988 if (code == MULT)
8989 {
8990 rtx xiop0 = XEXP (index, 0);
8991 rtx xiop1 = XEXP (index, 1);
8992
8993 return ((arm_address_register_rtx_p (xiop0, strict_p)
8994 && thumb2_index_mul_operand (xiop1))
8995 || (arm_address_register_rtx_p (xiop1, strict_p)
8996 && thumb2_index_mul_operand (xiop0)));
8997 }
8998 else if (code == ASHIFT)
8999 {
9000 rtx op = XEXP (index, 1);
9001
9002 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
9003 && CONST_INT_P (op)
9004 && INTVAL (op) > 0
9005 && INTVAL (op) <= 3);
9006 }
9007
9008 return (code == CONST_INT
9009 && INTVAL (index) < 4096
9010 && INTVAL (index) > -256);
9011 }
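/* Illustrative examples for the Thumb-2 DImode branch above: with
   TARGET_LDRD the index must be a multiple of 4 in [-1020, 1020], so #1020
   is accepted while #1022 (misaligned) and #1024 (out of range) are not.
   Without TARGET_LDRD the assumed pair of 32-bit loads allows [-255, 4091],
   the upper bound again leaving room for the second load at VAL + 4.  */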
9012
9013 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
9014 static int
9015 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
9016 {
9017 int regno;
9018
9019 if (!REG_P (x))
9020 return 0;
9021
9022 regno = REGNO (x);
9023
9024 if (strict_p)
9025 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
9026
9027 return (regno <= LAST_LO_REGNUM
9028 || regno > LAST_VIRTUAL_REGISTER
9029 || regno == FRAME_POINTER_REGNUM
9030 || (GET_MODE_SIZE (mode) >= 4
9031 && (regno == STACK_POINTER_REGNUM
9032 || regno >= FIRST_PSEUDO_REGISTER
9033 || x == hard_frame_pointer_rtx
9034 || x == arg_pointer_rtx)));
9035 }
9036
9037 /* Return nonzero if x is a legitimate index register. This is the case
9038 for any base register that can access a QImode object. */
9039 inline static int
9040 thumb1_index_register_rtx_p (rtx x, int strict_p)
9041 {
9042 return thumb1_base_register_rtx_p (x, QImode, strict_p);
9043 }
9044
9045 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
9046
9047 The AP may be eliminated to either the SP or the FP, so we use the
9048 least common denominator, e.g. SImode, and offsets from 0 to 64.
9049
9050 ??? Verify whether the above is the right approach.
9051
9052 ??? Also, the FP may be eliminated to the SP, so perhaps that
9053 needs special handling also.
9054
9055 ??? Look at how the mips16 port solves this problem. It probably uses
9056 better ways to solve some of these problems.
9057
9058 Although it is not incorrect, we don't accept QImode and HImode
9059 addresses based on the frame pointer or arg pointer until the
9060 reload pass starts. This is so that eliminating such addresses
9061 into stack based ones won't produce impossible code. */
9062 int
9063 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
9064 {
9065 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
9066 return 0;
9067
9068 /* ??? Not clear if this is right. Experiment. */
9069 if (GET_MODE_SIZE (mode) < 4
9070 && !(reload_in_progress || reload_completed)
9071 && (reg_mentioned_p (frame_pointer_rtx, x)
9072 || reg_mentioned_p (arg_pointer_rtx, x)
9073 || reg_mentioned_p (virtual_incoming_args_rtx, x)
9074 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
9075 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
9076 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
9077 return 0;
9078
9079 /* Accept any base register. SP only in SImode or larger. */
9080 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
9081 return 1;
9082
9083 /* This is PC relative data before arm_reorg runs. */
9084 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
9085 && SYMBOL_REF_P (x)
9086 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
9087 && !arm_disable_literal_pool)
9088 return 1;
9089
9090 /* This is PC relative data after arm_reorg runs. */
9091 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
9092 && reload_completed
9093 && (LABEL_REF_P (x)
9094 || (GET_CODE (x) == CONST
9095 && GET_CODE (XEXP (x, 0)) == PLUS
9096 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
9097 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
9098 return 1;
9099
9100 /* Post-inc indexing is only supported for SImode and larger. */
9101 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
9102 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
9103 return 1;
9104
9105 else if (GET_CODE (x) == PLUS)
9106 {
9107 /* REG+REG address can be any two index registers. */
9108 /* We disallow FRAME+REG addressing since we know that FRAME
9109 will be replaced with STACK, and SP relative addressing only
9110 permits SP+OFFSET. */
9111 if (GET_MODE_SIZE (mode) <= 4
9112 && XEXP (x, 0) != frame_pointer_rtx
9113 && XEXP (x, 1) != frame_pointer_rtx
9114 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9115 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
9116 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
9117 return 1;
9118
9119 /* REG+const has 5-7 bit offset for non-SP registers. */
9120 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
9121 || XEXP (x, 0) == arg_pointer_rtx)
9122 && CONST_INT_P (XEXP (x, 1))
9123 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
9124 return 1;
9125
9126 /* REG+const has a 10-bit offset for SP, but only SImode and
9127 larger are supported. */
9128 /* ??? Should probably check for DI/DFmode overflow here
9129 just like GO_IF_LEGITIMATE_OFFSET does. */
9130 else if (REG_P (XEXP (x, 0))
9131 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
9132 && GET_MODE_SIZE (mode) >= 4
9133 && CONST_INT_P (XEXP (x, 1))
9134 && INTVAL (XEXP (x, 1)) >= 0
9135 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
9136 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9137 return 1;
9138
9139 else if (REG_P (XEXP (x, 0))
9140 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
9141 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
9142 || VIRTUAL_REGISTER_P (XEXP (x, 0)))
9143 && GET_MODE_SIZE (mode) >= 4
9144 && CONST_INT_P (XEXP (x, 1))
9145 && (INTVAL (XEXP (x, 1)) & 3) == 0)
9146 return 1;
9147 }
9148
9149 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
9150 && GET_MODE_SIZE (mode) == 4
9151 && SYMBOL_REF_P (x)
9152 && CONSTANT_POOL_ADDRESS_P (x)
9153 && !arm_disable_literal_pool
9154 && ! (flag_pic
9155 && symbol_mentioned_p (get_pool_constant (x))
9156 && ! pcrel_constant_p (get_pool_constant (x))))
9157 return 1;
9158
9159 return 0;
9160 }
9161
9162 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
9163 instruction of mode MODE. */
9164 int
9165 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
9166 {
9167 switch (GET_MODE_SIZE (mode))
9168 {
9169 case 1:
9170 return val >= 0 && val < 32;
9171
9172 case 2:
9173 return val >= 0 && val < 64 && (val & 1) == 0;
9174
9175 default:
9176 return (val >= 0
9177 && (val + GET_MODE_SIZE (mode)) <= 128
9178 && (val & 3) == 0);
9179 }
9180 }
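/* Illustrative calls:

     thumb_legitimate_offset_p (QImode, 31)  -> 1   5-bit byte offset
     thumb_legitimate_offset_p (HImode, 62)  -> 1   even halfword offset
     thumb_legitimate_offset_p (HImode, 63)  -> 0   must be even
     thumb_legitimate_offset_p (SImode, 124) -> 1   124 + 4 <= 128
     thumb_legitimate_offset_p (SImode, 126) -> 0   must be a multiple of 4  */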
9181
9182 bool
9183 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p, code_helper)
9184 {
9185 if (TARGET_ARM)
9186 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
9187 else if (TARGET_THUMB2)
9188 return thumb2_legitimate_address_p (mode, x, strict_p);
9189 else /* if (TARGET_THUMB1) */
9190 return thumb1_legitimate_address_p (mode, x, strict_p);
9191 }
9192
9193 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
9194
9195 Given an rtx X being reloaded into a reg required to be
9196 in class CLASS, return the class of reg to actually use.
9197 In general this is just CLASS, but for the Thumb core registers and
9198 immediate constants we prefer a LO_REGS class or a subset. */
9199
9200 static reg_class_t
9201 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
9202 {
9203 if (TARGET_32BIT)
9204 return rclass;
9205 else
9206 {
9207 if (rclass == GENERAL_REGS)
9208 return LO_REGS;
9209 else
9210 return rclass;
9211 }
9212 }
9213
9214 /* Build the SYMBOL_REF for __tls_get_addr. */
9215
9216 static GTY(()) rtx tls_get_addr_libfunc;
9217
9218 static rtx
9219 get_tls_get_addr (void)
9220 {
9221 if (!tls_get_addr_libfunc)
9222 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
9223 return tls_get_addr_libfunc;
9224 }
9225
9226 rtx
9227 arm_load_tp (rtx target)
9228 {
9229 if (!target)
9230 target = gen_reg_rtx (SImode);
9231
9232 if (TARGET_HARD_TP)
9233 {
9234 /* Can return in any reg. */
9235 emit_insn (gen_load_tp_hard (target));
9236 }
9237 else
9238 {
9239 /* Always returned in r0. Immediately copy the result into a pseudo,
9240 otherwise other uses of r0 (e.g. setting up function arguments) may
9241 clobber the value. */
9242
9243 rtx tmp;
9244
9245 if (TARGET_FDPIC)
9246 {
9247 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9248 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9249
9250 emit_insn (gen_load_tp_soft_fdpic ());
9251
9252 /* Restore r9. */
9253 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9254 }
9255 else
9256 emit_insn (gen_load_tp_soft ());
9257
9258 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9259 emit_move_insn (target, tmp);
9260 }
9261 return target;
9262 }
9263
9264 static rtx
9265 load_tls_operand (rtx x, rtx reg)
9266 {
9267 rtx tmp;
9268
9269 if (reg == NULL_RTX)
9270 reg = gen_reg_rtx (SImode);
9271
9272 tmp = gen_rtx_CONST (SImode, x);
9273
9274 emit_move_insn (reg, tmp);
9275
9276 return reg;
9277 }
9278
9279 static rtx_insn *
9280 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9281 {
9282 rtx label, labelno = NULL_RTX, sum;
9283
9284 gcc_assert (reloc != TLS_DESCSEQ);
9285 start_sequence ();
9286
9287 if (TARGET_FDPIC)
9288 {
9289 sum = gen_rtx_UNSPEC (Pmode,
9290 gen_rtvec (2, x, GEN_INT (reloc)),
9291 UNSPEC_TLS);
9292 }
9293 else
9294 {
9295 labelno = GEN_INT (pic_labelno++);
9296 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9297 label = gen_rtx_CONST (VOIDmode, label);
9298
9299 sum = gen_rtx_UNSPEC (Pmode,
9300 gen_rtvec (4, x, GEN_INT (reloc), label,
9301 GEN_INT (TARGET_ARM ? 8 : 4)),
9302 UNSPEC_TLS);
9303 }
9304 reg = load_tls_operand (sum, reg);
9305
9306 if (TARGET_FDPIC)
9307 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9308 else if (TARGET_ARM)
9309 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9310 else
9311 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9312
9313 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9314 LCT_PURE, /* LCT_CONST? */
9315 Pmode, reg, Pmode);
9316
9317 rtx_insn *insns = get_insns ();
9318 end_sequence ();
9319
9320 return insns;
9321 }
9322
9323 static rtx
9324 arm_tls_descseq_addr (rtx x, rtx reg)
9325 {
9326 rtx labelno = GEN_INT (pic_labelno++);
9327 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9328 rtx sum = gen_rtx_UNSPEC (Pmode,
9329 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9330 gen_rtx_CONST (VOIDmode, label),
9331 GEN_INT (!TARGET_ARM)),
9332 UNSPEC_TLS);
9333 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9334
9335 emit_insn (gen_tlscall (x, labelno));
9336 if (!reg)
9337 reg = gen_reg_rtx (SImode);
9338 else
9339 gcc_assert (REGNO (reg) != R0_REGNUM);
9340
9341 emit_move_insn (reg, reg0);
9342
9343 return reg;
9344 }
9345
9346
9347 rtx
9348 legitimize_tls_address (rtx x, rtx reg)
9349 {
9350 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9351 rtx_insn *insns;
9352 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9353
9354 switch (model)
9355 {
9356 case TLS_MODEL_GLOBAL_DYNAMIC:
9357 if (TARGET_GNU2_TLS)
9358 {
9359 gcc_assert (!TARGET_FDPIC);
9360
9361 reg = arm_tls_descseq_addr (x, reg);
9362
9363 tp = arm_load_tp (NULL_RTX);
9364
9365 dest = gen_rtx_PLUS (Pmode, tp, reg);
9366 }
9367 else
9368 {
9369 /* Original scheme */
9370 if (TARGET_FDPIC)
9371 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9372 else
9373 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9374 dest = gen_reg_rtx (Pmode);
9375 emit_libcall_block (insns, dest, ret, x);
9376 }
9377 return dest;
9378
9379 case TLS_MODEL_LOCAL_DYNAMIC:
9380 if (TARGET_GNU2_TLS)
9381 {
9382 gcc_assert (!TARGET_FDPIC);
9383
9384 reg = arm_tls_descseq_addr (x, reg);
9385
9386 tp = arm_load_tp (NULL_RTX);
9387
9388 dest = gen_rtx_PLUS (Pmode, tp, reg);
9389 }
9390 else
9391 {
9392 if (TARGET_FDPIC)
9393 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9394 else
9395 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9396
9397 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9398 share the LDM result with other LD model accesses. */
9399 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9400 UNSPEC_TLS);
9401 dest = gen_reg_rtx (Pmode);
9402 emit_libcall_block (insns, dest, ret, eqv);
9403
9404 /* Load the addend. */
9405 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9406 GEN_INT (TLS_LDO32)),
9407 UNSPEC_TLS);
9408 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9409 dest = gen_rtx_PLUS (Pmode, dest, addend);
9410 }
9411 return dest;
9412
9413 case TLS_MODEL_INITIAL_EXEC:
9414 if (TARGET_FDPIC)
9415 {
9416 sum = gen_rtx_UNSPEC (Pmode,
9417 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9418 UNSPEC_TLS);
9419 reg = load_tls_operand (sum, reg);
9420 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9421 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9422 }
9423 else
9424 {
9425 labelno = GEN_INT (pic_labelno++);
9426 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9427 label = gen_rtx_CONST (VOIDmode, label);
9428 sum = gen_rtx_UNSPEC (Pmode,
9429 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9430 GEN_INT (TARGET_ARM ? 8 : 4)),
9431 UNSPEC_TLS);
9432 reg = load_tls_operand (sum, reg);
9433
9434 if (TARGET_ARM)
9435 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9436 else if (TARGET_THUMB2)
9437 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9438 else
9439 {
9440 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9441 emit_move_insn (reg, gen_const_mem (SImode, reg));
9442 }
9443 }
9444
9445 tp = arm_load_tp (NULL_RTX);
9446
9447 return gen_rtx_PLUS (Pmode, tp, reg);
9448
9449 case TLS_MODEL_LOCAL_EXEC:
9450 tp = arm_load_tp (NULL_RTX);
9451
9452 reg = gen_rtx_UNSPEC (Pmode,
9453 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9454 UNSPEC_TLS);
9455 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9456
9457 return gen_rtx_PLUS (Pmode, tp, reg);
9458
9459 default:
9460 abort ();
9461 }
9462 }
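/* For the simplest case above, TLS_MODEL_LOCAL_EXEC, the returned address
   is roughly

       (plus (reg tp) (reg tmp))

   where TMP holds (const (unspec [x TLS_LE32] UNSPEC_TLS)), i.e. the
   thread pointer plus the statically-known offset of X in the thread-local
   block.  The other models differ only in how that offset (or the address
   itself) is obtained: loaded via a GOT-style entry for initial-exec, or
   returned by a call to __tls_get_addr for the dynamic models.  */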
9463
9464 /* Try machine-dependent ways of modifying an illegitimate address
9465 to be legitimate. If we find one, return the new, valid address. */
9466 rtx
9467 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9468 {
9469 if (arm_tls_referenced_p (x))
9470 {
9471 rtx addend = NULL;
9472
9473 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9474 {
9475 addend = XEXP (XEXP (x, 0), 1);
9476 x = XEXP (XEXP (x, 0), 0);
9477 }
9478
9479 if (!SYMBOL_REF_P (x))
9480 return x;
9481
9482 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9483
9484 x = legitimize_tls_address (x, NULL_RTX);
9485
9486 if (addend)
9487 {
9488 x = gen_rtx_PLUS (SImode, x, addend);
9489 orig_x = x;
9490 }
9491 else
9492 return x;
9493 }
9494
9495 if (TARGET_THUMB1)
9496 return thumb_legitimize_address (x, orig_x, mode);
9497
9498 if (GET_CODE (x) == PLUS)
9499 {
9500 rtx xop0 = XEXP (x, 0);
9501 rtx xop1 = XEXP (x, 1);
9502
9503 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9504 xop0 = force_reg (SImode, xop0);
9505
9506 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9507 && !symbol_mentioned_p (xop1))
9508 xop1 = force_reg (SImode, xop1);
9509
9510 if (ARM_BASE_REGISTER_RTX_P (xop0)
9511 && CONST_INT_P (xop1))
9512 {
9513 HOST_WIDE_INT n, low_n;
9514 rtx base_reg, val;
9515 n = INTVAL (xop1);
9516
9517 /* VFP addressing modes actually allow greater offsets, but for
9518 now we just stick with the lowest common denominator. */
9519 if (mode == DImode || mode == DFmode)
9520 {
9521 low_n = n & 0x0f;
9522 n &= ~0x0f;
9523 if (low_n > 4)
9524 {
9525 n += 16;
9526 low_n -= 16;
9527 }
9528 }
9529 else
9530 {
9531 low_n = ((mode) == TImode ? 0
9532 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9533 n -= low_n;
9534 }
9535
9536 base_reg = gen_reg_rtx (SImode);
9537 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9538 emit_move_insn (base_reg, val);
9539 x = plus_constant (Pmode, base_reg, low_n);
9540 }
9541 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9542 x = gen_rtx_PLUS (SImode, xop0, xop1);
9543 }
9544
9545 /* XXX We don't allow MINUS any more -- see comment in
9546 arm_legitimate_address_outer_p (). */
9547 else if (GET_CODE (x) == MINUS)
9548 {
9549 rtx xop0 = XEXP (x, 0);
9550 rtx xop1 = XEXP (x, 1);
9551
9552 if (CONSTANT_P (xop0))
9553 xop0 = force_reg (SImode, xop0);
9554
9555 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9556 xop1 = force_reg (SImode, xop1);
9557
9558 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9559 x = gen_rtx_MINUS (SImode, xop0, xop1);
9560 }
9561
9562 /* Make sure to take full advantage of the pre-indexed addressing mode
9563 with absolute addresses, which often allows the base register to be
9564 shared between multiple adjacent memory references, and might even
9565 allow the minipool to be avoided entirely. */
9566 else if (CONST_INT_P (x) && optimize > 0)
9567 {
9568 unsigned int bits;
9569 HOST_WIDE_INT mask, base, index;
9570 rtx base_reg;
9571
9572 /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
9573 only use an 8-bit index. So let's use a 12-bit index for
9574 SImode only and hope that arm_gen_constant will enable LDRB
9575 to use more bits. */
9576 bits = (mode == SImode) ? 12 : 8;
9577 mask = (1 << bits) - 1;
9578 base = INTVAL (x) & ~mask;
9579 index = INTVAL (x) & mask;
9580 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9581 {
9582 /* It'll most probably be more efficient to generate the
9583 base with more bits set and use a negative index instead.
9584 Don't do this for Thumb as negative offsets are much more
9585 limited. */
9586 base |= mask;
9587 index -= mask;
9588 }
9589 base_reg = force_reg (SImode, GEN_INT (base));
9590 x = plus_constant (Pmode, base_reg, index);
9591 }
9592
9593 if (flag_pic)
9594 {
9595 /* We need to find and carefully transform any SYMBOL and LABEL
9596 references; so go back to the original address expression. */
9597 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9598 false /*compute_now*/);
9599
9600 if (new_x != orig_x)
9601 x = new_x;
9602 }
9603
9604 return x;
9605 }
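/* A worked example of the constant-splitting case above: for an SImode
   access to the absolute address 0x3057, BITS is 12, so MASK = 0xfff,
   BASE = 0x3000 and INDEX = 0x57.  The base (a valid rotated immediate)
   is forced into a register and the access becomes [rbase, #0x57]; a
   neighbouring access to, say, 0x305b can then reuse the same base
   register instead of needing its own literal.  */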
9606
9607
9608 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9609 to be legitimate. If we find one, return the new, valid address. */
9610 rtx
9611 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9612 {
9613 if (GET_CODE (x) == PLUS
9614 && CONST_INT_P (XEXP (x, 1))
9615 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9616 || INTVAL (XEXP (x, 1)) < 0))
9617 {
9618 rtx xop0 = XEXP (x, 0);
9619 rtx xop1 = XEXP (x, 1);
9620 HOST_WIDE_INT offset = INTVAL (xop1);
9621
9622 /* Try to fold the offset into a biasing of the base register and
9623 then offsetting that. Don't do this when optimizing for space
9624 since it can cause too many CSEs. */
9625 if (optimize_size && offset >= 0
9626 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9627 {
9628 HOST_WIDE_INT delta;
9629
9630 if (offset >= 256)
9631 delta = offset - (256 - GET_MODE_SIZE (mode));
9632 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9633 delta = 31 * GET_MODE_SIZE (mode);
9634 else
9635 delta = offset & (~31 * GET_MODE_SIZE (mode));
9636
9637 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9638 NULL_RTX);
9639 x = plus_constant (Pmode, xop0, delta);
9640 }
9641 else if (offset < 0 && offset > -256)
9642 /* Small negative offsets are best done with a subtract before the
9643 dereference, since forcing these into a register normally takes
9644 two instructions. */
9645 x = force_operand (x, NULL_RTX);
9646 else
9647 {
9648 /* For the remaining cases, force the constant into a register. */
9649 xop1 = force_reg (SImode, xop1);
9650 x = gen_rtx_PLUS (SImode, xop0, xop1);
9651 }
9652 }
9653 else if (GET_CODE (x) == PLUS
9654 && s_register_operand (XEXP (x, 1), SImode)
9655 && !s_register_operand (XEXP (x, 0), SImode))
9656 {
9657 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9658
9659 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9660 }
9661
9662 if (flag_pic)
9663 {
9664 /* We need to find and carefully transform any SYMBOL and LABEL
9665 references; so go back to the original address expression. */
9666 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9667 false /*compute_now*/);
9668
9669 if (new_x != orig_x)
9670 x = new_x;
9671 }
9672
9673 return x;
9674 }
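/* A worked example of the rebiasing case above: with optimize_size,
   MODE == SImode and an address (plus rB (const_int 300)), OFFSET is
   300 >= 256, so DELTA = 300 - (256 - 4) = 48.  The base is biased by
   300 - 48 = 252 and the result is (plus (rB + 252) (const_int 48)); the
   bias can be added cheaply and the remaining offset 48 fits the Thumb-1
   ldr reg+imm5 word form (48 = 12 * 4).  */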
9675
9676 /* Return TRUE if X contains any TLS symbol references. */
9677
9678 bool
9679 arm_tls_referenced_p (rtx x)
9680 {
9681 if (! TARGET_HAVE_TLS)
9682 return false;
9683
9684 subrtx_iterator::array_type array;
9685 FOR_EACH_SUBRTX (iter, array, x, ALL)
9686 {
9687 const_rtx x = *iter;
9688 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9689 {
9690 /* ARM currently does not provide relocations to encode TLS variables
9691 into AArch32 instructions, only data, so there is currently no way
9692 to implement these if the literal pool is disabled. */
9693 if (arm_disable_literal_pool)
9694 sorry ("accessing thread-local storage is not currently supported "
9695 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9696
9697 return true;
9698 }
9699
9700 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9701 TLS offsets, not real symbol references. */
9702 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9703 iter.skip_subrtxes ();
9704 }
9705 return false;
9706 }
9707
9708 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9709
9710 On the ARM, allow any integer (invalid ones are removed later by insn
9711 patterns), nice doubles and symbol_refs which refer to the function's
9712 constant pool XXX.
9713
9714 When generating pic allow anything. */
9715
9716 static bool
9717 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9718 {
9719 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9720 return false;
9721
9722 return flag_pic || !label_mentioned_p (x);
9723 }
9724
9725 static bool
9726 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9727 {
9728 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates a
9729 HIGH RTX. Such RTXs must therefore be allowed for Thumb-1 so that,
9730 when run for ARMv8-M Baseline or later, the result is valid. */
9731 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9732 x = XEXP (x, 0);
9733
9734 return (CONST_INT_P (x)
9735 || CONST_DOUBLE_P (x)
9736 || CONSTANT_ADDRESS_P (x)
9737 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9738 /* On Thumb-1 without MOVT/MOVW and literal pool disabled,
9739 we build the symbol address with upper/lower
9740 relocations. */
9741 || (TARGET_THUMB1
9742 && !label_mentioned_p (x)
9743 && arm_valid_symbolic_address_p (x)
9744 && arm_disable_literal_pool)
9745 || flag_pic);
9746 }
9747
9748 static bool
9749 arm_legitimate_constant_p (machine_mode mode, rtx x)
9750 {
9751 return (!arm_cannot_force_const_mem (mode, x)
9752 && (TARGET_32BIT
9753 ? arm_legitimate_constant_p_1 (mode, x)
9754 : thumb_legitimate_constant_p (mode, x)));
9755 }
9756
9757 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9758
9759 static bool
9760 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9761 {
9762 rtx base, offset;
9763 split_const (x, &base, &offset);
9764
9765 if (SYMBOL_REF_P (base))
9766 {
9767 /* Function symbols cannot have an offset due to the Thumb bit. */
9768 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9769 && INTVAL (offset) != 0)
9770 return true;
9771
9772 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9773 && !offset_within_block_p (base, INTVAL (offset)))
9774 return true;
9775 }
9776 return arm_tls_referenced_p (x);
9777 }
9778 \f
9779 #define REG_OR_SUBREG_REG(X) \
9780 (REG_P (X) \
9781 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9782
9783 #define REG_OR_SUBREG_RTX(X) \
9784 (REG_P (X) ? (X) : SUBREG_REG (X))
9785
9786 static inline int
9787 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9788 {
9789 machine_mode mode = GET_MODE (x);
9790 int total, words;
9791
9792 switch (code)
9793 {
9794 case ASHIFT:
9795 case ASHIFTRT:
9796 case LSHIFTRT:
9797 case ROTATERT:
9798 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9799
9800 case PLUS:
9801 case MINUS:
9802 case COMPARE:
9803 case NEG:
9804 case NOT:
9805 return COSTS_N_INSNS (1);
9806
9807 case MULT:
9808 if (arm_arch6m && arm_m_profile_small_mul)
9809 return COSTS_N_INSNS (32);
9810
9811 if (CONST_INT_P (XEXP (x, 1)))
9812 {
9813 int cycles = 0;
9814 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9815
9816 while (i)
9817 {
9818 i >>= 2;
9819 cycles++;
9820 }
9821 return COSTS_N_INSNS (2) + cycles;
9822 }
9823 return COSTS_N_INSNS (1) + 16;
9824
9825 case SET:
9826 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9827 the mode. */
9828 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9829 return (COSTS_N_INSNS (words)
9830 + 4 * ((MEM_P (SET_SRC (x)))
9831 + MEM_P (SET_DEST (x))));
9832
9833 case CONST_INT:
9834 if (outer == SET)
9835 {
9836 if (UINTVAL (x) < 256
9837 /* 16-bit constant. */
9838 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9839 return 0;
9840 if (thumb_shiftable_const (INTVAL (x)))
9841 return COSTS_N_INSNS (2);
9842 return arm_disable_literal_pool
9843 ? COSTS_N_INSNS (8)
9844 : COSTS_N_INSNS (3);
9845 }
9846 else if ((outer == PLUS || outer == COMPARE)
9847 && INTVAL (x) < 256 && INTVAL (x) > -256)
9848 return 0;
9849 else if ((outer == IOR || outer == XOR || outer == AND)
9850 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9851 return COSTS_N_INSNS (1);
9852 else if (outer == AND)
9853 {
9854 int i;
9855 /* This duplicates the tests in the andsi3 expander. */
9856 for (i = 9; i <= 31; i++)
9857 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9858 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9859 return COSTS_N_INSNS (2);
9860 }
9861 else if (outer == ASHIFT || outer == ASHIFTRT
9862 || outer == LSHIFTRT)
9863 return 0;
9864 return COSTS_N_INSNS (2);
9865
9866 case CONST:
9867 case CONST_DOUBLE:
9868 case LABEL_REF:
9869 case SYMBOL_REF:
9870 return COSTS_N_INSNS (3);
9871
9872 case UDIV:
9873 case UMOD:
9874 case DIV:
9875 case MOD:
9876 return 100;
9877
9878 case TRUNCATE:
9879 return 99;
9880
9881 case AND:
9882 case XOR:
9883 case IOR:
9884 /* XXX guess. */
9885 return 8;
9886
9887 case MEM:
9888 /* XXX another guess. */
9889 /* Memory costs quite a lot for the first word, but subsequent words
9890 load at the equivalent of a single insn each. */
9891 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9892 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9893 ? 4 : 0));
9894
9895 case IF_THEN_ELSE:
9896 /* XXX a guess. */
9897 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9898 return 14;
9899 return 2;
9900
9901 case SIGN_EXTEND:
9902 case ZERO_EXTEND:
9903 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9904 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9905
9906 if (mode == SImode)
9907 return total;
9908
9909 if (arm_arch6)
9910 return total + COSTS_N_INSNS (1);
9911
9912 /* Assume a two-shift sequence. Increase the cost slightly so
9913 we prefer actual shifts over an extend operation. */
9914 return total + 1 + COSTS_N_INSNS (2);
9915
9916 default:
9917 return 99;
9918 }
9919 }
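/* A worked example of the MULT-by-constant estimate above: for
   (mult:SI r1 (const_int 0x55)) the loop shifts the constant right two
   bits per iteration (0x55 -> 0x15 -> 0x5 -> 0x1 -> 0), giving CYCLES = 4
   and a total of COSTS_N_INSNS (2) + 4; the estimate thus grows with
   roughly half the bit-length of the multiplier.  */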
9920
9921 /* Estimates the size cost of thumb1 instructions.
9922 For now most of the code is copied from thumb1_rtx_costs. We need more
9923 fine-grained tuning when we have more related test cases. */
9924 static inline int
9925 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9926 {
9927 machine_mode mode = GET_MODE (x);
9928 int words, cost;
9929
9930 switch (code)
9931 {
9932 case ASHIFT:
9933 case ASHIFTRT:
9934 case LSHIFTRT:
9935 case ROTATERT:
9936 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9937
9938 case PLUS:
9939 case MINUS:
9940 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
9941 shiftsub1 patterns defined by RTL expansion, especially for the
9942 expansion of multiplication. */
9943 if ((GET_CODE (XEXP (x, 0)) == MULT
9944 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9945 || (GET_CODE (XEXP (x, 1)) == MULT
9946 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9947 return COSTS_N_INSNS (2);
9948 /* Fall through. */
9949 case COMPARE:
9950 case NEG:
9951 case NOT:
9952 return COSTS_N_INSNS (1);
9953
9954 case MULT:
9955 if (CONST_INT_P (XEXP (x, 1)))
9956 {
9957 /* The Thumb-1 mul instruction can't operate on a constant. We must
9958 load it into a register first. */
9959 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9960 /* For the targets which have a very small and high-latency multiply
9961 unit, we prefer to synthesize the mult with up to 5 instructions,
9962 giving a good balance between size and performance. */
9963 if (arm_arch6m && arm_m_profile_small_mul)
9964 return COSTS_N_INSNS (5);
9965 else
9966 return COSTS_N_INSNS (1) + const_size;
9967 }
9968 return COSTS_N_INSNS (1);
9969
9970 case SET:
9971 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9972 the mode. */
9973 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9974 cost = COSTS_N_INSNS (words);
9975 if (satisfies_constraint_J (SET_SRC (x))
9976 || satisfies_constraint_K (SET_SRC (x))
9977 /* Too big an immediate for a 2-byte mov, using MOVT. */
9978 || (CONST_INT_P (SET_SRC (x))
9979 && UINTVAL (SET_SRC (x)) >= 256
9980 && TARGET_HAVE_MOVT
9981 && satisfies_constraint_j (SET_SRC (x)))
9982 /* thumb1_movdi_insn. */
9983 || ((words > 1) && MEM_P (SET_SRC (x))))
9984 cost += COSTS_N_INSNS (1);
9985 return cost;
9986
9987 case CONST_INT:
9988 if (outer == SET)
9989 {
9990 if (UINTVAL (x) < 256)
9991 return COSTS_N_INSNS (1);
9992 /* movw is 4 bytes long. */
9993 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9994 return COSTS_N_INSNS (2);
9995 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9996 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9997 return COSTS_N_INSNS (2);
9998 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9999 if (thumb_shiftable_const (INTVAL (x)))
10000 return COSTS_N_INSNS (2);
10001 return arm_disable_literal_pool
10002 ? COSTS_N_INSNS (8)
10003 : COSTS_N_INSNS (3);
10004 }
10005 else if ((outer == PLUS || outer == COMPARE)
10006 && INTVAL (x) < 256 && INTVAL (x) > -256)
10007 return 0;
10008 else if ((outer == IOR || outer == XOR || outer == AND)
10009 && INTVAL (x) < 256 && INTVAL (x) >= -256)
10010 return COSTS_N_INSNS (1);
10011 else if (outer == AND)
10012 {
10013 int i;
10014 /* This duplicates the tests in the andsi3 expander. */
10015 for (i = 9; i <= 31; i++)
10016 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
10017 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
10018 return COSTS_N_INSNS (2);
10019 }
10020 else if (outer == ASHIFT || outer == ASHIFTRT
10021 || outer == LSHIFTRT)
10022 return 0;
10023 return COSTS_N_INSNS (2);
10024
10025 case CONST:
10026 case CONST_DOUBLE:
10027 case LABEL_REF:
10028 case SYMBOL_REF:
10029 return COSTS_N_INSNS (3);
10030
10031 case UDIV:
10032 case UMOD:
10033 case DIV:
10034 case MOD:
10035 return 100;
10036
10037 case TRUNCATE:
10038 return 99;
10039
10040 case AND:
10041 case XOR:
10042 case IOR:
10043 return COSTS_N_INSNS (1);
10044
10045 case MEM:
10046 return (COSTS_N_INSNS (1)
10047 + COSTS_N_INSNS (1)
10048 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
10049 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
10050 ? COSTS_N_INSNS (1) : 0));
10051
10052 case IF_THEN_ELSE:
10053 /* XXX a guess. */
10054 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10055 return 14;
10056 return 2;
10057
10058 case ZERO_EXTEND:
10059 /* XXX still guessing. */
10060 switch (GET_MODE (XEXP (x, 0)))
10061 {
10062 case E_QImode:
10063 return (1 + (mode == DImode ? 4 : 0)
10064 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10065
10066 case E_HImode:
10067 return (4 + (mode == DImode ? 4 : 0)
10068 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10069
10070 case E_SImode:
10071 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
10072
10073 default:
10074 return 99;
10075 }
10076
10077 default:
10078 return 99;
10079 }
10080 }
10081
10082 /* Helper function for arm_rtx_costs. If one operand of the OP, a
10083 PLUS, adds the carry flag, then return the other operand. If
10084 neither is a carry, return OP unchanged. */
10085 static rtx
10086 strip_carry_operation (rtx op)
10087 {
10088 gcc_assert (GET_CODE (op) == PLUS);
10089 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
10090 return XEXP (op, 1);
10091 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
10092 return XEXP (op, 0);
10093 return op;
10094 }
10095
10096 /* Helper function for arm_rtx_costs. If the operand is a valid shift
10097 operand, then return the operand that is being shifted. If the shift
10098 is not by a constant, then set SHIFT_REG to point to the operand.
10099 Return NULL if OP is not a shifter operand. */
10100 static rtx
10101 shifter_op_p (rtx op, rtx *shift_reg)
10102 {
10103 enum rtx_code code = GET_CODE (op);
10104
10105 if (code == MULT && CONST_INT_P (XEXP (op, 1))
10106 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
10107 return XEXP (op, 0);
10108 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
10109 return XEXP (op, 0);
10110 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
10111 || code == ASHIFTRT)
10112 {
10113 if (!CONST_INT_P (XEXP (op, 1)))
10114 *shift_reg = XEXP (op, 1);
10115 return XEXP (op, 0);
10116 }
10117
10118 return NULL;
10119 }
10120
10121 static bool
10122 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
10123 {
10124 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
10125 rtx_code code = GET_CODE (x);
10126 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
10127
10128 switch (XINT (x, 1))
10129 {
10130 case UNSPEC_UNALIGNED_LOAD:
10131 /* We can only do unaligned loads into the integer unit, and we can't
10132 use LDM or LDRD. */
10133 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10134 if (speed_p)
10135 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
10136 + extra_cost->ldst.load_unaligned);
10137
10138 #ifdef NOT_YET
10139 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10140 ADDR_SPACE_GENERIC, speed_p);
10141 #endif
10142 return true;
10143
10144 case UNSPEC_UNALIGNED_STORE:
10145 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
10146 if (speed_p)
10147 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
10148 + extra_cost->ldst.store_unaligned);
10149
10150 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
10151 #ifdef NOT_YET
10152 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
10153 ADDR_SPACE_GENERIC, speed_p);
10154 #endif
10155 return true;
10156
10157 case UNSPEC_VRINTZ:
10158 case UNSPEC_VRINTP:
10159 case UNSPEC_VRINTM:
10160 case UNSPEC_VRINTR:
10161 case UNSPEC_VRINTX:
10162 case UNSPEC_VRINTA:
10163 if (speed_p)
10164 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
10165
10166 return true;
10167 default:
10168 *cost = COSTS_N_INSNS (2);
10169 break;
10170 }
10171 return true;
10172 }
10173
10174 /* Cost of a libcall. We assume one insn per argument, an amount for the
10175 call (one insn for -Os) and then one for processing the result. */
10176 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
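/* For example, a libcall taking two arguments costs LIBCALL_COST (2),
   which expands to COSTS_N_INSNS (2 + 18) when optimizing for speed and
   COSTS_N_INSNS (2 + 2) when optimizing for size.  */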
10177
10178 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
10179 do \
10180 { \
10181 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
10182 if (shift_op != NULL \
10183 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
10184 { \
10185 if (shift_reg) \
10186 { \
10187 if (speed_p) \
10188 *cost += extra_cost->alu.arith_shift_reg; \
10189 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
10190 ASHIFT, 1, speed_p); \
10191 } \
10192 else if (speed_p) \
10193 *cost += extra_cost->alu.arith_shift; \
10194 \
10195 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
10196 ASHIFT, 0, speed_p) \
10197 + rtx_cost (XEXP (x, 1 - IDX), \
10198 GET_MODE (shift_op), \
10199 OP, 1, speed_p)); \
10200 return true; \
10201 } \
10202 } \
10203 while (0)
10204
10205 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
10206 considering the costs of the addressing mode and memory access
10207 separately. */
10208 static bool
10209 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
10210 int *cost, bool speed_p)
10211 {
10212 machine_mode mode = GET_MODE (x);
10213
10214 *cost = COSTS_N_INSNS (1);
10215
10216 if (flag_pic
10217 && GET_CODE (XEXP (x, 0)) == PLUS
10218 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
10219 /* This will be split into two instructions. Add the cost of the
10220 additional instruction here. The cost of the memory access is computed
10221 below. See arm.md:calculate_pic_address. */
10222 *cost += COSTS_N_INSNS (1);
10223
10224 /* Calculate cost of the addressing mode. */
10225 if (speed_p)
10226 {
10227 arm_addr_mode_op op_type;
10228 switch (GET_CODE (XEXP (x, 0)))
10229 {
10230 default:
10231 case REG:
10232 op_type = AMO_DEFAULT;
10233 break;
10234 case MINUS:
10235 /* MINUS does not appear in RTL, but the architecture supports it,
10236 so handle this case defensively. */
10237 /* fall through */
10238 case PLUS:
10239 op_type = AMO_NO_WB;
10240 break;
10241 case PRE_INC:
10242 case PRE_DEC:
10243 case POST_INC:
10244 case POST_DEC:
10245 case PRE_MODIFY:
10246 case POST_MODIFY:
10247 op_type = AMO_WB;
10248 break;
10249 }
10250
10251 if (VECTOR_MODE_P (mode))
10252 *cost += current_tune->addr_mode_costs->vector[op_type];
10253 else if (FLOAT_MODE_P (mode))
10254 *cost += current_tune->addr_mode_costs->fp[op_type];
10255 else
10256 *cost += current_tune->addr_mode_costs->integer[op_type];
10257 }
10258
10259 /* Calculate cost of memory access. */
10260 if (speed_p)
10261 {
10262 if (FLOAT_MODE_P (mode))
10263 {
10264 if (GET_MODE_SIZE (mode) == 8)
10265 *cost += extra_cost->ldst.loadd;
10266 else
10267 *cost += extra_cost->ldst.loadf;
10268 }
10269 else if (VECTOR_MODE_P (mode))
10270 *cost += extra_cost->ldst.loadv;
10271 else
10272 {
10273 /* Integer modes */
10274 if (GET_MODE_SIZE (mode) == 8)
10275 *cost += extra_cost->ldst.ldrd;
10276 else
10277 *cost += extra_cost->ldst.load;
10278 }
10279 }
10280
10281 return true;
10282 }
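/* Illustrative cost for (mem:SI (plus r1 (const_int 8))) with speed_p:
   COSTS_N_INSNS (1) for the access itself, plus
   addr_mode_costs->integer[AMO_NO_WB] for the PLUS addressing mode, plus
   extra_cost->ldst.load for the 32-bit integer load; the actual totals
   depend entirely on the current tuning tables.  */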
10283
10284 /* Helper for arm_bfi_p. */
10285 static bool
10286 arm_bfi_1_p (rtx op0, rtx op1, rtx *sub0, rtx *sub1)
10287 {
10288 unsigned HOST_WIDE_INT const1;
10289 unsigned HOST_WIDE_INT const2 = 0;
10290
10291 if (!CONST_INT_P (XEXP (op0, 1)))
10292 return false;
10293
10294 const1 = UINTVAL (XEXP (op0, 1));
10295 if (!CONST_INT_P (XEXP (op1, 1))
10296 || ~UINTVAL (XEXP (op1, 1)) != const1)
10297 return false;
10298
10299 if (GET_CODE (XEXP (op0, 0)) == ASHIFT
10300 && CONST_INT_P (XEXP (XEXP (op0, 0), 1)))
10301 {
10302 const2 = UINTVAL (XEXP (XEXP (op0, 0), 1));
10303 *sub0 = XEXP (XEXP (op0, 0), 0);
10304 }
10305 else
10306 *sub0 = XEXP (op0, 0);
10307
10308 if (const2 >= GET_MODE_BITSIZE (GET_MODE (op0)))
10309 return false;
10310
10311 *sub1 = XEXP (op1, 0);
10312 return exact_log2 (const1 + (HOST_WIDE_INT_1U << const2)) >= 0;
10313 }
10314
10315 /* Recognize a BFI idiom. Helper for arm_rtx_costs_internal. The
10316 format looks something like:
10317
10318 (IOR (AND (reg1) (~const1))
10319 (AND (ASHIFT (reg2) (const2))
10320 (const1)))
10321
10322 where const1 is a consecutive sequence of 1-bits with the
10323 least-significant non-zero bit starting at bit position const2. If
10324 const2 is zero, then the shift will not appear at all, due to
10325 canonicalization. The two arms of the IOR expression may be
10326 flipped. */
10327 static bool
10328 arm_bfi_p (rtx x, rtx *sub0, rtx *sub1)
10329 {
10330 if (GET_CODE (x) != IOR)
10331 return false;
10332 if (GET_CODE (XEXP (x, 0)) != AND
10333 || GET_CODE (XEXP (x, 1)) != AND)
10334 return false;
10335 return (arm_bfi_1_p (XEXP (x, 0), XEXP (x, 1), sub0, sub1)
10336 || arm_bfi_1_p (XEXP (x, 1), XEXP (x, 0), sub1, sub0));
10337 }
10338
10339 /* RTX costs. Make an estimate of the cost of executing the operation
10340 X, which is contained within an operation with code OUTER_CODE.
10341 SPEED_P indicates whether the cost desired is the performance cost,
10342 or the size cost. The estimate is stored in COST and the return
10343 value is TRUE if the cost calculation is final, or FALSE if the
10344 caller should recurse through the operands of X to add additional
10345 costs.
10346
10347 We currently make no attempt to model the size savings of Thumb-2
10348 16-bit instructions. At the normal points in compilation where
10349 this code is called we have no measure of whether the condition
10350 flags are live or not, and thus no realistic way to determine what
10351 the size will eventually be. */
10352 static bool
10353 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10354 const struct cpu_cost_table *extra_cost,
10355 int *cost, bool speed_p)
10356 {
10357 machine_mode mode = GET_MODE (x);
10358
10359 *cost = COSTS_N_INSNS (1);
10360
10361 if (TARGET_THUMB1)
10362 {
10363 if (speed_p)
10364 *cost = thumb1_rtx_costs (x, code, outer_code);
10365 else
10366 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10367 return true;
10368 }
10369
10370 switch (code)
10371 {
10372 case SET:
10373 *cost = 0;
10374 /* SET RTXs don't have a mode so we get it from the destination. */
10375 mode = GET_MODE (SET_DEST (x));
10376
10377 if (REG_P (SET_SRC (x))
10378 && REG_P (SET_DEST (x)))
10379 {
10380 /* Assume that most copies can be done with a single insn,
10381 unless we don't have HW FP, in which case everything
10382 larger than word mode will require two insns. */
10383 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10384 && GET_MODE_SIZE (mode) > 4)
10385 || mode == DImode)
10386 ? 2 : 1);
10387 /* Conditional register moves can be encoded
10388 in 16 bits in Thumb mode. */
10389 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10390 *cost >>= 1;
10391
10392 return true;
10393 }
10394
10395 if (CONST_INT_P (SET_SRC (x)))
10396 {
10397 /* Handle CONST_INT here, since the value doesn't have a mode
10398 and we would otherwise be unable to work out the true cost. */
10399 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10400 0, speed_p);
10401 outer_code = SET;
10402 /* Slightly lower the cost of setting a core reg to a constant.
10403 This helps break up chains and allows for better scheduling. */
10404 if (REG_P (SET_DEST (x))
10405 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10406 *cost -= 1;
10407 x = SET_SRC (x);
10408 /* Immediate moves with an immediate in the range [0, 255] can be
10409 encoded in 16 bits in Thumb mode. */
10410 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10411 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10412 *cost >>= 1;
10413 goto const_int_cost;
10414 }
10415
10416 return false;
10417
10418 case MEM:
10419 return arm_mem_costs (x, extra_cost, cost, speed_p);
10420
10421 case PARALLEL:
10422 {
10423 /* Calculations of LDM costs are complex. We assume an initial cost
10424 (ldm_1st) which will load the number of registers mentioned in
10425 ldm_regs_per_insn_1st registers; then each additional
10426 ldm_regs_per_insn_subsequent registers cost one more insn. The
10427 formula for N regs is thus:
10428
10429 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10430 + ldm_regs_per_insn_subsequent - 1)
10431 / ldm_regs_per_insn_subsequent).
10432
10433 Additional costs may also be added for addressing. A similar
10434 formula is used for STM. */
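/* Worked example with hypothetical tuning values: if regs_per_insn_1st == 2
   and regs_per_insn_sub == 2, then a 5-register LDM adds
   COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2) == COSTS_N_INSNS (2)
   in addition to the first-instruction contribution added below. */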
10435
10436 bool is_ldm = load_multiple_operation (x, SImode);
10437 bool is_stm = store_multiple_operation (x, SImode);
10438
10439 if (is_ldm || is_stm)
10440 {
10441 if (speed_p)
10442 {
10443 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10444 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10445 ? extra_cost->ldst.ldm_regs_per_insn_1st
10446 : extra_cost->ldst.stm_regs_per_insn_1st;
10447 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10448 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10449 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10450
10451 *cost += regs_per_insn_1st
10452 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10453 + regs_per_insn_sub - 1)
10454 / regs_per_insn_sub);
10455 return true;
10456 }
10457
10458 }
10459 return false;
10460 }
10461 case DIV:
10462 case UDIV:
10463 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10464 && (mode == SFmode || !TARGET_VFP_SINGLE))
10465 *cost += COSTS_N_INSNS (speed_p
10466 ? extra_cost->fp[mode != SFmode].div : 0);
10467 else if (mode == SImode && TARGET_IDIV)
10468 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10469 else
10470 *cost = LIBCALL_COST (2);
10471
10472 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10473 are possible, udiv is preferred. */
10474 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10475 return false; /* All arguments must be in registers. */
10476
10477 case MOD:
10478 /* MOD by a power of 2 can be expanded as:
10479 rsbs r1, r0, #0
10480 and r0, r0, #(n - 1)
10481 and r1, r1, #(n - 1)
10482 rsbpl r0, r1, #0. */
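/* Illustrative example: with r0 == -5 and n == 4, RSBS sets r1 = 5 and
   leaves N clear (so PL holds), the ANDs give r0 = 3 and r1 = 1, and the
   final RSBPL executes, leaving r0 = -1, which matches C signed modulo. */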
10483 if (CONST_INT_P (XEXP (x, 1))
10484 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10485 && mode == SImode)
10486 {
10487 *cost += COSTS_N_INSNS (3);
10488
10489 if (speed_p)
10490 *cost += 2 * extra_cost->alu.logical
10491 + extra_cost->alu.arith;
10492 return true;
10493 }
10494
10495 /* Fall-through. */
10496 case UMOD:
10497 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10498 are possible, udiv is preferred. */
10499 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10500 return false; /* All arguments must be in registers. */
10501
10502 case ROTATE:
10503 if (mode == SImode && REG_P (XEXP (x, 1)))
10504 {
10505 *cost += (COSTS_N_INSNS (1)
10506 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10507 if (speed_p)
10508 *cost += extra_cost->alu.shift_reg;
10509 return true;
10510 }
10511 /* Fall through */
10512 case ROTATERT:
10513 case ASHIFT:
10514 case LSHIFTRT:
10515 case ASHIFTRT:
10516 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10517 {
10518 *cost += (COSTS_N_INSNS (2)
10519 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10520 if (speed_p)
10521 *cost += 2 * extra_cost->alu.shift;
10522 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10523 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10524 *cost += 1;
10525 return true;
10526 }
10527 else if (mode == SImode)
10528 {
10529 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10530 /* Slightly disparage register shifts at -Os, but not by much. */
10531 if (!CONST_INT_P (XEXP (x, 1)))
10532 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10533 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10534 return true;
10535 }
10536 else if (GET_MODE_CLASS (mode) == MODE_INT
10537 && GET_MODE_SIZE (mode) < 4)
10538 {
10539 if (code == ASHIFT)
10540 {
10541 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10542 /* Slightly disparage register shifts at -Os, but not by
10543 much. */
10544 if (!CONST_INT_P (XEXP (x, 1)))
10545 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10546 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10547 }
10548 else if (code == LSHIFTRT || code == ASHIFTRT)
10549 {
10550 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10551 {
10552 /* Can use SBFX/UBFX. */
10553 if (speed_p)
10554 *cost += extra_cost->alu.bfx;
10555 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10556 }
10557 else
10558 {
10559 *cost += COSTS_N_INSNS (1);
10560 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10561 if (speed_p)
10562 {
10563 if (CONST_INT_P (XEXP (x, 1)))
10564 *cost += 2 * extra_cost->alu.shift;
10565 else
10566 *cost += (extra_cost->alu.shift
10567 + extra_cost->alu.shift_reg);
10568 }
10569 else
10570 /* Slightly disparage register shifts. */
10571 *cost += !CONST_INT_P (XEXP (x, 1));
10572 }
10573 }
10574 else /* Rotates. */
10575 {
10576 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10577 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10578 if (speed_p)
10579 {
10580 if (CONST_INT_P (XEXP (x, 1)))
10581 *cost += (2 * extra_cost->alu.shift
10582 + extra_cost->alu.log_shift);
10583 else
10584 *cost += (extra_cost->alu.shift
10585 + extra_cost->alu.shift_reg
10586 + extra_cost->alu.log_shift_reg);
10587 }
10588 }
10589 return true;
10590 }
10591
10592 *cost = LIBCALL_COST (2);
10593 return false;
10594
10595 case BSWAP:
10596 if (arm_arch6)
10597 {
10598 if (mode == SImode)
10599 {
10600 if (speed_p)
10601 *cost += extra_cost->alu.rev;
10602
10603 return false;
10604 }
10605 }
10606 else
10607 {
10608 /* No rev instruction available. Look at arm_legacy_rev
10609 and thumb_legacy_rev for the form of RTL used then. */
10610 if (TARGET_THUMB)
10611 {
10612 *cost += COSTS_N_INSNS (9);
10613
10614 if (speed_p)
10615 {
10616 *cost += 6 * extra_cost->alu.shift;
10617 *cost += 3 * extra_cost->alu.logical;
10618 }
10619 }
10620 else
10621 {
10622 *cost += COSTS_N_INSNS (4);
10623
10624 if (speed_p)
10625 {
10626 *cost += 2 * extra_cost->alu.shift;
10627 *cost += extra_cost->alu.arith_shift;
10628 *cost += 2 * extra_cost->alu.logical;
10629 }
10630 }
10631 return true;
10632 }
10633 return false;
10634
10635 case MINUS:
10636 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10637 && (mode == SFmode || !TARGET_VFP_SINGLE))
10638 {
10639 if (GET_CODE (XEXP (x, 0)) == MULT
10640 || GET_CODE (XEXP (x, 1)) == MULT)
10641 {
10642 rtx mul_op0, mul_op1, sub_op;
10643
10644 if (speed_p)
10645 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10646
10647 if (GET_CODE (XEXP (x, 0)) == MULT)
10648 {
10649 mul_op0 = XEXP (XEXP (x, 0), 0);
10650 mul_op1 = XEXP (XEXP (x, 0), 1);
10651 sub_op = XEXP (x, 1);
10652 }
10653 else
10654 {
10655 mul_op0 = XEXP (XEXP (x, 1), 0);
10656 mul_op1 = XEXP (XEXP (x, 1), 1);
10657 sub_op = XEXP (x, 0);
10658 }
10659
10660 /* The first operand of the multiply may be optionally
10661 negated. */
10662 if (GET_CODE (mul_op0) == NEG)
10663 mul_op0 = XEXP (mul_op0, 0);
10664
10665 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10666 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10667 + rtx_cost (sub_op, mode, code, 0, speed_p));
10668
10669 return true;
10670 }
10671
10672 if (speed_p)
10673 *cost += extra_cost->fp[mode != SFmode].addsub;
10674 return false;
10675 }
10676
10677 if (mode == SImode)
10678 {
10679 rtx shift_by_reg = NULL;
10680 rtx shift_op;
10681 rtx non_shift_op;
10682 rtx op0 = XEXP (x, 0);
10683 rtx op1 = XEXP (x, 1);
10684
10685 /* Factor out any borrow operation. There's more than one way
10686 of expressing this; try to recognize them all. */
10687 if (GET_CODE (op0) == MINUS)
10688 {
10689 if (arm_borrow_operation (op1, SImode))
10690 {
10691 op1 = XEXP (op0, 1);
10692 op0 = XEXP (op0, 0);
10693 }
10694 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10695 op0 = XEXP (op0, 0);
10696 }
10697 else if (GET_CODE (op1) == PLUS
10698 && arm_borrow_operation (XEXP (op1, 0), SImode))
10699 op1 = XEXP (op1, 0);
10700 else if (GET_CODE (op0) == NEG
10701 && arm_borrow_operation (op1, SImode))
10702 {
10703 /* Negate with carry-in. For Thumb2 this is done with
10704 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
10705 RSC instruction that exists in Arm mode. */
10706 if (speed_p)
10707 *cost += (TARGET_THUMB2
10708 ? extra_cost->alu.arith_shift
10709 : extra_cost->alu.arith);
10710 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10711 return true;
10712 }
10713 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10714 Note we do mean ~borrow here. */
10715 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10716 {
10717 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10718 return true;
10719 }
10720
10721 shift_op = shifter_op_p (op0, &shift_by_reg);
10722 if (shift_op == NULL)
10723 {
10724 shift_op = shifter_op_p (op1, &shift_by_reg);
10725 non_shift_op = op0;
10726 }
10727 else
10728 non_shift_op = op1;
10729
10730 if (shift_op != NULL)
10731 {
10732 if (shift_by_reg != NULL)
10733 {
10734 if (speed_p)
10735 *cost += extra_cost->alu.arith_shift_reg;
10736 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10737 }
10738 else if (speed_p)
10739 *cost += extra_cost->alu.arith_shift;
10740
10741 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10742 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10743 return true;
10744 }
10745
10746 if (arm_arch_thumb2
10747 && GET_CODE (XEXP (x, 1)) == MULT)
10748 {
10749 /* MLS. */
10750 if (speed_p)
10751 *cost += extra_cost->mult[0].add;
10752 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10753 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10754 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10755 return true;
10756 }
10757
10758 if (CONST_INT_P (op0))
10759 {
10760 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10761 INTVAL (op0), NULL_RTX,
10762 NULL_RTX, 1, 0);
10763 *cost = COSTS_N_INSNS (insns);
10764 if (speed_p)
10765 *cost += insns * extra_cost->alu.arith;
10766 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10767 return true;
10768 }
10769 else if (speed_p)
10770 *cost += extra_cost->alu.arith;
10771
10772 /* Don't recurse as we don't want to cost any borrow that
10773 we've stripped. */
10774 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10775 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10776 return true;
10777 }
10778
10779 if (GET_MODE_CLASS (mode) == MODE_INT
10780 && GET_MODE_SIZE (mode) < 4)
10781 {
10782 rtx shift_op, shift_reg;
10783 shift_reg = NULL;
10784
10785 /* We check both sides of the MINUS for shifter operands since,
10786 unlike PLUS, it's not commutative. */
10787
10788 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10789 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10790
10791 /* Slightly disparage, as we might need to widen the result. */
10792 *cost += 1;
10793 if (speed_p)
10794 *cost += extra_cost->alu.arith;
10795
10796 if (CONST_INT_P (XEXP (x, 0)))
10797 {
10798 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10799 return true;
10800 }
10801
10802 return false;
10803 }
10804
10805 if (mode == DImode)
10806 {
10807 *cost += COSTS_N_INSNS (1);
10808
10809 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10810 {
10811 rtx op1 = XEXP (x, 1);
10812
10813 if (speed_p)
10814 *cost += 2 * extra_cost->alu.arith;
10815
10816 if (GET_CODE (op1) == ZERO_EXTEND)
10817 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10818 0, speed_p);
10819 else
10820 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10821 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10822 0, speed_p);
10823 return true;
10824 }
10825 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10826 {
10827 if (speed_p)
10828 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10829 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10830 0, speed_p)
10831 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10832 return true;
10833 }
10834 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10835 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10836 {
10837 if (speed_p)
10838 *cost += (extra_cost->alu.arith
10839 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10840 ? extra_cost->alu.arith
10841 : extra_cost->alu.arith_shift));
10842 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10843 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10844 GET_CODE (XEXP (x, 1)), 0, speed_p));
10845 return true;
10846 }
10847
10848 if (speed_p)
10849 *cost += 2 * extra_cost->alu.arith;
10850 return false;
10851 }
10852
10853 /* Vector mode? */
10854
10855 *cost = LIBCALL_COST (2);
10856 return false;
10857
10858 case PLUS:
10859 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10860 && (mode == SFmode || !TARGET_VFP_SINGLE))
10861 {
10862 if (GET_CODE (XEXP (x, 0)) == MULT)
10863 {
10864 rtx mul_op0, mul_op1, add_op;
10865
10866 if (speed_p)
10867 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10868
10869 mul_op0 = XEXP (XEXP (x, 0), 0);
10870 mul_op1 = XEXP (XEXP (x, 0), 1);
10871 add_op = XEXP (x, 1);
10872
10873 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10874 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10875 + rtx_cost (add_op, mode, code, 0, speed_p));
10876
10877 return true;
10878 }
10879
10880 if (speed_p)
10881 *cost += extra_cost->fp[mode != SFmode].addsub;
10882 return false;
10883 }
10884 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10885 {
10886 *cost = LIBCALL_COST (2);
10887 return false;
10888 }
10889
10890 /* Narrow modes can be synthesized in SImode, but the range
10891 of useful sub-operations is limited. Check for shift operations
10892 on one of the operands. Only left shifts can be used in the
10893 narrow modes. */
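/* Illustrative example: an HImode (plus (ashift (reg) (const_int 2)) (reg))
   can still be costed as a single SImode ADD with a shifter operand, which
   is what HANDLE_NARROW_SHIFT_ARITH checks for below. */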
10894 if (GET_MODE_CLASS (mode) == MODE_INT
10895 && GET_MODE_SIZE (mode) < 4)
10896 {
10897 rtx shift_op, shift_reg;
10898 shift_reg = NULL;
10899
10900 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10901
10902 if (CONST_INT_P (XEXP (x, 1)))
10903 {
10904 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10905 INTVAL (XEXP (x, 1)), NULL_RTX,
10906 NULL_RTX, 1, 0);
10907 *cost = COSTS_N_INSNS (insns);
10908 if (speed_p)
10909 *cost += insns * extra_cost->alu.arith;
10910 /* Slightly penalize a narrow operation as the result may
10911 need widening. */
10912 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10913 return true;
10914 }
10915
10916 /* Slightly penalize a narrow operation as the result may
10917 need widening. */
10918 *cost += 1;
10919 if (speed_p)
10920 *cost += extra_cost->alu.arith;
10921
10922 return false;
10923 }
10924
10925 if (mode == SImode)
10926 {
10927 rtx shift_op, shift_reg;
10928
10929 if (TARGET_INT_SIMD
10930 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10931 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10932 {
10933 /* UXTA[BH] or SXTA[BH]. */
10934 if (speed_p)
10935 *cost += extra_cost->alu.extend_arith;
10936 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10937 0, speed_p)
10938 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10939 return true;
10940 }
10941
10942 rtx op0 = XEXP (x, 0);
10943 rtx op1 = XEXP (x, 1);
10944
10945 /* Handle a side effect of adding in the carry to an addition. */
10946 if (GET_CODE (op0) == PLUS
10947 && arm_carry_operation (op1, mode))
10948 {
10949 op1 = XEXP (op0, 1);
10950 op0 = XEXP (op0, 0);
10951 }
10952 else if (GET_CODE (op1) == PLUS
10953 && arm_carry_operation (op0, mode))
10954 {
10955 op0 = XEXP (op1, 0);
10956 op1 = XEXP (op1, 1);
10957 }
10958 else if (GET_CODE (op0) == PLUS)
10959 {
10960 op0 = strip_carry_operation (op0);
10961 if (swap_commutative_operands_p (op0, op1))
10962 std::swap (op0, op1);
10963 }
10964
10965 if (arm_carry_operation (op0, mode))
10966 {
10967 /* Adding the carry to a register is a canonicalization of
10968 adding 0 to the register plus the carry. */
10969 if (speed_p)
10970 *cost += extra_cost->alu.arith;
10971 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10972 return true;
10973 }
10974
10975 shift_reg = NULL;
10976 shift_op = shifter_op_p (op0, &shift_reg);
10977 if (shift_op != NULL)
10978 {
10979 if (shift_reg)
10980 {
10981 if (speed_p)
10982 *cost += extra_cost->alu.arith_shift_reg;
10983 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10984 }
10985 else if (speed_p)
10986 *cost += extra_cost->alu.arith_shift;
10987
10988 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10989 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10990 return true;
10991 }
10992
10993 if (GET_CODE (op0) == MULT)
10994 {
10995 rtx mul_op = op0;
10996
10997 if (TARGET_DSP_MULTIPLY
10998 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10999 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
11000 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
11001 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11002 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
11003 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
11004 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
11005 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
11006 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
11007 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
11008 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
11009 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
11010 == 16))))))
11011 {
11012 /* SMLA[BT][BT]. */
11013 if (speed_p)
11014 *cost += extra_cost->mult[0].extend_add;
11015 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
11016 SIGN_EXTEND, 0, speed_p)
11017 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
11018 SIGN_EXTEND, 0, speed_p)
11019 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11020 return true;
11021 }
11022
11023 if (speed_p)
11024 *cost += extra_cost->mult[0].add;
11025 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
11026 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
11027 + rtx_cost (op1, mode, PLUS, 1, speed_p));
11028 return true;
11029 }
11030
11031 if (CONST_INT_P (op1))
11032 {
11033 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
11034 INTVAL (op1), NULL_RTX,
11035 NULL_RTX, 1, 0);
11036 *cost = COSTS_N_INSNS (insns);
11037 if (speed_p)
11038 *cost += insns * extra_cost->alu.arith;
11039 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11040 return true;
11041 }
11042
11043 if (speed_p)
11044 *cost += extra_cost->alu.arith;
11045
11046 /* Don't recurse here because we want to test the operands
11047 without any carry operation. */
11048 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
11049 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
11050 return true;
11051 }
11052
11053 if (mode == DImode)
11054 {
11055 if (GET_CODE (XEXP (x, 0)) == MULT
11056 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
11057 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
11058 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
11059 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
11060 {
11061 if (speed_p)
11062 *cost += extra_cost->mult[1].extend_add;
11063 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11064 ZERO_EXTEND, 0, speed_p)
11065 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
11066 ZERO_EXTEND, 0, speed_p)
11067 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11068 return true;
11069 }
11070
11071 *cost += COSTS_N_INSNS (1);
11072
11073 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11074 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
11075 {
11076 if (speed_p)
11077 *cost += (extra_cost->alu.arith
11078 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11079 ? extra_cost->alu.arith
11080 : extra_cost->alu.arith_shift));
11081
11082 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
11083 0, speed_p)
11084 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
11085 return true;
11086 }
11087
11088 if (speed_p)
11089 *cost += 2 * extra_cost->alu.arith;
11090 return false;
11091 }
11092
11093 /* Vector mode? */
11094 *cost = LIBCALL_COST (2);
11095 return false;
11096 case IOR:
11097 {
11098 rtx sub0, sub1;
11099 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
11100 {
11101 if (speed_p)
11102 *cost += extra_cost->alu.rev;
11103
11104 return true;
11105 }
11106 else if (mode == SImode && arm_arch_thumb2
11107 && arm_bfi_p (x, &sub0, &sub1))
11108 {
11109 *cost += rtx_cost (sub0, mode, ZERO_EXTRACT, 1, speed_p);
11110 *cost += rtx_cost (sub1, mode, ZERO_EXTRACT, 0, speed_p);
11111 if (speed_p)
11112 *cost += extra_cost->alu.bfi;
11113
11114 return true;
11115 }
11116 }
11117
11118 /* Fall through. */
11119 case AND: case XOR:
11120 if (mode == SImode)
11121 {
11122 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
11123 rtx op0 = XEXP (x, 0);
11124 rtx shift_op, shift_reg;
11125
11126 if (subcode == NOT
11127 && (code == AND
11128 || (code == IOR && TARGET_THUMB2)))
11129 op0 = XEXP (op0, 0);
11130
11131 shift_reg = NULL;
11132 shift_op = shifter_op_p (op0, &shift_reg);
11133 if (shift_op != NULL)
11134 {
11135 if (shift_reg)
11136 {
11137 if (speed_p)
11138 *cost += extra_cost->alu.log_shift_reg;
11139 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11140 }
11141 else if (speed_p)
11142 *cost += extra_cost->alu.log_shift;
11143
11144 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
11145 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11146 return true;
11147 }
11148
11149 if (CONST_INT_P (XEXP (x, 1)))
11150 {
11151 int insns = arm_gen_constant (code, SImode, NULL_RTX,
11152 INTVAL (XEXP (x, 1)), NULL_RTX,
11153 NULL_RTX, 1, 0);
11154
11155 *cost = COSTS_N_INSNS (insns);
11156 if (speed_p)
11157 *cost += insns * extra_cost->alu.logical;
11158 *cost += rtx_cost (op0, mode, code, 0, speed_p);
11159 return true;
11160 }
11161
11162 if (speed_p)
11163 *cost += extra_cost->alu.logical;
11164 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
11165 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
11166 return true;
11167 }
11168
11169 if (mode == DImode)
11170 {
11171 rtx op0 = XEXP (x, 0);
11172 enum rtx_code subcode = GET_CODE (op0);
11173
11174 *cost += COSTS_N_INSNS (1);
11175
11176 if (subcode == NOT
11177 && (code == AND
11178 || (code == IOR && TARGET_THUMB2)))
11179 op0 = XEXP (op0, 0);
11180
11181 if (GET_CODE (op0) == ZERO_EXTEND)
11182 {
11183 if (speed_p)
11184 *cost += 2 * extra_cost->alu.logical;
11185
11186 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
11187 0, speed_p)
11188 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11189 return true;
11190 }
11191 else if (GET_CODE (op0) == SIGN_EXTEND)
11192 {
11193 if (speed_p)
11194 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
11195
11196 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
11197 0, speed_p)
11198 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
11199 return true;
11200 }
11201
11202 if (speed_p)
11203 *cost += 2 * extra_cost->alu.logical;
11204
11205 return true;
11206 }
11207 /* Vector mode? */
11208
11209 *cost = LIBCALL_COST (2);
11210 return false;
11211
11212 case MULT:
11213 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11214 && (mode == SFmode || !TARGET_VFP_SINGLE))
11215 {
11216 rtx op0 = XEXP (x, 0);
11217
11218 if (GET_CODE (op0) == NEG && !flag_rounding_math)
11219 op0 = XEXP (op0, 0);
11220
11221 if (speed_p)
11222 *cost += extra_cost->fp[mode != SFmode].mult;
11223
11224 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
11225 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
11226 return true;
11227 }
11228 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11229 {
11230 *cost = LIBCALL_COST (2);
11231 return false;
11232 }
11233
11234 if (mode == SImode)
11235 {
11236 if (TARGET_DSP_MULTIPLY
11237 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11238 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11239 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11240 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11241 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
11242 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11243 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11244 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
11245 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
11246 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
11247 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11248 && (INTVAL (XEXP (XEXP (x, 1), 1))
11249 == 16))))))
11250 {
11251 /* SMUL[TB][TB]. */
11252 if (speed_p)
11253 *cost += extra_cost->mult[0].extend;
11254 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
11255 SIGN_EXTEND, 0, speed_p);
11256 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
11257 SIGN_EXTEND, 1, speed_p);
11258 return true;
11259 }
11260 if (speed_p)
11261 *cost += extra_cost->mult[0].simple;
11262 return false;
11263 }
11264
11265 if (mode == DImode)
11266 {
11267 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11268 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
11269 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
11270 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
11271 {
11272 if (speed_p)
11273 *cost += extra_cost->mult[1].extend;
11274 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
11275 ZERO_EXTEND, 0, speed_p)
11276 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
11277 ZERO_EXTEND, 0, speed_p));
11278 return true;
11279 }
11280
11281 *cost = LIBCALL_COST (2);
11282 return false;
11283 }
11284
11285 /* Vector mode? */
11286 *cost = LIBCALL_COST (2);
11287 return false;
11288
11289 case NEG:
11290 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11291 && (mode == SFmode || !TARGET_VFP_SINGLE))
11292 {
11293 if (GET_CODE (XEXP (x, 0)) == MULT)
11294 {
11295 /* VNMUL. */
11296 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
11297 return true;
11298 }
11299
11300 if (speed_p)
11301 *cost += extra_cost->fp[mode != SFmode].neg;
11302
11303 return false;
11304 }
11305 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11306 {
11307 *cost = LIBCALL_COST (1);
11308 return false;
11309 }
11310
11311 if (mode == SImode)
11312 {
11313 if (GET_CODE (XEXP (x, 0)) == ABS)
11314 {
11315 *cost += COSTS_N_INSNS (1);
11316 /* Assume the non-flag-changing variant. */
11317 if (speed_p)
11318 *cost += (extra_cost->alu.log_shift
11319 + extra_cost->alu.arith_shift);
11320 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11321 return true;
11322 }
11323
11324 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11325 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11326 {
11327 *cost += COSTS_N_INSNS (1);
11328 /* No extra cost for MOV imm and MVN imm. */
11329 /* If the comparison op is using the flags, there's no further
11330 cost, otherwise we need to add the cost of the comparison. */
11331 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11332 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11333 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11334 {
11335 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11336 *cost += (COSTS_N_INSNS (1)
11337 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11338 0, speed_p)
11339 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11340 1, speed_p));
11341 if (speed_p)
11342 *cost += extra_cost->alu.arith;
11343 }
11344 return true;
11345 }
11346
11347 if (speed_p)
11348 *cost += extra_cost->alu.arith;
11349 return false;
11350 }
11351
11352 if (GET_MODE_CLASS (mode) == MODE_INT
11353 && GET_MODE_SIZE (mode) < 4)
11354 {
11355 /* Slightly disparage, as we might need an extend operation. */
11356 *cost += 1;
11357 if (speed_p)
11358 *cost += extra_cost->alu.arith;
11359 return false;
11360 }
11361
11362 if (mode == DImode)
11363 {
11364 *cost += COSTS_N_INSNS (1);
11365 if (speed_p)
11366 *cost += 2 * extra_cost->alu.arith;
11367 return false;
11368 }
11369
11370 /* Vector mode? */
11371 *cost = LIBCALL_COST (1);
11372 return false;
11373
11374 case NOT:
11375 if (mode == SImode)
11376 {
11377 rtx shift_op;
11378 rtx shift_reg = NULL;
11379
11380 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11381
11382 if (shift_op)
11383 {
11384 if (shift_reg != NULL)
11385 {
11386 if (speed_p)
11387 *cost += extra_cost->alu.log_shift_reg;
11388 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11389 }
11390 else if (speed_p)
11391 *cost += extra_cost->alu.log_shift;
11392 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11393 return true;
11394 }
11395
11396 if (speed_p)
11397 *cost += extra_cost->alu.logical;
11398 return false;
11399 }
11400 if (mode == DImode)
11401 {
11402 *cost += COSTS_N_INSNS (1);
11403 return false;
11404 }
11405
11406 /* Vector mode? */
11407
11408 *cost += LIBCALL_COST (1);
11409 return false;
11410
11411 case IF_THEN_ELSE:
11412 {
11413 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11414 {
11415 *cost += COSTS_N_INSNS (3);
11416 return true;
11417 }
11418 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11419 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11420
11421 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11422 /* Assume that if one arm of the if_then_else is a register,
11423 that it will be tied with the result and eliminate the
11424 conditional insn. */
11425 if (REG_P (XEXP (x, 1)))
11426 *cost += op2cost;
11427 else if (REG_P (XEXP (x, 2)))
11428 *cost += op1cost;
11429 else
11430 {
11431 if (speed_p)
11432 {
11433 if (extra_cost->alu.non_exec_costs_exec)
11434 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11435 else
11436 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11437 }
11438 else
11439 *cost += op1cost + op2cost;
11440 }
11441 }
11442 return true;
11443
11444 case COMPARE:
11445 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11446 *cost = 0;
11447 else
11448 {
11449 machine_mode op0mode;
11450 /* We'll mostly assume that the cost of a compare is the cost of the
11451 LHS. However, there are some notable exceptions. */
11452
11453 /* Floating point compares are never done as side-effects. */
11454 op0mode = GET_MODE (XEXP (x, 0));
11455 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11456 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11457 {
11458 if (speed_p)
11459 *cost += extra_cost->fp[op0mode != SFmode].compare;
11460
11461 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11462 {
11463 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11464 return true;
11465 }
11466
11467 return false;
11468 }
11469 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11470 {
11471 *cost = LIBCALL_COST (2);
11472 return false;
11473 }
11474
11475 /* DImode compares normally take two insns. */
11476 if (op0mode == DImode)
11477 {
11478 *cost += COSTS_N_INSNS (1);
11479 if (speed_p)
11480 *cost += 2 * extra_cost->alu.arith;
11481 return false;
11482 }
11483
11484 if (op0mode == SImode)
11485 {
11486 rtx shift_op;
11487 rtx shift_reg;
11488
11489 if (XEXP (x, 1) == const0_rtx
11490 && !(REG_P (XEXP (x, 0))
11491 || (GET_CODE (XEXP (x, 0)) == SUBREG
11492 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11493 {
11494 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11495
11496 /* Multiply operations that set the flags are often
11497 significantly more expensive. */
11498 if (speed_p
11499 && GET_CODE (XEXP (x, 0)) == MULT
11500 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11501 *cost += extra_cost->mult[0].flag_setting;
11502
11503 if (speed_p
11504 && GET_CODE (XEXP (x, 0)) == PLUS
11505 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11506 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11507 0), 1), mode))
11508 *cost += extra_cost->mult[0].flag_setting;
11509 return true;
11510 }
11511
11512 shift_reg = NULL;
11513 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11514 if (shift_op != NULL)
11515 {
11516 if (shift_reg != NULL)
11517 {
11518 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11519 1, speed_p);
11520 if (speed_p)
11521 *cost += extra_cost->alu.arith_shift_reg;
11522 }
11523 else if (speed_p)
11524 *cost += extra_cost->alu.arith_shift;
11525 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11526 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11527 return true;
11528 }
11529
11530 if (speed_p)
11531 *cost += extra_cost->alu.arith;
11532 if (CONST_INT_P (XEXP (x, 1))
11533 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11534 {
11535 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11536 return true;
11537 }
11538 return false;
11539 }
11540
11541 /* Vector mode? */
11542
11543 *cost = LIBCALL_COST (2);
11544 return false;
11545 }
11546 return true;
11547
11548 case EQ:
11549 case GE:
11550 case GT:
11551 case LE:
11552 case LT:
11553 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11554 vcle and vclt). */
11555 if (TARGET_NEON
11556 && TARGET_HARD_FLOAT
11557 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11558 && (XEXP (x, 1) == CONST0_RTX (mode)))
11559 {
11560 *cost = 0;
11561 return true;
11562 }
11563
11564 /* Fall through. */
11565 case NE:
11566 case LTU:
11567 case LEU:
11568 case GEU:
11569 case GTU:
11570 case ORDERED:
11571 case UNORDERED:
11572 case UNEQ:
11573 case UNLE:
11574 case UNLT:
11575 case UNGE:
11576 case UNGT:
11577 case LTGT:
11578 if (outer_code == SET)
11579 {
11580 /* Is it a store-flag operation? */
11581 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11582 && XEXP (x, 1) == const0_rtx)
11583 {
11584 /* Thumb also needs an IT insn. */
11585 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11586 return true;
11587 }
11588 if (XEXP (x, 1) == const0_rtx)
11589 {
11590 switch (code)
11591 {
11592 case LT:
11593 /* LSR Rd, Rn, #31. */
11594 if (speed_p)
11595 *cost += extra_cost->alu.shift;
11596 break;
11597
11598 case EQ:
11599 /* RSBS T1, Rn, #0
11600 ADC Rd, Rn, T1. */
11601
11602 case NE:
11603 /* SUBS T1, Rn, #1
11604 SBC Rd, Rn, T1. */
11605 *cost += COSTS_N_INSNS (1);
11606 break;
11607
11608 case LE:
11609 /* RSBS T1, Rn, Rn, LSR #31
11610 ADC Rd, Rn, T1. */
11611 *cost += COSTS_N_INSNS (1);
11612 if (speed_p)
11613 *cost += extra_cost->alu.arith_shift;
11614 break;
11615
11616 case GT:
11617 /* RSB Rd, Rn, Rn, ASR #1
11618 LSR Rd, Rd, #31. */
11619 *cost += COSTS_N_INSNS (1);
11620 if (speed_p)
11621 *cost += (extra_cost->alu.arith_shift
11622 + extra_cost->alu.shift);
11623 break;
11624
11625 case GE:
11626 /* ASR Rd, Rn, #31
11627 ADD Rd, Rn, #1. */
11628 *cost += COSTS_N_INSNS (1);
11629 if (speed_p)
11630 *cost += extra_cost->alu.shift;
11631 break;
11632
11633 default:
11634 /* Remaining cases are either meaningless or would take
11635 three insns anyway. */
11636 *cost = COSTS_N_INSNS (3);
11637 break;
11638 }
11639 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11640 return true;
11641 }
11642 else
11643 {
11644 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11645 if (CONST_INT_P (XEXP (x, 1))
11646 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11647 {
11648 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11649 return true;
11650 }
11651
11652 return false;
11653 }
11654 }
11655 /* Not directly inside a set. If it involves the condition code
11656 register it must be the condition for a branch, cond_exec or
11657 I_T_E operation. Since the comparison is performed elsewhere
11658 this is just the control part which has no additional
11659 cost. */
11660 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11661 && XEXP (x, 1) == const0_rtx)
11662 {
11663 *cost = 0;
11664 return true;
11665 }
11666 return false;
11667
11668 case ABS:
11669 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11670 && (mode == SFmode || !TARGET_VFP_SINGLE))
11671 {
11672 if (speed_p)
11673 *cost += extra_cost->fp[mode != SFmode].neg;
11674
11675 return false;
11676 }
11677 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11678 {
11679 *cost = LIBCALL_COST (1);
11680 return false;
11681 }
11682
11683 if (mode == SImode)
11684 {
11685 if (speed_p)
11686 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11687 return false;
11688 }
11689 /* Vector mode? */
11690 *cost = LIBCALL_COST (1);
11691 return false;
11692
11693 case SIGN_EXTEND:
11694 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11695 && MEM_P (XEXP (x, 0)))
11696 {
11697 if (mode == DImode)
11698 *cost += COSTS_N_INSNS (1);
11699
11700 if (!speed_p)
11701 return true;
11702
11703 if (GET_MODE (XEXP (x, 0)) == SImode)
11704 *cost += extra_cost->ldst.load;
11705 else
11706 *cost += extra_cost->ldst.load_sign_extend;
11707
11708 if (mode == DImode)
11709 *cost += extra_cost->alu.shift;
11710
11711 return true;
11712 }
11713
11714 /* Widening from less than 32-bits requires an extend operation. */
11715 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11716 {
11717 /* We have SXTB/SXTH. */
11718 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11719 if (speed_p)
11720 *cost += extra_cost->alu.extend;
11721 }
11722 else if (GET_MODE (XEXP (x, 0)) != SImode)
11723 {
11724 /* Needs two shifts. */
11725 *cost += COSTS_N_INSNS (1);
11726 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11727 if (speed_p)
11728 *cost += 2 * extra_cost->alu.shift;
11729 }
11730
11731 /* Widening beyond 32-bits requires one more insn. */
11732 if (mode == DImode)
11733 {
11734 *cost += COSTS_N_INSNS (1);
11735 if (speed_p)
11736 *cost += extra_cost->alu.shift;
11737 }
11738
11739 return true;
11740
11741 case ZERO_EXTEND:
11742 if ((arm_arch4
11743 || GET_MODE (XEXP (x, 0)) == SImode
11744 || GET_MODE (XEXP (x, 0)) == QImode)
11745 && MEM_P (XEXP (x, 0)))
11746 {
11747 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11748
11749 if (mode == DImode)
11750 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11751
11752 return true;
11753 }
11754
11755 /* Widening from less than 32-bits requires an extend operation. */
11756 if (GET_MODE (XEXP (x, 0)) == QImode)
11757 {
11758 /* UXTB can be a shorter instruction in Thumb2, but it might
11759 be slower than the AND Rd, Rn, #255 alternative. When
11760 optimizing for speed it should never be slower to use
11761 AND, and we don't really model 16-bit vs 32-bit insns
11762 here. */
11763 if (speed_p)
11764 *cost += extra_cost->alu.logical;
11765 }
11766 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11767 {
11768 /* We have UXTB/UXTH. */
11769 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11770 if (speed_p)
11771 *cost += extra_cost->alu.extend;
11772 }
11773 else if (GET_MODE (XEXP (x, 0)) != SImode)
11774 {
11775 /* Needs two shifts. It's marginally preferable to use
11776 shifts rather than two BIC instructions as the second
11777 shift may merge with a subsequent insn as a shifter
11778 op. */
11779 *cost = COSTS_N_INSNS (2);
11780 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11781 if (speed_p)
11782 *cost += 2 * extra_cost->alu.shift;
11783 }
11784
11785 /* Widening beyond 32-bits requires one more insn. */
11786 if (mode == DImode)
11787 {
11788 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11789 }
11790
11791 return true;
11792
11793 case CONST_INT:
11794 *cost = 0;
11795 /* CONST_INT has no mode, so we cannot tell for sure how many
11796 insns are really going to be needed. The best we can do is
11797 look at the value passed. If it fits in SImode, then assume
11798 that's the mode it will be used for. Otherwise assume it
11799 will be used in DImode. */
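/* For example (illustrative value), 0x1ffffffff does not survive truncation
   to SImode, so it is costed below as two SImode constants: the truncated
   low part and the value shifted right by 32. */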
11800 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11801 mode = SImode;
11802 else
11803 mode = DImode;
11804
11805 /* Avoid blowing up in arm_gen_constant (). */
11806 if (!(outer_code == PLUS
11807 || outer_code == AND
11808 || outer_code == IOR
11809 || outer_code == XOR
11810 || outer_code == MINUS))
11811 outer_code = SET;
11812
11813 const_int_cost:
11814 if (mode == SImode)
11815 {
11816 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11817 INTVAL (x), NULL, NULL,
11818 0, 0));
11819 /* Extra costs? */
11820 }
11821 else
11822 {
11823 *cost += COSTS_N_INSNS (arm_gen_constant
11824 (outer_code, SImode, NULL,
11825 trunc_int_for_mode (INTVAL (x), SImode),
11826 NULL, NULL, 0, 0)
11827 + arm_gen_constant (outer_code, SImode, NULL,
11828 INTVAL (x) >> 32, NULL,
11829 NULL, 0, 0));
11830 /* Extra costs? */
11831 }
11832
11833 return true;
11834
11835 case CONST:
11836 case LABEL_REF:
11837 case SYMBOL_REF:
11838 if (speed_p)
11839 {
11840 if (arm_arch_thumb2 && !flag_pic)
11841 *cost += COSTS_N_INSNS (1);
11842 else
11843 *cost += extra_cost->ldst.load;
11844 }
11845 else
11846 *cost += COSTS_N_INSNS (1);
11847
11848 if (flag_pic)
11849 {
11850 *cost += COSTS_N_INSNS (1);
11851 if (speed_p)
11852 *cost += extra_cost->alu.arith;
11853 }
11854
11855 return true;
11856
11857 case CONST_FIXED:
11858 *cost = COSTS_N_INSNS (4);
11859 /* Fixme. */
11860 return true;
11861
11862 case CONST_DOUBLE:
11863 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11864 && (mode == SFmode || !TARGET_VFP_SINGLE))
11865 {
11866 if (vfp3_const_double_rtx (x))
11867 {
11868 if (speed_p)
11869 *cost += extra_cost->fp[mode == DFmode].fpconst;
11870 return true;
11871 }
11872
11873 if (speed_p)
11874 {
11875 if (mode == DFmode)
11876 *cost += extra_cost->ldst.loadd;
11877 else
11878 *cost += extra_cost->ldst.loadf;
11879 }
11880 else
11881 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11882
11883 return true;
11884 }
11885 *cost = COSTS_N_INSNS (4);
11886 return true;
11887
11888 case CONST_VECTOR:
11889 /* Fixme. */
11890 if (((TARGET_NEON && TARGET_HARD_FLOAT
11891 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11892 || TARGET_HAVE_MVE)
11893 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11894 *cost = COSTS_N_INSNS (1);
11895 else
11896 *cost = COSTS_N_INSNS (4);
11897 return true;
11898
11899 case HIGH:
11900 case LO_SUM:
11901 /* When optimizing for size, we prefer constant pool entries to
11902 MOVW/MOVT pairs, so bump the cost of these slightly. */
11903 if (!speed_p)
11904 *cost += 1;
11905 return true;
11906
11907 case CLZ:
11908 if (speed_p)
11909 *cost += extra_cost->alu.clz;
11910 return false;
11911
11912 case SMIN:
11913 if (XEXP (x, 1) == const0_rtx)
11914 {
11915 if (speed_p)
11916 *cost += extra_cost->alu.log_shift;
11917 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11918 return true;
11919 }
11920 /* Fall through. */
11921 case SMAX:
11922 case UMIN:
11923 case UMAX:
11924 *cost += COSTS_N_INSNS (1);
11925 return false;
11926
11927 case TRUNCATE:
11928 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11929 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11930 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11931 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11932 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11933 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11934 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11935 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11936 == ZERO_EXTEND))))
11937 {
11938 if (speed_p)
11939 *cost += extra_cost->mult[1].extend;
11940 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11941 ZERO_EXTEND, 0, speed_p)
11942 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11943 ZERO_EXTEND, 0, speed_p));
11944 return true;
11945 }
11946 *cost = LIBCALL_COST (1);
11947 return false;
11948
11949 case UNSPEC_VOLATILE:
11950 case UNSPEC:
11951 return arm_unspec_cost (x, outer_code, speed_p, cost);
11952
11953 case PC:
11954 /* Reading the PC is like reading any other register. Writing it
11955 is more expensive, but we take that into account elsewhere. */
11956 *cost = 0;
11957 return true;
11958
11959 case ZERO_EXTRACT:
11960 /* TODO: Simple zero_extract of bottom bits using AND. */
11961 /* Fall through. */
11962 case SIGN_EXTRACT:
11963 if (arm_arch6
11964 && mode == SImode
11965 && CONST_INT_P (XEXP (x, 1))
11966 && CONST_INT_P (XEXP (x, 2)))
11967 {
11968 if (speed_p)
11969 *cost += extra_cost->alu.bfx;
11970 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11971 return true;
11972 }
11973 /* Without UBFX/SBFX, need to resort to shift operations. */
11974 *cost += COSTS_N_INSNS (1);
11975 if (speed_p)
11976 *cost += 2 * extra_cost->alu.shift;
11977 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11978 return true;
11979
11980 case FLOAT_EXTEND:
11981 if (TARGET_HARD_FLOAT)
11982 {
11983 if (speed_p)
11984 *cost += extra_cost->fp[mode == DFmode].widen;
11985 if (!TARGET_VFP5
11986 && GET_MODE (XEXP (x, 0)) == HFmode)
11987 {
11988 /* Pre v8, widening HF->DF is a two-step process, first
11989 widening to SFmode. */
11990 *cost += COSTS_N_INSNS (1);
11991 if (speed_p)
11992 *cost += extra_cost->fp[0].widen;
11993 }
11994 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11995 return true;
11996 }
11997
11998 *cost = LIBCALL_COST (1);
11999 return false;
12000
12001 case FLOAT_TRUNCATE:
12002 if (TARGET_HARD_FLOAT)
12003 {
12004 if (speed_p)
12005 *cost += extra_cost->fp[mode == DFmode].narrow;
12006 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
12007 return true;
12008 /* Vector modes? */
12009 }
12010 *cost = LIBCALL_COST (1);
12011 return false;
12012
12013 case FMA:
12014 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
12015 {
12016 rtx op0 = XEXP (x, 0);
12017 rtx op1 = XEXP (x, 1);
12018 rtx op2 = XEXP (x, 2);
12019
12020
12021 /* vfms or vfnma. */
12022 if (GET_CODE (op0) == NEG)
12023 op0 = XEXP (op0, 0);
12024
12025 /* vfnms or vfnma. */
12026 if (GET_CODE (op2) == NEG)
12027 op2 = XEXP (op2, 0);
12028
12029 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
12030 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
12031 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
12032
12033 if (speed_p)
12034 *cost += extra_cost->fp[mode == DFmode].fma;
12035
12036 return true;
12037 }
12038
12039 *cost = LIBCALL_COST (3);
12040 return false;
12041
12042 case FIX:
12043 case UNSIGNED_FIX:
12044 if (TARGET_HARD_FLOAT)
12045 {
12046 /* The *combine_vcvtf2i reduces a vmul+vcvt into
12047 a vcvt fixed-point conversion. */
12048 if (code == FIX && mode == SImode
12049 && GET_CODE (XEXP (x, 0)) == FIX
12050 && GET_MODE (XEXP (x, 0)) == SFmode
12051 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12052 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
12053 > 0)
12054 {
12055 if (speed_p)
12056 *cost += extra_cost->fp[0].toint;
12057
12058 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
12059 code, 0, speed_p);
12060 return true;
12061 }
12062
12063 if (GET_MODE_CLASS (mode) == MODE_INT)
12064 {
12065 mode = GET_MODE (XEXP (x, 0));
12066 if (speed_p)
12067 *cost += extra_cost->fp[mode == DFmode].toint;
12068 /* Strip off the 'cost' of rounding towards zero. */
12069 if (GET_CODE (XEXP (x, 0)) == FIX)
12070 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
12071 0, speed_p);
12072 else
12073 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
12074 /* ??? Increase the cost to deal with transferring from
12075 FP -> CORE registers? */
12076 return true;
12077 }
12078 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
12079 && TARGET_VFP5)
12080 {
12081 if (speed_p)
12082 *cost += extra_cost->fp[mode == DFmode].roundint;
12083 return false;
12084 }
12085 /* Vector costs? */
12086 }
12087 *cost = LIBCALL_COST (1);
12088 return false;
12089
12090 case FLOAT:
12091 case UNSIGNED_FLOAT:
12092 if (TARGET_HARD_FLOAT)
12093 {
12094 /* ??? Increase the cost to deal with transferring from CORE
12095 -> FP registers? */
12096 if (speed_p)
12097 *cost += extra_cost->fp[mode == DFmode].fromint;
12098 return false;
12099 }
12100 *cost = LIBCALL_COST (1);
12101 return false;
12102
12103 case CALL:
12104 return true;
12105
12106 case ASM_OPERANDS:
12107 {
12108 /* Just a guess: the number of instructions in the asm template
12109 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
12110 though (see PR60663). */
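/* For instance, a two-instruction template with three inputs is costed
   as COSTS_N_INSNS (5). */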
12111 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
12112 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
12113
12114 *cost = COSTS_N_INSNS (asm_length + num_operands);
12115 return true;
12116 }
12117 default:
12118 if (mode != VOIDmode)
12119 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
12120 else
12121 *cost = COSTS_N_INSNS (4); /* Who knows? */
12122 return false;
12123 }
12124 }
12125
12126 #undef HANDLE_NARROW_SHIFT_ARITH
12127
12128 /* RTX costs entry point. */
12129
12130 static bool
12131 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
12132 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
12133 {
12134 bool result;
12135 int code = GET_CODE (x);
12136 gcc_assert (current_tune->insn_extra_cost);
12137
12138 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
12139 (enum rtx_code) outer_code,
12140 current_tune->insn_extra_cost,
12141 total, speed);
12142
12143 if (dump_file && arm_verbose_cost)
12144 {
12145 print_rtl_single (dump_file, x);
12146 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
12147 *total, result ? "final" : "partial");
12148 }
12149 return result;
12150 }
12151
12152 static int
12153 arm_insn_cost (rtx_insn *insn, bool speed)
12154 {
12155 int cost;
12156
12157 /* Don't cost a simple reg-reg move at a full insn cost: such moves
12158 will likely disappear during register allocation. */
12159 if (!reload_completed
12160 && GET_CODE (PATTERN (insn)) == SET
12161 && REG_P (SET_DEST (PATTERN (insn)))
12162 && REG_P (SET_SRC (PATTERN (insn))))
12163 return 2;
12164 cost = pattern_cost (PATTERN (insn), speed);
12165 /* If the cost is zero, then it's likely a complex insn. We don't want the
12166 cost of these to be less than something we know about. */
12167 return cost ? cost : COSTS_N_INSNS (2);
12168 }
12169
12170 /* All address computations that can be done are free, but rtx cost returns
12171 the same for practically all of them. So we weight the different types
12172 of address here in the order (most pref first):
12173 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
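/* Illustrative examples: a post-increment address scores 0, [r0, #4]
   scores 2, [r0, r1, lsl #2] scores 3, [r0, r1] scores 4, a plain [r0]
   scores 6 and a SYMBOL_REF or LABEL_REF address scores 10. */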
12174 static inline int
12175 arm_arm_address_cost (rtx x)
12176 {
12177 enum rtx_code c = GET_CODE (x);
12178
12179 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
12180 return 0;
12181 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
12182 return 10;
12183
12184 if (c == PLUS)
12185 {
12186 if (CONST_INT_P (XEXP (x, 1)))
12187 return 2;
12188
12189 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
12190 return 3;
12191
12192 return 4;
12193 }
12194
12195 return 6;
12196 }
12197
12198 static inline int
12199 arm_thumb_address_cost (rtx x)
12200 {
12201 enum rtx_code c = GET_CODE (x);
12202
12203 if (c == REG)
12204 return 1;
12205 if (c == PLUS
12206 && REG_P (XEXP (x, 0))
12207 && CONST_INT_P (XEXP (x, 1)))
12208 return 1;
12209
12210 return 2;
12211 }
12212
12213 static int
12214 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
12215 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
12216 {
12217 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
12218 }
12219
12220 /* Adjust cost hook for XScale. */
12221 static bool
12222 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12223 int * cost)
12224 {
12225 /* Some true dependencies can have a higher cost depending
12226 on precisely how certain input operands are used. */
12227 if (dep_type == 0
12228 && recog_memoized (insn) >= 0
12229 && recog_memoized (dep) >= 0)
12230 {
12231 int shift_opnum = get_attr_shift (insn);
12232 enum attr_type attr_type = get_attr_type (dep);
12233
12234 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
12235 operand for INSN. If we have a shifted input operand and the
12236 instruction we depend on is another ALU instruction, then we may
12237 have to account for an additional stall. */
12238 if (shift_opnum != 0
12239 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
12240 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
12241 || attr_type == TYPE_ALUS_SHIFT_IMM
12242 || attr_type == TYPE_LOGIC_SHIFT_IMM
12243 || attr_type == TYPE_LOGICS_SHIFT_IMM
12244 || attr_type == TYPE_ALU_SHIFT_REG
12245 || attr_type == TYPE_ALUS_SHIFT_REG
12246 || attr_type == TYPE_LOGIC_SHIFT_REG
12247 || attr_type == TYPE_LOGICS_SHIFT_REG
12248 || attr_type == TYPE_MOV_SHIFT
12249 || attr_type == TYPE_MVN_SHIFT
12250 || attr_type == TYPE_MOV_SHIFT_REG
12251 || attr_type == TYPE_MVN_SHIFT_REG))
12252 {
12253 rtx shifted_operand;
12254 int opno;
12255
12256 /* Get the shifted operand. */
12257 extract_insn (insn);
12258 shifted_operand = recog_data.operand[shift_opnum];
12259
12260 /* Iterate over all the operands in DEP. If we write an operand
12261 that overlaps with SHIFTED_OPERAND, then we have to increase the
12262 cost of this dependency. */
12263 extract_insn (dep);
12264 preprocess_constraints (dep);
12265 for (opno = 0; opno < recog_data.n_operands; opno++)
12266 {
12267 /* We can ignore strict inputs. */
12268 if (recog_data.operand_type[opno] == OP_IN)
12269 continue;
12270
12271 if (reg_overlap_mentioned_p (recog_data.operand[opno],
12272 shifted_operand))
12273 {
12274 *cost = 2;
12275 return false;
12276 }
12277 }
12278 }
12279 }
12280 return true;
12281 }
12282
12283 /* Adjust cost hook for Cortex A9. */
12284 static bool
12285 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12286 int * cost)
12287 {
12288 switch (dep_type)
12289 {
12290 case REG_DEP_ANTI:
12291 *cost = 0;
12292 return false;
12293
12294 case REG_DEP_TRUE:
12295 case REG_DEP_OUTPUT:
12296 if (recog_memoized (insn) >= 0
12297 && recog_memoized (dep) >= 0)
12298 {
12299 if (GET_CODE (PATTERN (insn)) == SET)
12300 {
12301 if (GET_MODE_CLASS
12302 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
12303 || GET_MODE_CLASS
12304 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
12305 {
12306 enum attr_type attr_type_insn = get_attr_type (insn);
12307 enum attr_type attr_type_dep = get_attr_type (dep);
12308
12309 /* By default all dependencies of the form
12310 s0 = s0 <op> s1
12311 s0 = s0 <op> s2
12312 have an extra latency of 1 cycle because
12313 of the input and output dependency in this
12314 case. However, this gets modeled as a true
12315 dependency and hence all these checks. */
12316 if (REG_P (SET_DEST (PATTERN (insn)))
12317 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12318 {
12319 /* FMACS is a special case where the dependent
12320 instruction can be issued 3 cycles before
12321 the normal latency in case of an output
12322 dependency. */
12323 if ((attr_type_insn == TYPE_FMACS
12324 || attr_type_insn == TYPE_FMACD)
12325 && (attr_type_dep == TYPE_FMACS
12326 || attr_type_dep == TYPE_FMACD))
12327 {
12328 if (dep_type == REG_DEP_OUTPUT)
12329 *cost = insn_default_latency (dep) - 3;
12330 else
12331 *cost = insn_default_latency (dep);
12332 return false;
12333 }
12334 else
12335 {
12336 if (dep_type == REG_DEP_OUTPUT)
12337 *cost = insn_default_latency (dep) + 1;
12338 else
12339 *cost = insn_default_latency (dep);
12340 }
12341 return false;
12342 }
12343 }
12344 }
12345 }
12346 break;
12347
12348 default:
12349 gcc_unreachable ();
12350 }
12351
12352 return true;
12353 }
12354
12355 /* Adjust cost hook for FA726TE. */
12356 static bool
12357 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12358 int * cost)
12359 {
12360 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
12361 has a penalty of 3. */
12362 if (dep_type == REG_DEP_TRUE
12363 && recog_memoized (insn) >= 0
12364 && recog_memoized (dep) >= 0
12365 && get_attr_conds (dep) == CONDS_SET)
12366 {
12367 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12368 if (get_attr_conds (insn) == CONDS_USE
12369 && get_attr_type (insn) != TYPE_BRANCH)
12370 {
12371 *cost = 3;
12372 return false;
12373 }
12374
12375 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12376 || get_attr_conds (insn) == CONDS_USE)
12377 {
12378 *cost = 0;
12379 return false;
12380 }
12381 }
12382
12383 return true;
12384 }
12385
12386 /* Implement TARGET_REGISTER_MOVE_COST.
12387
12388 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
12389 it is typically more expensive than a single memory access. We set
12390 the cost to less than two memory accesses so that floating
12391 point to integer conversion does not go through memory. */
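/* For instance, the VFP<->core cost of 15 below is deliberately less than
   twice the TARGET_32BIT cost of 10 returned by arm_memory_move_cost, so a
   store/load round trip through memory is never preferred. */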
12392
12393 int
12394 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12395 reg_class_t from, reg_class_t to)
12396 {
12397 if (TARGET_32BIT)
12398 {
12399 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12400 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12401 return 15;
12402 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12403 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12404 return 4;
12405 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12406 return 20;
12407 else
12408 return 2;
12409 }
12410 else
12411 {
12412 if (from == HI_REGS || to == HI_REGS)
12413 return 4;
12414 else
12415 return 2;
12416 }
12417 }
12418
12419 /* Implement TARGET_MEMORY_MOVE_COST. */
12420
12421 int
12422 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12423 bool in ATTRIBUTE_UNUSED)
12424 {
12425 if (TARGET_32BIT)
12426 return 10;
12427 else
12428 {
12429 if (GET_MODE_SIZE (mode) < 4)
12430 return 8;
12431 else
12432 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12433 }
12434 }
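/* For illustration (the values follow directly from the formula above): on a
   Thumb-1 (!TARGET_32BIT) target an SImode move (4 bytes) costs
   2 * 4 * 1 = 8 for LO_REGS and 2 * 4 * 2 = 16 for any other class, a DImode
   move in LO_REGS costs 2 * 8 * 1 = 16, and any mode narrower than 4 bytes
   costs a flat 8.  All 32-bit (ARM/Thumb-2) targets use the flat cost 10.  */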
12435
12436 /* Vectorizer cost model implementation. */
12437
12438 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12439 static int
12440 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12441 tree vectype,
12442 int misalign ATTRIBUTE_UNUSED)
12443 {
12444 unsigned elements;
12445
12446 switch (type_of_cost)
12447 {
12448 case scalar_stmt:
12449 return current_tune->vec_costs->scalar_stmt_cost;
12450
12451 case scalar_load:
12452 return current_tune->vec_costs->scalar_load_cost;
12453
12454 case scalar_store:
12455 return current_tune->vec_costs->scalar_store_cost;
12456
12457 case vector_stmt:
12458 return current_tune->vec_costs->vec_stmt_cost;
12459
12460 case vector_load:
12461 return current_tune->vec_costs->vec_align_load_cost;
12462
12463 case vector_store:
12464 return current_tune->vec_costs->vec_store_cost;
12465
12466 case vec_to_scalar:
12467 return current_tune->vec_costs->vec_to_scalar_cost;
12468
12469 case scalar_to_vec:
12470 return current_tune->vec_costs->scalar_to_vec_cost;
12471
12472 case unaligned_load:
12473 case vector_gather_load:
12474 return current_tune->vec_costs->vec_unalign_load_cost;
12475
12476 case unaligned_store:
12477 case vector_scatter_store:
12478 return current_tune->vec_costs->vec_unalign_store_cost;
12479
12480 case cond_branch_taken:
12481 return current_tune->vec_costs->cond_taken_branch_cost;
12482
12483 case cond_branch_not_taken:
12484 return current_tune->vec_costs->cond_not_taken_branch_cost;
12485
12486 case vec_perm:
12487 case vec_promote_demote:
12488 return current_tune->vec_costs->vec_stmt_cost;
12489
12490 case vec_construct:
12491 elements = TYPE_VECTOR_SUBPARTS (vectype);
12492 return elements / 2 + 1;
12493
12494 default:
12495 gcc_unreachable ();
12496 }
12497 }
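/* Worked example: for vec_construct of a V4SImode vector,
   TYPE_VECTOR_SUBPARTS is 4, so the cost returned is 4 / 2 + 1 = 3.  Every
   other kind of statement simply reads the per-CPU cost table hanging off
   current_tune->vec_costs.  */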
12498
12499 /* Return true if and only if this insn can dual-issue only as older. */
12500 static bool
12501 cortexa7_older_only (rtx_insn *insn)
12502 {
12503 if (recog_memoized (insn) < 0)
12504 return false;
12505
12506 switch (get_attr_type (insn))
12507 {
12508 case TYPE_ALU_DSP_REG:
12509 case TYPE_ALU_SREG:
12510 case TYPE_ALUS_SREG:
12511 case TYPE_LOGIC_REG:
12512 case TYPE_LOGICS_REG:
12513 case TYPE_ADC_REG:
12514 case TYPE_ADCS_REG:
12515 case TYPE_ADR:
12516 case TYPE_BFM:
12517 case TYPE_REV:
12518 case TYPE_MVN_REG:
12519 case TYPE_SHIFT_IMM:
12520 case TYPE_SHIFT_REG:
12521 case TYPE_LOAD_BYTE:
12522 case TYPE_LOAD_4:
12523 case TYPE_STORE_4:
12524 case TYPE_FFARITHS:
12525 case TYPE_FADDS:
12526 case TYPE_FFARITHD:
12527 case TYPE_FADDD:
12528 case TYPE_FMOV:
12529 case TYPE_F_CVT:
12530 case TYPE_FCMPS:
12531 case TYPE_FCMPD:
12532 case TYPE_FCONSTS:
12533 case TYPE_FCONSTD:
12534 case TYPE_FMULS:
12535 case TYPE_FMACS:
12536 case TYPE_FMULD:
12537 case TYPE_FMACD:
12538 case TYPE_FDIVS:
12539 case TYPE_FDIVD:
12540 case TYPE_F_MRC:
12541 case TYPE_F_MRRC:
12542 case TYPE_F_FLAG:
12543 case TYPE_F_LOADS:
12544 case TYPE_F_STORES:
12545 return true;
12546 default:
12547 return false;
12548 }
12549 }
12550
12551 /* Return true if and only if this insn can dual-issue as younger. */
12552 static bool
12553 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12554 {
12555 if (recog_memoized (insn) < 0)
12556 {
12557 if (verbose > 5)
12558 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12559 return false;
12560 }
12561
12562 switch (get_attr_type (insn))
12563 {
12564 case TYPE_ALU_IMM:
12565 case TYPE_ALUS_IMM:
12566 case TYPE_LOGIC_IMM:
12567 case TYPE_LOGICS_IMM:
12568 case TYPE_EXTEND:
12569 case TYPE_MVN_IMM:
12570 case TYPE_MOV_IMM:
12571 case TYPE_MOV_REG:
12572 case TYPE_MOV_SHIFT:
12573 case TYPE_MOV_SHIFT_REG:
12574 case TYPE_BRANCH:
12575 case TYPE_CALL:
12576 return true;
12577 default:
12578 return false;
12579 }
12580 }
12581
12582
12583 /* Look for an instruction that can dual issue only as an older
12584 instruction, and move it in front of any instructions that can
12585 dual-issue as younger, while preserving the relative order of all
12586 other instructions in the ready list. This is a heuristic to help
12587 dual-issue in later cycles, by postponing issue of more flexible
12588 instructions. This heuristic may affect dual issue opportunities
12589 in the current cycle. */
12590 static void
12591 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12592 int *n_readyp, int clock)
12593 {
12594 int i;
12595 int first_older_only = -1, first_younger = -1;
12596
12597 if (verbose > 5)
12598 fprintf (file,
12599 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12600 clock,
12601 *n_readyp);
12602
12603 /* Traverse the ready list from the head (the instruction to issue
12604 first), looking for the first instruction that can issue as
12605 younger and the first instruction that can dual-issue only as
12606 older. */
12607 for (i = *n_readyp - 1; i >= 0; i--)
12608 {
12609 rtx_insn *insn = ready[i];
12610 if (cortexa7_older_only (insn))
12611 {
12612 first_older_only = i;
12613 if (verbose > 5)
12614 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12615 break;
12616 }
12617 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12618 first_younger = i;
12619 }
12620
12621 /* Nothing to reorder because either no younger insn was found, or an
12622 insn that can dual-issue only as older appears before any insn that
12623 can dual-issue as younger. */
12624 if (first_younger == -1)
12625 {
12626 if (verbose > 5)
12627 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12628 return;
12629 }
12630
12631 /* Nothing to reorder because no older-only insn in the ready list. */
12632 if (first_older_only == -1)
12633 {
12634 if (verbose > 5)
12635 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12636 return;
12637 }
12638
12639 /* Move first_older_only insn before first_younger. */
12640 if (verbose > 5)
12641 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12642 INSN_UID(ready [first_older_only]),
12643 INSN_UID(ready [first_younger]));
12644 rtx_insn *first_older_only_insn = ready [first_older_only];
12645 for (i = first_older_only; i < first_younger; i++)
12646 {
12647 ready[i] = ready[i+1];
12648 }
12649
12650 ready[i] = first_older_only_insn;
12651 return;
12652 }
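/* Illustrative example (the insn names are hypothetical): if the ready list,
   head first, is { Y1, Y2, O, X }, where Y1 and Y2 can issue as younger, O is
   older-only and X is neither, the scan above records first_younger at Y1 and
   first_older_only at O, and the rotation yields { O, Y1, Y2, X }.  O is
   therefore issued first, leaving Y1/Y2 available to pair as the younger
   instruction in later cycles.  */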
12653
12654 /* Implement TARGET_SCHED_REORDER. */
12655 static int
12656 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12657 int clock)
12658 {
12659 switch (arm_tune)
12660 {
12661 case TARGET_CPU_cortexa7:
12662 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12663 break;
12664 default:
12665 /* Do nothing for other cores. */
12666 break;
12667 }
12668
12669 return arm_issue_rate ();
12670 }
12671
12672 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12673 It corrects the value of COST based on the relationship between
12674 INSN and DEP through the dependence LINK. It returns the new
12675 value. There is a per-core adjust_cost hook to adjust scheduler costs
12676 and the per-core hook can choose to completely override the generic
12677 adjust_cost function. Only put bits of code into arm_adjust_cost that
12678 are common across all cores. */
12679 static int
12680 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12681 unsigned int)
12682 {
12683 rtx i_pat, d_pat;
12684
12685 /* When generating Thumb-1 code, we want to place flag-setting operations
12686 close to a conditional branch which depends on them, so that we can
12687 omit the comparison. */
12688 if (TARGET_THUMB1
12689 && dep_type == 0
12690 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12691 && recog_memoized (dep) >= 0
12692 && get_attr_conds (dep) == CONDS_SET)
12693 return 0;
12694
12695 if (current_tune->sched_adjust_cost != NULL)
12696 {
12697 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12698 return cost;
12699 }
12700
12701 /* XXX Is this strictly true? */
12702 if (dep_type == REG_DEP_ANTI
12703 || dep_type == REG_DEP_OUTPUT)
12704 return 0;
12705
12706 /* Call insns don't incur a stall, even if they follow a load. */
12707 if (dep_type == 0
12708 && CALL_P (insn))
12709 return 1;
12710
12711 if ((i_pat = single_set (insn)) != NULL
12712 && MEM_P (SET_SRC (i_pat))
12713 && (d_pat = single_set (dep)) != NULL
12714 && MEM_P (SET_DEST (d_pat)))
12715 {
12716 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12717 /* This is a load after a store; there is no conflict if the load reads
12718 from a cached area. Assume that loads from the stack, and from the
12719 constant pool are cached, and that others will miss. This is a
12720 hack. */
12721
12722 if ((SYMBOL_REF_P (src_mem)
12723 && CONSTANT_POOL_ADDRESS_P (src_mem))
12724 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12725 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12726 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12727 return 1;
12728 }
12729
12730 return cost;
12731 }
12732
12733 int
12734 arm_max_conditional_execute (void)
12735 {
12736 return max_insns_skipped;
12737 }
12738
12739 static int
12740 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12741 {
12742 if (TARGET_32BIT)
12743 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12744 else
12745 return (optimize > 0) ? 2 : 0;
12746 }
12747
12748 static int
12749 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12750 {
12751 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12752 }
12753
12754 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12755 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12756 sequences of non-executed instructions in IT blocks probably take the same
12757 amount of time as executed instructions (and the IT instruction itself takes
12758 space in icache). This function was experimentally determined to give good
12759 results on a popular embedded benchmark. */
12760
12761 static int
12762 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12763 {
12764 return (TARGET_32BIT && speed_p) ? 1
12765 : arm_default_branch_cost (speed_p, predictable_p);
12766 }
12767
12768 static int
12769 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12770 {
12771 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12772 }
12773
12774 static bool fp_consts_inited = false;
12775
12776 static REAL_VALUE_TYPE value_fp0;
12777
12778 static void
12779 init_fp_table (void)
12780 {
12781 REAL_VALUE_TYPE r;
12782
12783 r = REAL_VALUE_ATOF ("0", DFmode);
12784 value_fp0 = r;
12785 fp_consts_inited = true;
12786 }
12787
12788 /* Return TRUE if rtx X is a valid immediate FP constant. */
12789 int
12790 arm_const_double_rtx (rtx x)
12791 {
12792 const REAL_VALUE_TYPE *r;
12793
12794 if (!fp_consts_inited)
12795 init_fp_table ();
12796
12797 r = CONST_DOUBLE_REAL_VALUE (x);
12798 if (REAL_VALUE_MINUS_ZERO (*r))
12799 return 0;
12800
12801 if (real_equal (r, &value_fp0))
12802 return 1;
12803
12804 return 0;
12805 }
12806
12807 /* VFPv3 has a fairly wide range of representable immediates, formed from
12808 "quarter-precision" floating-point values. These can be evaluated using this
12809 formula (with ^ for exponentiation):
12810
12811 -1^s * n * 2^-r
12812
12813 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12814 16 <= n <= 31 and 0 <= r <= 7.
12815
12816 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12817
12818 - A (most-significant) is the sign bit.
12819 - BCD are the exponent (encoded as r XOR 3).
12820 - EFGH are the mantissa (encoded as n - 16).
12821 */
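/* Worked examples of the encoding above:
   1.0 = +16 * 2^-4, so s = 0, n = 16, r = 4, giving A = 0,
   BCD = 4 ^ 3 = 0b111, EFGH = 0, i.e. index 0x70;
   -1.0 encodes identically except A = 1, i.e. 0xf0;
   0.5 = +16 * 2^-5 gives BCD = 5 ^ 3 = 0b110, i.e. 0x60.
   The representable magnitudes therefore range from 16 * 2^-7 = 0.125 up to
   31 * 2^0 = 31.  */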
12822
12823 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12824 fconst[sd] instruction, or -1 if X isn't suitable. */
12825 static int
12826 vfp3_const_double_index (rtx x)
12827 {
12828 REAL_VALUE_TYPE r, m;
12829 int sign, exponent;
12830 unsigned HOST_WIDE_INT mantissa, mant_hi;
12831 unsigned HOST_WIDE_INT mask;
12832 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12833 bool fail;
12834
12835 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12836 return -1;
12837
12838 r = *CONST_DOUBLE_REAL_VALUE (x);
12839
12840 /* We can't represent these things, so detect them first. */
12841 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12842 return -1;
12843
12844 /* Extract sign, exponent and mantissa. */
12845 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12846 r = real_value_abs (&r);
12847 exponent = REAL_EXP (&r);
12848 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12849 highest (sign) bit, with a fixed binary point at bit point_pos.
12850 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12851 bits for the mantissa, this may fail (low bits would be lost). */
12852 real_ldexp (&m, &r, point_pos - exponent);
12853 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12854 mantissa = w.elt (0);
12855 mant_hi = w.elt (1);
12856
12857 /* If there are bits set in the low part of the mantissa, we can't
12858 represent this value. */
12859 if (mantissa != 0)
12860 return -1;
12861
12862 /* Now make it so that mantissa contains the most-significant bits, and move
12863 the point_pos to indicate that the least-significant bits have been
12864 discarded. */
12865 point_pos -= HOST_BITS_PER_WIDE_INT;
12866 mantissa = mant_hi;
12867
12868 /* We can permit four significant bits of mantissa only, plus a high bit
12869 which is always 1. */
12870 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12871 if ((mantissa & mask) != 0)
12872 return -1;
12873
12874 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12875 mantissa >>= point_pos - 5;
12876
12877 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12878 floating-point immediate zero with Neon using an integer-zero load, but
12879 that case is handled elsewhere.) */
12880 if (mantissa == 0)
12881 return -1;
12882
12883 gcc_assert (mantissa >= 16 && mantissa <= 31);
12884
12885 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12886 normalized significands are in the range [1, 2). (Our mantissa is shifted
12887 left 4 places at this point relative to normalized IEEE754 values). GCC
12888 internally uses [0.5, 1) (see real.cc), so the exponent returned from
12889 REAL_EXP must be altered. */
12890 exponent = 5 - exponent;
12891
12892 if (exponent < 0 || exponent > 7)
12893 return -1;
12894
12895 /* Sign, mantissa and exponent are now in the correct form to plug into the
12896 formula described in the comment above. */
12897 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12898 }
12899
12900 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12901 int
12902 vfp3_const_double_rtx (rtx x)
12903 {
12904 if (!TARGET_VFP3)
12905 return 0;
12906
12907 return vfp3_const_double_index (x) != -1;
12908 }
12909
12910 /* Recognize immediates which can be used in various Neon and MVE instructions.
12911 Legal immediates are described by the following table (for VMVN variants, the
12912 bitwise inverse of the constant shown is recognized. In either case, VMOV
12913 is output and the correct instruction to use for a given constant is chosen
12914 by the assembler). The constant shown is replicated across all elements of
12915 the destination vector.
12916
12917 insn elems variant constant (binary)
12918 ---- ----- ------- -----------------
12919 vmov i32 0 00000000 00000000 00000000 abcdefgh
12920 vmov i32 1 00000000 00000000 abcdefgh 00000000
12921 vmov i32 2 00000000 abcdefgh 00000000 00000000
12922 vmov i32 3 abcdefgh 00000000 00000000 00000000
12923 vmov i16 4 00000000 abcdefgh
12924 vmov i16 5 abcdefgh 00000000
12925 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12926 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12927 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12928 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12929 vmvn i16 10 00000000 abcdefgh
12930 vmvn i16 11 abcdefgh 00000000
12931 vmov i32 12 00000000 00000000 abcdefgh 11111111
12932 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12933 vmov i32 14 00000000 abcdefgh 11111111 11111111
12934 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12935 vmov i8 16 abcdefgh
12936 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12937 eeeeeeee ffffffff gggggggg hhhhhhhh
12938 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12939 vmov f32 19 00000000 00000000 00000000 00000000
12940
12941 For case 18, B = !b. Representable values are exactly those accepted by
12942 vfp3_const_double_index, but are output as floating-point numbers rather
12943 than indices.
12944
12945 For case 19, we will change it to vmov.i32 when assembling.
12946
12947 Variants 0-5 (inclusive) may also be used as immediates for the second
12948 operand of VORR/VBIC instructions.
12949
12950 The INVERSE argument causes the bitwise inverse of the given operand to be
12951 recognized instead (used for recognizing legal immediates for the VAND/VORN
12952 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12953 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12954 output, rather than the real insns vbic/vorr).
12955
12956 INVERSE makes no difference to the recognition of float vectors.
12957
12958 The return value is the variant of immediate as shown in the above table, or
12959 -1 if the given value doesn't match any of the listed patterns.
12960 */
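/* For example, a V4SImode constant with every element equal to 0x00005500
   splats to the little-endian byte pattern 00 55 00 00 repeated four times;
   the checks below classify it as variant 1 (vmov.i32 with abcdefgh = 0x55)
   with an element width of 32.  A constant whose elements are all-0xff bytes
   apart from a single abcdefgh byte lands in one of the vmvn variants
   (6-11) instead.  */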
12961 static int
12962 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12963 rtx *modconst, int *elementwidth)
12964 {
12965 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12966 matches = 1; \
12967 for (i = 0; i < idx; i += (STRIDE)) \
12968 if (!(TEST)) \
12969 matches = 0; \
12970 if (matches) \
12971 { \
12972 immtype = (CLASS); \
12973 elsize = (ELSIZE); \
12974 break; \
12975 }
12976
12977 unsigned int i, elsize = 0, idx = 0, n_elts;
12978 unsigned int innersize;
12979 unsigned char bytes[16] = {};
12980 int immtype = -1, matches;
12981 unsigned int invmask = inverse ? 0xff : 0;
12982 bool vector = GET_CODE (op) == CONST_VECTOR;
12983
12984 if (vector)
12985 n_elts = CONST_VECTOR_NUNITS (op);
12986 else
12987 {
12988 n_elts = 1;
12989 gcc_assert (mode != VOIDmode);
12990 }
12991
12992 innersize = GET_MODE_UNIT_SIZE (mode);
12993
12994 /* Only support 128-bit vectors for MVE. */
12995 if (TARGET_HAVE_MVE
12996 && (!vector
12997 || VALID_MVE_PRED_MODE (mode)
12998 || n_elts * innersize != 16))
12999 return -1;
13000
13001 if (!TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
13002 return -1;
13003
13004 /* Vectors of float constants. */
13005 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
13006 {
13007 rtx el0 = CONST_VECTOR_ELT (op, 0);
13008
13009 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
13010 return -1;
13011
13012 /* FP16 vectors cannot be represented. */
13013 if (GET_MODE_INNER (mode) == HFmode)
13014 return -1;
13015
13016 /* All elements in the vector must be the same. Note that 0.0 and -0.0
13017 are distinct in this context. */
13018 if (!const_vec_duplicate_p (op))
13019 return -1;
13020
13021 if (modconst)
13022 *modconst = CONST_VECTOR_ELT (op, 0);
13023
13024 if (elementwidth)
13025 *elementwidth = 0;
13026
13027 if (el0 == CONST0_RTX (GET_MODE (el0)))
13028 return 19;
13029 else
13030 return 18;
13031 }
13032
13033 /* The tricks done in the code below apply for little-endian vector layout.
13034 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
13035 FIXME: Implement logic for big-endian vectors. */
13036 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
13037 return -1;
13038
13039 /* Splat vector constant out into a byte vector. */
13040 for (i = 0; i < n_elts; i++)
13041 {
13042 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
13043 unsigned HOST_WIDE_INT elpart;
13044
13045 gcc_assert (CONST_INT_P (el));
13046 elpart = INTVAL (el);
13047
13048 for (unsigned int byte = 0; byte < innersize; byte++)
13049 {
13050 bytes[idx++] = (elpart & 0xff) ^ invmask;
13051 elpart >>= BITS_PER_UNIT;
13052 }
13053 }
13054
13055 /* Sanity check. */
13056 gcc_assert (idx == GET_MODE_SIZE (mode));
13057
13058 do
13059 {
13060 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
13061 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13062
13063 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13064 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13065
13066 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
13067 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13068
13069 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
13070 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
13071
13072 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
13073
13074 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
13075
13076 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
13077 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13078
13079 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13080 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13081
13082 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
13083 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13084
13085 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
13086 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
13087
13088 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
13089
13090 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
13091
13092 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
13093 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
13094
13095 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
13096 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
13097
13098 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
13099 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
13100
13101 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
13102 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
13103
13104 CHECK (1, 8, 16, bytes[i] == bytes[0]);
13105
13106 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
13107 && bytes[i] == bytes[(i + 8) % idx]);
13108 }
13109 while (0);
13110
13111 if (immtype == -1)
13112 return -1;
13113
13114 if (elementwidth)
13115 *elementwidth = elsize;
13116
13117 if (modconst)
13118 {
13119 unsigned HOST_WIDE_INT imm = 0;
13120
13121 /* Un-invert bytes of recognized vector, if necessary. */
13122 if (invmask != 0)
13123 for (i = 0; i < idx; i++)
13124 bytes[i] ^= invmask;
13125
13126 if (immtype == 17)
13127 {
13128 /* FIXME: Broken on 32-bit H_W_I hosts. */
13129 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
13130
13131 for (i = 0; i < 8; i++)
13132 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
13133 << (i * BITS_PER_UNIT);
13134
13135 *modconst = GEN_INT (imm);
13136 }
13137 else
13138 {
13139 unsigned HOST_WIDE_INT imm = 0;
13140
13141 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
13142 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
13143
13144 *modconst = GEN_INT (imm);
13145 }
13146 }
13147
13148 return immtype;
13149 #undef CHECK
13150 }
13151
13152 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
13153 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
13154 (or zero for float elements), and a modified constant (whatever should be
13155 output for a VMOV) in *MODCONST. This function was renamed from
13156 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
13157 it is used by both Neon and MVE. */
13158 int
13159 simd_immediate_valid_for_move (rtx op, machine_mode mode,
13160 rtx *modconst, int *elementwidth)
13161 {
13162 rtx tmpconst;
13163 int tmpwidth;
13164 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
13165
13166 if (retval == -1)
13167 return 0;
13168
13169 if (modconst)
13170 *modconst = tmpconst;
13171
13172 if (elementwidth)
13173 *elementwidth = tmpwidth;
13174
13175 return 1;
13176 }
13177
13178 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
13179 the immediate is valid, write a constant suitable for using as an operand
13180 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
13181 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
13182
13183 int
13184 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
13185 rtx *modconst, int *elementwidth)
13186 {
13187 rtx tmpconst;
13188 int tmpwidth;
13189 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
13190
13191 if (retval < 0 || retval > 5)
13192 return 0;
13193
13194 if (modconst)
13195 *modconst = tmpconst;
13196
13197 if (elementwidth)
13198 *elementwidth = tmpwidth;
13199
13200 return 1;
13201 }
13202
13203 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
13204 the immediate is valid, write a constant suitable for using as an operand
13205 to VSHR/VSHL to *MODCONST and the corresponding element width to
13206 *ELEMENTWIDTH. ISLEFTSHIFT distinguishes left shifts from right shifts,
13207 because the two have different immediate ranges. */
13208
13209 int
13210 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
13211 rtx *modconst, int *elementwidth,
13212 bool isleftshift)
13213 {
13214 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
13215 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
13216 unsigned HOST_WIDE_INT last_elt = 0;
13217 unsigned HOST_WIDE_INT maxshift;
13218
13219 /* All elements of the vector constant must be the same. */
13220 for (i = 0; i < n_elts; i++)
13221 {
13222 rtx el = CONST_VECTOR_ELT (op, i);
13223 unsigned HOST_WIDE_INT elpart;
13224
13225 if (CONST_INT_P (el))
13226 elpart = INTVAL (el);
13227 else if (CONST_DOUBLE_P (el))
13228 return 0;
13229 else
13230 gcc_unreachable ();
13231
13232 if (i != 0 && elpart != last_elt)
13233 return 0;
13234
13235 last_elt = elpart;
13236 }
13237
13238 /* Shift less than element size. */
13239 maxshift = innersize * 8;
13240
13241 if (isleftshift)
13242 {
13243 /* Left shift immediate value can be from 0 to <size>-1. */
13244 if (last_elt >= maxshift)
13245 return 0;
13246 }
13247 else
13248 {
13249 /* Right shift immediate value can be from 1 to <size>. */
13250 if (last_elt == 0 || last_elt > maxshift)
13251 return 0;
13252 }
13253
13254 if (elementwidth)
13255 *elementwidth = innersize * 8;
13256
13257 if (modconst)
13258 *modconst = CONST_VECTOR_ELT (op, 0);
13259
13260 return 1;
13261 }
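/* For example, with V8HImode operands (16-bit elements) the element size is
   16, so a vector of identical immediates is accepted for VSHL when the value
   is in [0, 15] and for VSHR when it is in [1, 16]; a vector of 16s is
   therefore only valid as a right-shift count.  */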
13262
13263 /* Return a string suitable for output of Neon immediate logic operation
13264 MNEM. */
13265
13266 char *
13267 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
13268 int inverse, int quad)
13269 {
13270 int width, is_valid;
13271 static char templ[40];
13272
13273 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
13274
13275 gcc_assert (is_valid != 0);
13276
13277 if (quad)
13278 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
13279 else
13280 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
13281
13282 return templ;
13283 }
13284
13285 /* Return a string suitable for output of Neon immediate shift operation
13286 (VSHR or VSHL) MNEM. */
13287
13288 char *
13289 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13290 machine_mode mode, int quad,
13291 bool isleftshift)
13292 {
13293 int width, is_valid;
13294 static char templ[40];
13295
13296 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13297 gcc_assert (is_valid != 0);
13298
13299 if (quad)
13300 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13301 else
13302 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13303
13304 return templ;
13305 }
13306
13307 /* Output a sequence of pairwise operations to implement a reduction.
13308 NOTE: We do "too much work" here, because pairwise operations work on two
13309 registers-worth of operands in one go. Unfortunately we do not think those
13310 extra calculations can be exploited to do the full operation in fewer steps.
13311 Although all vector elements of the result but the first are ignored, we
13312 actually calculate the same result in each of the elements. An alternative
13313 such as initially loading a vector with zero to use as each of the second
13314 operands would use up an additional register and take an extra instruction,
13315 for no particular gain. */
13316
13317 void
13318 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13319 rtx (*reduc) (rtx, rtx, rtx))
13320 {
13321 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13322 rtx tmpsum = op1;
13323
13324 for (i = parts / 2; i >= 1; i /= 2)
13325 {
13326 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13327 emit_insn (reduc (dest, tmpsum, tmpsum));
13328 tmpsum = dest;
13329 }
13330 }
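/* For instance, summing a V4SFmode vector {a, b, c, d} with a vpadd-style
   REDUC takes two steps: the first emits REDUC into a fresh register,
   producing {a+b, c+d, a+b, c+d}, and the second emits REDUC into OP0,
   leaving a+b+c+d in every lane (only lane 0 is actually used).  */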
13331
13332 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13333 loaded into a register using VDUP.
13334
13335 If this is the case, and GENERATE is set, we also generate
13336 instructions to do this and return an RTX to assign to the register. */
13337
13338 static rtx
13339 neon_vdup_constant (rtx vals, bool generate)
13340 {
13341 machine_mode mode = GET_MODE (vals);
13342 machine_mode inner_mode = GET_MODE_INNER (mode);
13343 rtx x;
13344
13345 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13346 return NULL_RTX;
13347
13348 if (!const_vec_duplicate_p (vals, &x))
13349 /* The elements are not all the same. We could handle repeating
13350 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13351 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13352 vdup.i16). */
13353 return NULL_RTX;
13354
13355 if (!generate)
13356 return x;
13357
13358 /* We can load this constant by using VDUP and a constant in a
13359 single ARM register. This will be cheaper than a vector
13360 load. */
13361
13362 x = copy_to_mode_reg (inner_mode, x);
13363 return gen_vec_duplicate (mode, x);
13364 }
13365
13366 /* Return a HI representation of CONST_VEC suitable for MVE predicates. */
13367 rtx
13368 mve_bool_vec_to_const (rtx const_vec)
13369 {
13370 machine_mode mode = GET_MODE (const_vec);
13371
13372 if (!VECTOR_MODE_P (mode))
13373 return const_vec;
13374
13375 unsigned n_elts = GET_MODE_NUNITS (mode);
13376 unsigned el_prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
13377 unsigned shift_c = 16 / n_elts;
13378 unsigned i;
13379 int hi_val = 0;
13380
13381 for (i = 0; i < n_elts; i++)
13382 {
13383 rtx el = CONST_VECTOR_ELT (const_vec, i);
13384 unsigned HOST_WIDE_INT elpart;
13385
13386 gcc_assert (CONST_INT_P (el));
13387 elpart = INTVAL (el) & ((1U << el_prec) - 1);
13388
13389 unsigned index = BYTES_BIG_ENDIAN ? n_elts - i - 1 : i;
13390
13391 hi_val |= elpart << (index * shift_c);
13392 }
13393 /* We are using a mov immediate to encode this constant, which writes 32 bits,
13394 so we need to make sure the top 16 bits are all 0; otherwise we can't
13395 guarantee we can actually write this immediate. */
13396 return gen_int_mode (hi_val, SImode);
13397 }
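/* As an example, a V16BImode predicate constant whose first four elements are
   1 and the rest 0 produces hi_val = 0xf, so the predicate is materialized as
   the SImode immediate 15 (with the element order reversed first on
   big-endian targets).  */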
13398
13399 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13400 constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13401 into a register.
13402
13403 If this is the case, and GENERATE is set, we also generate code to do
13404 this and return an RTX to copy into the register. */
13405
13406 rtx
13407 neon_make_constant (rtx vals, bool generate)
13408 {
13409 machine_mode mode = GET_MODE (vals);
13410 rtx target;
13411 rtx const_vec = NULL_RTX;
13412 int n_elts = GET_MODE_NUNITS (mode);
13413 int n_const = 0;
13414 int i;
13415
13416 if (GET_CODE (vals) == CONST_VECTOR)
13417 const_vec = vals;
13418 else if (GET_CODE (vals) == PARALLEL)
13419 {
13420 /* A CONST_VECTOR must contain only CONST_INTs and
13421 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13422 Only store valid constants in a CONST_VECTOR. */
13423 for (i = 0; i < n_elts; ++i)
13424 {
13425 rtx x = XVECEXP (vals, 0, i);
13426 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13427 n_const++;
13428 }
13429 if (n_const == n_elts)
13430 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13431 }
13432 else
13433 gcc_unreachable ();
13434
13435 if (const_vec != NULL
13436 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13437 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13438 return const_vec;
13439 else if (TARGET_HAVE_MVE && VALID_MVE_PRED_MODE(mode))
13440 return mve_bool_vec_to_const (const_vec);
13441 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13442 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13443 pipeline cycle; creating the constant takes one or two ARM
13444 pipeline cycles. */
13445 return target;
13446 else if (const_vec != NULL_RTX)
13447 /* Load from constant pool. On Cortex-A8 this takes two cycles
13448 (for either double or quad vectors). We cannot take advantage
13449 of single-cycle VLD1 because we need a PC-relative addressing
13450 mode. */
13451 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13452 else
13453 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13454 We cannot construct an initializer. */
13455 return NULL_RTX;
13456 }
13457
13458 /* Initialize vector TARGET to VALS. */
13459
13460 void
13461 neon_expand_vector_init (rtx target, rtx vals)
13462 {
13463 machine_mode mode = GET_MODE (target);
13464 machine_mode inner_mode = GET_MODE_INNER (mode);
13465 int n_elts = GET_MODE_NUNITS (mode);
13466 int n_var = 0, one_var = -1;
13467 bool all_same = true;
13468 rtx x, mem;
13469 int i;
13470
13471 for (i = 0; i < n_elts; ++i)
13472 {
13473 x = XVECEXP (vals, 0, i);
13474 if (!CONSTANT_P (x))
13475 ++n_var, one_var = i;
13476
13477 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13478 all_same = false;
13479 }
13480
13481 if (n_var == 0)
13482 {
13483 rtx constant = neon_make_constant (vals);
13484 if (constant != NULL_RTX)
13485 {
13486 emit_move_insn (target, constant);
13487 return;
13488 }
13489 }
13490
13491 /* Splat a single non-constant element if we can. */
13492 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13493 {
13494 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13495 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13496 return;
13497 }
13498
13499 /* One field is non-constant. Load constant then overwrite varying
13500 field. This is more efficient than using the stack. */
13501 if (n_var == 1)
13502 {
13503 rtx copy = copy_rtx (vals);
13504 rtx merge_mask = GEN_INT (1 << one_var);
13505
13506 /* Load constant part of vector, substitute neighboring value for
13507 varying element. */
13508 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13509 neon_expand_vector_init (target, copy);
13510
13511 /* Insert variable. */
13512 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13513 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13514 return;
13515 }
13516
13517 /* Construct the vector in memory one field at a time
13518 and load the whole vector. */
13519 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13520 for (i = 0; i < n_elts; i++)
13521 emit_move_insn (adjust_address_nv (mem, inner_mode,
13522 i * GET_MODE_SIZE (inner_mode)),
13523 XVECEXP (vals, 0, i));
13524 emit_move_insn (target, mem);
13525 }
13526
13527 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Report
13528 an error, using DESC to describe the operand, if it doesn't. EXP indicates
13529 the source location, which includes the inlining history for intrinsics. */
13530
13531 static void
13532 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13533 const_tree exp, const char *desc)
13534 {
13535 HOST_WIDE_INT lane;
13536
13537 gcc_assert (CONST_INT_P (operand));
13538
13539 lane = INTVAL (operand);
13540
13541 if (lane < low || lane >= high)
13542 {
13543 if (exp)
13544 error_at (EXPR_LOCATION (exp),
13545 "%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13546 else
13547 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13548 }
13549 }
13550
13551 /* Bounds-check lanes. */
13552
13553 void
13554 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13555 const_tree exp)
13556 {
13557 bounds_check (operand, low, high, exp, "lane");
13558 }
13559
13560 /* Bounds-check constants. */
13561
13562 void
13563 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13564 {
13565 bounds_check (operand, low, high, NULL_TREE, "constant");
13566 }
13567
13568 HOST_WIDE_INT
13569 neon_element_bits (machine_mode mode)
13570 {
13571 return GET_MODE_UNIT_BITSIZE (mode);
13572 }
13573
13574 \f
13575 /* Predicates for `match_operand' and `match_operator'. */
13576
13577 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13578 WB level is 2 if full writeback address modes are allowed, 1
13579 if limited writeback address modes (POST_INC and PRE_DEC) are
13580 allowed and 0 if no writeback at all is supported. */
13581
13582 int
13583 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13584 {
13585 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13586 rtx ind;
13587
13588 /* Reject eliminable registers. */
13589 if (! (reload_in_progress || reload_completed || lra_in_progress)
13590 && ( reg_mentioned_p (frame_pointer_rtx, op)
13591 || reg_mentioned_p (arg_pointer_rtx, op)
13592 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13593 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13594 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13595 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13596 return FALSE;
13597
13598 /* Constants are converted into offsets from labels. */
13599 if (!MEM_P (op))
13600 return FALSE;
13601
13602 ind = XEXP (op, 0);
13603
13604 if (reload_completed
13605 && (LABEL_REF_P (ind)
13606 || (GET_CODE (ind) == CONST
13607 && GET_CODE (XEXP (ind, 0)) == PLUS
13608 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13609 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13610 return TRUE;
13611
13612 /* Match: (mem (reg)). */
13613 if (REG_P (ind))
13614 return arm_address_register_rtx_p (ind, 0);
13615
13616 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13617 acceptable in any writeback case (subject to verification by
13618 arm_address_register_rtx_p). We need full writeback to accept
13619 PRE_INC and POST_DEC, and at least restricted writeback for
13620 POST_INC and PRE_DEC. */
13621 if (wb_level > 0
13622 && (GET_CODE (ind) == POST_INC
13623 || GET_CODE (ind) == PRE_DEC
13624 || (wb_level > 1
13625 && (GET_CODE (ind) == PRE_INC
13626 || GET_CODE (ind) == POST_DEC))))
13627 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13628
13629 if (wb_level > 1
13630 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13631 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13632 && GET_CODE (XEXP (ind, 1)) == PLUS
13633 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13634 ind = XEXP (ind, 1);
13635
13636 /* Match:
13637 (plus (reg)
13638 (const))
13639
13640 The encoded immediate for 16-bit modes is multiplied by 2,
13641 while the encoded immediate for 32-bit and 64-bit modes is
13642 multiplied by 4. */
13643 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13644 if (GET_CODE (ind) == PLUS
13645 && REG_P (XEXP (ind, 0))
13646 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13647 && CONST_INT_P (XEXP (ind, 1))
13648 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13649 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13650 return TRUE;
13651
13652 return FALSE;
13653 }
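/* Concretely: for 16-bit modes FACTOR is 2, so the immediate must be an even
   offset in [-510, 510]; for 32-bit and wider modes FACTOR is 4, so it must
   be a multiple of 4 in [-1020, 1020].  E.g. (mem:SF (plus (reg)
   (const_int 508))) is accepted, while an offset of 510 is rejected for
   SFmode because it is not a multiple of 4.  */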
13654
13655 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13656 WB is true if full writeback address modes are allowed and is false
13657 if limited writeback address modes (POST_INC and PRE_DEC) are
13658 allowed. */
13659
13660 int arm_coproc_mem_operand (rtx op, bool wb)
13661 {
13662 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13663 }
13664
13665 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13666 context in which no writeback address modes are allowed. */
13667
13668 int
13669 arm_coproc_mem_operand_no_writeback (rtx op)
13670 {
13671 return arm_coproc_mem_operand_wb (op, 0);
13672 }
13673
13674 /* In non-STRICT mode, return the register number; in STRICT mode return
13675 the hard regno or the replacement if it won't be a mem. Otherwise, return
13676 the original pseudo number. */
13677 static int
13678 arm_effective_regno (rtx op, bool strict)
13679 {
13680 gcc_assert (REG_P (op));
13681 if (!strict || REGNO (op) < FIRST_PSEUDO_REGISTER
13682 || !reg_renumber || reg_renumber[REGNO (op)] < 0)
13683 return REGNO (op);
13684 return reg_renumber[REGNO (op)];
13685 }
13686
13687 /* This function returns TRUE on matching mode and op.
13688 1. For given modes, check for [Rn], return TRUE for Rn <= LO_REGS.
13689 2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
13690 int
13691 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13692 {
13693 enum rtx_code code;
13694 int val, reg_no;
13695
13696 /* Match: (mem (reg)). */
13697 if (REG_P (op))
13698 {
13699 reg_no = arm_effective_regno (op, strict);
13700 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13701 ? reg_no <= LAST_LO_REGNUM
13702 : reg_no < LAST_ARM_REGNUM)
13703 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13704 }
13705 code = GET_CODE (op);
13706
13707 if ((code == POST_INC
13708 || code == PRE_DEC
13709 || code == PRE_INC
13710 || code == POST_DEC)
13711 && REG_P (XEXP (op, 0)))
13712 {
13713 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13714 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13715 ? reg_no <= LAST_LO_REGNUM
13716 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13717 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13718 }
13719 else if (((code == POST_MODIFY || code == PRE_MODIFY)
13720 && GET_CODE (XEXP (op, 1)) == PLUS
13721 && XEXP (op, 0) == XEXP (XEXP (op, 1), 0)
13722 && REG_P (XEXP (op, 0))
13723 && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT)
13724 /* Make sure to only accept PLUS after reload_completed, otherwise
13725 this will interfere with auto_inc's pattern detection. */
13726 || (reload_completed && code == PLUS && REG_P (XEXP (op, 0))
13727 && GET_CODE (XEXP (op, 1)) == CONST_INT))
13728 {
13729 reg_no = arm_effective_regno (XEXP (op, 0), strict);
13730 if (code == PLUS)
13731 val = INTVAL (XEXP (op, 1));
13732 else
13733 val = INTVAL (XEXP(XEXP (op, 1), 1));
13734
13735 switch (mode)
13736 {
13737 case E_V16QImode:
13738 case E_V8QImode:
13739 case E_V4QImode:
13740 if (abs (val) > 127)
13741 return FALSE;
13742 break;
13743 case E_V8HImode:
13744 case E_V8HFmode:
13745 case E_V4HImode:
13746 case E_V4HFmode:
13747 if (val % 2 != 0 || abs (val) > 254)
13748 return FALSE;
13749 break;
13750 case E_V4SImode:
13751 case E_V4SFmode:
13752 if (val % 4 != 0 || abs (val) > 508)
13753 return FALSE;
13754 break;
13755 default:
13756 return FALSE;
13757 }
13758 return ((!strict && reg_no >= FIRST_PSEUDO_REGISTER)
13759 || (MVE_STN_LDW_MODE (mode)
13760 ? reg_no <= LAST_LO_REGNUM
13761 : (reg_no < LAST_ARM_REGNUM
13762 && (code == PLUS || reg_no != SP_REGNUM))));
13763 }
13764 return FALSE;
13765 }
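/* Summary of the offset checks above: QImode vectors accept any offset with
   absolute value up to 127; HImode/HFmode vectors need an even offset up to
   254; SImode/SFmode vectors need a multiple of 4 up to 508.  For example,
   (mem:V4SI (plus (reg) (const_int 508))) is valid, whereas an offset of 512
   is not.  */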
13766
13767 /* Return TRUE if OP is a memory operand which we can load or store a vector
13768 to/from. TYPE is one of the following values:
13769 0 - Vector load/store (vldr)
13770 1 - Core registers (ldm)
13771 2 - Element/structure loads (vld1)
13772 */
13773 int
13774 neon_vector_mem_operand (rtx op, int type, bool strict)
13775 {
13776 rtx ind;
13777
13778 /* Reject eliminable registers. */
13779 if (strict && ! (reload_in_progress || reload_completed)
13780 && (reg_mentioned_p (frame_pointer_rtx, op)
13781 || reg_mentioned_p (arg_pointer_rtx, op)
13782 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13783 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13784 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13785 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13786 return FALSE;
13787
13788 /* Constants are converted into offsets from labels. */
13789 if (!MEM_P (op))
13790 return FALSE;
13791
13792 ind = XEXP (op, 0);
13793
13794 if (reload_completed
13795 && (LABEL_REF_P (ind)
13796 || (GET_CODE (ind) == CONST
13797 && GET_CODE (XEXP (ind, 0)) == PLUS
13798 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13799 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13800 return TRUE;
13801
13802 /* Match: (mem (reg)). */
13803 if (REG_P (ind))
13804 return arm_address_register_rtx_p (ind, 0);
13805
13806 /* Allow post-increment with Neon registers. */
13807 if ((type != 1 && GET_CODE (ind) == POST_INC)
13808 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13809 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13810
13811 /* Allow post-increment by register for VLDn */
13812 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13813 && GET_CODE (XEXP (ind, 1)) == PLUS
13814 && REG_P (XEXP (XEXP (ind, 1), 1))
13815 && REG_P (XEXP (ind, 0))
13816 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13817 return true;
13818
13819 /* Match:
13820 (plus (reg)
13821 (const)). */
13822 if (type == 0
13823 && GET_CODE (ind) == PLUS
13824 && REG_P (XEXP (ind, 0))
13825 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13826 && CONST_INT_P (XEXP (ind, 1))
13827 && INTVAL (XEXP (ind, 1)) > -1024
13828 /* For quad modes, we restrict the constant offset to be slightly less
13829 than what the instruction format permits. We have no such constraint
13830 on double mode offsets. (This must match arm_legitimate_index_p.) */
13831 && (INTVAL (XEXP (ind, 1))
13832 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13833 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13834 return TRUE;
13835
13836 return FALSE;
13837 }
13838
13839 /* Return TRUE if OP is a mem suitable for loading/storing an MVE struct
13840 type. */
13841 int
13842 mve_struct_mem_operand (rtx op)
13843 {
13844 rtx ind = XEXP (op, 0);
13845
13846 /* Match: (mem (reg)). */
13847 if (REG_P (ind))
13848 return arm_address_register_rtx_p (ind, 0);
13849
13850 /* Allow only post-increment by the mode size. */
13851 if (GET_CODE (ind) == POST_INC)
13852 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13853
13854 return FALSE;
13855 }
13856
13857 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13858 type. */
13859 int
13860 neon_struct_mem_operand (rtx op)
13861 {
13862 rtx ind;
13863
13864 /* Reject eliminable registers. */
13865 if (! (reload_in_progress || reload_completed)
13866 && ( reg_mentioned_p (frame_pointer_rtx, op)
13867 || reg_mentioned_p (arg_pointer_rtx, op)
13868 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13869 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13870 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13871 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13872 return FALSE;
13873
13874 /* Constants are converted into offsets from labels. */
13875 if (!MEM_P (op))
13876 return FALSE;
13877
13878 ind = XEXP (op, 0);
13879
13880 if (reload_completed
13881 && (LABEL_REF_P (ind)
13882 || (GET_CODE (ind) == CONST
13883 && GET_CODE (XEXP (ind, 0)) == PLUS
13884 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13885 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13886 return TRUE;
13887
13888 /* Match: (mem (reg)). */
13889 if (REG_P (ind))
13890 return arm_address_register_rtx_p (ind, 0);
13891
13892 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13893 if (GET_CODE (ind) == POST_INC
13894 || GET_CODE (ind) == PRE_DEC)
13895 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13896
13897 return FALSE;
13898 }
13899
13900 /* Prepares the operands for the VCMLA by lane instruction such that the right
13901 register number is selected. This instruction is special in that it always
13902 requires a D register, however there is a choice to be made between Dn[0],
13903 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13904
13905 The VCMLA by lane function always selects two values. For instance given D0
13906 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13907 used by the instruction. However given V4SF then index 0 and 1 are valid as
13908 D0[0] or D1[0] are both valid.
13909
13910 This function centralizes that information based on OPERANDS: OPERANDS[3]
13911 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13912 updated to contain the right index. */
13913
13914 rtx *
13915 neon_vcmla_lane_prepare_operands (rtx *operands)
13916 {
13917 int lane = INTVAL (operands[4]);
13918 machine_mode constmode = SImode;
13919 machine_mode mode = GET_MODE (operands[3]);
13920 int regno = REGNO (operands[3]);
13921 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13922 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13923 {
13924 operands[3] = gen_int_mode (regno + 1, constmode);
13925 operands[4]
13926 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13927 }
13928 else
13929 {
13930 operands[3] = gen_int_mode (regno, constmode);
13931 operands[4] = gen_int_mode (lane, constmode);
13932 }
13933 return operands;
13934 }
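/* For instance (the register choice is only illustrative), if OPERANDS[3] is
   a V4SFmode value living in q1 (s4-s7, i.e. d2-d3), REGNO - FIRST_VFP_REGNUM
   is 4 and regno becomes 2.  Lane 0 then yields operands {2, 0}, selecting
   d2[0], while lane 1 yields {3, 0}, selecting d3[0].  */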
13935
13936
13937 /* Return true if X is a register that will be eliminated later on. */
13938 int
13939 arm_eliminable_register (rtx x)
13940 {
13941 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13942 || REGNO (x) == ARG_POINTER_REGNUM
13943 || VIRTUAL_REGISTER_P (x));
13944 }
13945
13946 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13947 coprocessor registers. Otherwise return NO_REGS. */
13948
13949 enum reg_class
13950 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13951 {
13952 if (mode == HFmode)
13953 {
13954 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13955 return GENERAL_REGS;
13956 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13957 return NO_REGS;
13958 return GENERAL_REGS;
13959 }
13960
13961 /* The neon move patterns handle all legitimate vector and struct
13962 addresses. */
13963 if (TARGET_NEON
13964 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13965 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13966 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13967 || VALID_NEON_STRUCT_MODE (mode)))
13968 return NO_REGS;
13969
13970 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13971 return NO_REGS;
13972
13973 return GENERAL_REGS;
13974 }
13975
13976 /* Values which must be returned in the most-significant end of the return
13977 register. */
13978
13979 static bool
13980 arm_return_in_msb (const_tree valtype)
13981 {
13982 return (TARGET_AAPCS_BASED
13983 && BYTES_BIG_ENDIAN
13984 && (AGGREGATE_TYPE_P (valtype)
13985 || TREE_CODE (valtype) == COMPLEX_TYPE
13986 || FIXED_POINT_TYPE_P (valtype)));
13987 }
13988
13989 /* Return TRUE if X references a SYMBOL_REF. */
13990 int
13991 symbol_mentioned_p (rtx x)
13992 {
13993 const char * fmt;
13994 int i;
13995
13996 if (SYMBOL_REF_P (x))
13997 return 1;
13998
13999 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
14000 are constant offsets, not symbols. */
14001 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14002 return 0;
14003
14004 fmt = GET_RTX_FORMAT (GET_CODE (x));
14005
14006 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14007 {
14008 if (fmt[i] == 'E')
14009 {
14010 int j;
14011
14012 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14013 if (symbol_mentioned_p (XVECEXP (x, i, j)))
14014 return 1;
14015 }
14016 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
14017 return 1;
14018 }
14019
14020 return 0;
14021 }
14022
14023 /* Return TRUE if X references a LABEL_REF. */
14024 int
14025 label_mentioned_p (rtx x)
14026 {
14027 const char * fmt;
14028 int i;
14029
14030 if (LABEL_REF_P (x))
14031 return 1;
14032
14033 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
14034 instruction, but they are constant offsets, not symbols. */
14035 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
14036 return 0;
14037
14038 fmt = GET_RTX_FORMAT (GET_CODE (x));
14039 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
14040 {
14041 if (fmt[i] == 'E')
14042 {
14043 int j;
14044
14045 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
14046 if (label_mentioned_p (XVECEXP (x, i, j)))
14047 return 1;
14048 }
14049 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
14050 return 1;
14051 }
14052
14053 return 0;
14054 }
14055
14056 int
14057 tls_mentioned_p (rtx x)
14058 {
14059 switch (GET_CODE (x))
14060 {
14061 case CONST:
14062 return tls_mentioned_p (XEXP (x, 0));
14063
14064 case UNSPEC:
14065 if (XINT (x, 1) == UNSPEC_TLS)
14066 return 1;
14067
14068 /* Fall through. */
14069 default:
14070 return 0;
14071 }
14072 }
14073
14074 /* Must not copy any rtx that uses a pc-relative address.
14075 Also, disallow copying of load-exclusive instructions that
14076 may appear after splitting of compare-and-swap-style operations
14077 so as to prevent those loops from being transformed away from their
14078 canonical forms (see PR 69904). */
14079
14080 static bool
14081 arm_cannot_copy_insn_p (rtx_insn *insn)
14082 {
14083 /* The tls call insn cannot be copied, as it is paired with a data
14084 word. */
14085 if (recog_memoized (insn) == CODE_FOR_tlscall)
14086 return true;
14087
14088 subrtx_iterator::array_type array;
14089 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
14090 {
14091 const_rtx x = *iter;
14092 if (GET_CODE (x) == UNSPEC
14093 && (XINT (x, 1) == UNSPEC_PIC_BASE
14094 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
14095 return true;
14096 }
14097
14098 rtx set = single_set (insn);
14099 if (set)
14100 {
14101 rtx src = SET_SRC (set);
14102 if (GET_CODE (src) == ZERO_EXTEND)
14103 src = XEXP (src, 0);
14104
14105 /* Catch the load-exclusive and load-acquire operations. */
14106 if (GET_CODE (src) == UNSPEC_VOLATILE
14107 && (XINT (src, 1) == VUNSPEC_LL
14108 || XINT (src, 1) == VUNSPEC_LAX))
14109 return true;
14110 }
14111 return false;
14112 }
14113
14114 enum rtx_code
14115 minmax_code (rtx x)
14116 {
14117 enum rtx_code code = GET_CODE (x);
14118
14119 switch (code)
14120 {
14121 case SMAX:
14122 return GE;
14123 case SMIN:
14124 return LE;
14125 case UMIN:
14126 return LEU;
14127 case UMAX:
14128 return GEU;
14129 default:
14130 gcc_unreachable ();
14131 }
14132 }
14133
14134 /* Match pair of min/max operators that can be implemented via usat/ssat. */
14135
14136 bool
14137 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
14138 int *mask, bool *signed_sat)
14139 {
14140 /* The high bound must be a power of two minus one. */
14141 int log = exact_log2 (INTVAL (hi_bound) + 1);
14142 if (log == -1)
14143 return false;
14144
14145 /* The low bound is either zero (for usat) or one less than the
14146 negation of the high bound (for ssat). */
14147 if (INTVAL (lo_bound) == 0)
14148 {
14149 if (mask)
14150 *mask = log;
14151 if (signed_sat)
14152 *signed_sat = false;
14153
14154 return true;
14155 }
14156
14157 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
14158 {
14159 if (mask)
14160 *mask = log + 1;
14161 if (signed_sat)
14162 *signed_sat = true;
14163
14164 return true;
14165 }
14166
14167 return false;
14168 }
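/* Worked examples: bounds [0, 255] give log = 8 with a zero low bound, so the
   operation maps to usat #8; bounds [-128, 127] give log = 7 and
   -128 == -127 - 1, so the operation maps to ssat with *MASK = 8.  */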
14169
14170 /* Return 1 if memory locations are adjacent. */
14171 int
14172 adjacent_mem_locations (rtx a, rtx b)
14173 {
14174 /* We don't guarantee to preserve the order of these memory refs. */
14175 if (volatile_refs_p (a) || volatile_refs_p (b))
14176 return 0;
14177
14178 if ((REG_P (XEXP (a, 0))
14179 || (GET_CODE (XEXP (a, 0)) == PLUS
14180 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
14181 && (REG_P (XEXP (b, 0))
14182 || (GET_CODE (XEXP (b, 0)) == PLUS
14183 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
14184 {
14185 HOST_WIDE_INT val0 = 0, val1 = 0;
14186 rtx reg0, reg1;
14187 int val_diff;
14188
14189 if (GET_CODE (XEXP (a, 0)) == PLUS)
14190 {
14191 reg0 = XEXP (XEXP (a, 0), 0);
14192 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
14193 }
14194 else
14195 reg0 = XEXP (a, 0);
14196
14197 if (GET_CODE (XEXP (b, 0)) == PLUS)
14198 {
14199 reg1 = XEXP (XEXP (b, 0), 0);
14200 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
14201 }
14202 else
14203 reg1 = XEXP (b, 0);
14204
14205 /* Don't accept any offset that will require multiple
14206 instructions to handle, since this would cause the
14207 arith_adjacentmem pattern to output an overlong sequence. */
14208 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
14209 return 0;
14210
14211 /* Don't allow an eliminable register: register elimination can make
14212 the offset too large. */
14213 if (arm_eliminable_register (reg0))
14214 return 0;
14215
14216 val_diff = val1 - val0;
14217
14218 if (arm_ld_sched)
14219 {
14220 /* If the target has load delay slots, then there's no benefit
14221 to using an ldm instruction unless the offset is zero and
14222 we are optimizing for size. */
14223 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
14224 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
14225 && (val_diff == 4 || val_diff == -4));
14226 }
14227
14228 return ((REGNO (reg0) == REGNO (reg1))
14229 && (val_diff == 4 || val_diff == -4));
14230 }
14231
14232 return 0;
14233 }
14234
14235 /* Return true if OP is a valid load or store multiple operation. LOAD is true
14236 for load operations, false for store operations. CONSECUTIVE is true
14237 if the register numbers in the operation must be consecutive in the register
14238 bank. RETURN_PC is true if the value is to be loaded into PC.
14239 The pattern we are trying to match for load is:
14240 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
14241 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
14242 :
14243 :
14244 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
14245 ]
14246 where
14247 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
14248 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
14249 3. If consecutive is TRUE, then for kth register being loaded,
14250 REGNO (R_dk) = REGNO (R_d0) + k.
14251 The pattern for store is similar. */
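/* As a concrete, hand-written illustration, the parallel for
   "ldmia r0!, {r4, r5}" looks roughly like:
     [(set (reg:SI 0) (plus:SI (reg:SI 0) (const_int 8)))
      (set (reg:SI 4) (mem:SI (reg:SI 0)))
      (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))]
   i.e. a write-back element whose constant is 2 * 4 bytes, followed by the
   loads in ascending register and offset order.  */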
14252 bool
14253 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
14254 bool consecutive, bool return_pc)
14255 {
14256 HOST_WIDE_INT count = XVECLEN (op, 0);
14257 rtx reg, mem, addr;
14258 unsigned regno;
14259 unsigned first_regno;
14260 HOST_WIDE_INT i = 1, base = 0, offset = 0;
14261 rtx elt;
14262 bool addr_reg_in_reglist = false;
14263 bool update = false;
14264 int reg_increment;
14265 int offset_adj;
14266 int regs_per_val;
14267
14268 /* If not in SImode, then registers must be consecutive
14269 (e.g., VLDM instructions for DFmode). */
14270 gcc_assert ((mode == SImode) || consecutive);
14271 /* Setting return_pc for stores is illegal. */
14272 gcc_assert (!return_pc || load);
14273
14274 /* Set up the increments and the regs per val based on the mode. */
14275 reg_increment = GET_MODE_SIZE (mode);
14276 regs_per_val = reg_increment / 4;
14277 offset_adj = return_pc ? 1 : 0;
14278
14279 if (count <= 1
14280 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
14281 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
14282 return false;
14283
14284 /* Check if this is a write-back. */
14285 elt = XVECEXP (op, 0, offset_adj);
14286 if (GET_CODE (SET_SRC (elt)) == PLUS)
14287 {
14288 i++;
14289 base = 1;
14290 update = true;
14291
14292 /* The offset adjustment must be the number of registers being
14293 transferred times the size of a single register. */
14294 if (!REG_P (SET_DEST (elt))
14295 || !REG_P (XEXP (SET_SRC (elt), 0))
14296 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
14297 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
14298 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
14299 ((count - 1 - offset_adj) * reg_increment))
14300 return false;
14301 }
14302
14303 i = i + offset_adj;
14304 base = base + offset_adj;
14305 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
14306 success depends on the type: VLDM can do just one reg,
14307 LDM must do at least two. */
14308 if ((count <= i) && (mode == SImode))
14309 return false;
14310
14311 elt = XVECEXP (op, 0, i - 1);
14312 if (GET_CODE (elt) != SET)
14313 return false;
14314
14315 if (load)
14316 {
14317 reg = SET_DEST (elt);
14318 mem = SET_SRC (elt);
14319 }
14320 else
14321 {
14322 reg = SET_SRC (elt);
14323 mem = SET_DEST (elt);
14324 }
14325
14326 if (!REG_P (reg) || !MEM_P (mem))
14327 return false;
14328
14329 regno = REGNO (reg);
14330 first_regno = regno;
14331 addr = XEXP (mem, 0);
14332 if (GET_CODE (addr) == PLUS)
14333 {
14334 if (!CONST_INT_P (XEXP (addr, 1)))
14335 return false;
14336
14337 offset = INTVAL (XEXP (addr, 1));
14338 addr = XEXP (addr, 0);
14339 }
14340
14341 if (!REG_P (addr))
14342 return false;
14343
14344 /* Don't allow SP to be loaded unless it is also the base register. This
14345 guarantees that SP is reset correctly when an LDM instruction
14346 is interrupted. Otherwise, we might end up with a corrupt stack. */
14347 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14348 return false;
14349
14350 if (regno == REGNO (addr))
14351 addr_reg_in_reglist = true;
14352
14353 for (; i < count; i++)
14354 {
14355 elt = XVECEXP (op, 0, i);
14356 if (GET_CODE (elt) != SET)
14357 return false;
14358
14359 if (load)
14360 {
14361 reg = SET_DEST (elt);
14362 mem = SET_SRC (elt);
14363 }
14364 else
14365 {
14366 reg = SET_SRC (elt);
14367 mem = SET_DEST (elt);
14368 }
14369
14370 if (!REG_P (reg)
14371 || GET_MODE (reg) != mode
14372 || REGNO (reg) <= regno
14373 || (consecutive
14374 && (REGNO (reg) !=
14375 (unsigned int) (first_regno + regs_per_val * (i - base))))
14376 /* Don't allow SP to be loaded unless it is also the base register. This
14377 guarantees that SP is reset correctly when an LDM instruction
14378 is interrupted. Otherwise, we might end up with a corrupt stack. */
14379 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14380 || !MEM_P (mem)
14381 || GET_MODE (mem) != mode
14382 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14383 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14384 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14385 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14386 offset + (i - base) * reg_increment))
14387 && (!REG_P (XEXP (mem, 0))
14388 || offset + (i - base) * reg_increment != 0)))
14389 return false;
14390
14391 regno = REGNO (reg);
14392 if (regno == REGNO (addr))
14393 addr_reg_in_reglist = true;
14394 }
14395
14396 if (load)
14397 {
14398 if (update && addr_reg_in_reglist)
14399 return false;
14400
14401 /* For Thumb-1, the address register is always modified, either by write-back
14402 or by an explicit load. If the pattern does not describe an update,
14403 then the address register must be in the list of loaded registers. */
14404 if (TARGET_THUMB1)
14405 return update || addr_reg_in_reglist;
14406 }
14407
14408 return true;
14409 }
14410
14411 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14412 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14413 following form:
14414
14415 [(set (reg:SI <N>) (const_int 0))
14416 (set (reg:SI <M>) (const_int 0))
14417 ...
14418 (unspec_volatile [(const_int 0)]
14419 VUNSPEC_CLRM_APSR)
14420 (clobber (reg:CC CC_REGNUM))
14421 ]
14422
14423 Any number (including 0) of set expressions is valid; the volatile unspec is
14424 optional. All registers except SP and PC are allowed, and the registers must
14425 be in strictly increasing order.
14426
14427 To be a valid VSCCLRM pattern, OP must have the following form:
14428
14429 [(unspec_volatile [(const_int 0)]
14430 VUNSPEC_VSCCLRM_VPR)
14431 (set (reg:SF <N>) (const_int 0))
14432 (set (reg:SF <M>) (const_int 0))
14433 ...
14434 ]
14435
14436 As with CLRM, any number (including 0) of set expressions is valid; however,
14437 the volatile unspec is mandatory here. Any VFP single-precision register is
14438 accepted but all registers must be consecutive and in increasing order. */
14439
14440 bool
14441 clear_operation_p (rtx op, bool vfp)
14442 {
14443 unsigned regno;
14444 unsigned last_regno = INVALID_REGNUM;
14445 rtx elt, reg, zero;
14446 int count = XVECLEN (op, 0);
14447 int first_set = vfp ? 1 : 0;
14448 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14449
14450 for (int i = first_set; i < count; i++)
14451 {
14452 elt = XVECEXP (op, 0, i);
14453
14454 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14455 {
14456 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14457 || XVECLEN (elt, 0) != 1
14458 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14459 || i != count - 2)
14460 return false;
14461
14462 continue;
14463 }
14464
14465 if (GET_CODE (elt) == CLOBBER)
14466 continue;
14467
14468 if (GET_CODE (elt) != SET)
14469 return false;
14470
14471 reg = SET_DEST (elt);
14472 zero = SET_SRC (elt);
14473
14474 if (!REG_P (reg)
14475 || GET_MODE (reg) != expected_mode
14476 || zero != CONST0_RTX (SImode))
14477 return false;
14478
14479 regno = REGNO (reg);
14480
14481 if (vfp)
14482 {
14483 if (i != first_set && regno != last_regno + 1)
14484 return false;
14485 }
14486 else
14487 {
14488 if (regno == SP_REGNUM || regno == PC_REGNUM)
14489 return false;
14490 if (i != first_set && regno <= last_regno)
14491 return false;
14492 }
14493
14494 last_regno = regno;
14495 }
14496
14497 return true;
14498 }
14499
14500 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14501 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14502 instruction. ADD_OFFSET is nonzero if the base address register needs
14503 to be modified with an add instruction before we can use it. */
14504
14505 static bool
14506 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14507 int nops, HOST_WIDE_INT add_offset)
14508 {
14509 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14510 if the offset isn't small enough. The reason 2 ldrs are faster
14511 is because these ARMs are able to do more than one cache access
14512 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14513 whilst the ARM8 has a double bandwidth cache. This means that
14514 these cores can do both an instruction fetch and a data fetch in
14515 a single cycle, so the trick of calculating the address into a
14516 scratch register (one of the result regs) and then doing a load
14517 multiple actually becomes slower (and no smaller in code size).
14518 That is the transformation
14519
14520 ldr rd1, [rbase + offset]
14521 ldr rd2, [rbase + offset + 4]
14522
14523 to
14524
14525 add rd1, rbase, offset
14526 ldmia rd1, {rd1, rd2}
14527
14528 produces worse code -- '3 cycles + any stalls on rd2' instead of
14529 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14530 access per cycle, the first sequence could never complete in less
14531 than 6 cycles, whereas the ldm sequence would only take 5 and
14532 would make better use of sequential accesses if not hitting the
14533 cache.
14534
14535 We cheat here and test 'arm_ld_sched' which we currently know to
14536 only be true for the ARM8, ARM9 and StrongARM. If this ever
14537 changes, then the test below needs to be reworked. */
14538 if (nops == 2 && arm_ld_sched && add_offset != 0)
14539 return false;
14540
14541 /* XScale has load-store double instructions, but they have stricter
14542 alignment requirements than load-store multiple, so we cannot
14543 use them.
14544
14545 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14546 the pipeline until completion.
14547
14548 NREGS CYCLES
14549 1 3
14550 2 4
14551 3 5
14552 4 6
14553
14554 An ldr instruction takes 1-3 cycles, but does not block the
14555 pipeline.
14556
14557 NREGS CYCLES
14558 1 1-3
14559 2 2-6
14560 3 3-9
14561 4 4-12
14562
14563 Best case ldr will always win. However, the more ldr instructions
14564 we issue, the less likely we are to be able to schedule them well.
14565 Using ldr instructions also increases code size.
14566
14567 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14568 for counts of 3 or 4 regs. */
14569 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14570 return false;
14571 return true;
14572 }
14573
14574 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14575 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14576 an array ORDER which describes the sequence to use when accessing the
14577 offsets that produces an ascending order. In this sequence, each
14578 offset must be larger by exactly 4 than the previous one. ORDER[0]
14579 must have been filled in with the lowest offset by the caller.
14580 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14581 we use to verify that ORDER produces an ascending order of registers.
14582 Return true if it was possible to construct such an order, false if
14583 not. */
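/* A worked example (illustrative only): with NOPS = 4 and
   UNSORTED_OFFSETS = {4, 12, 0, 8}, the caller initialises ORDER[0] = 2
   (the index of the lowest offset, 0).  The loop then finds the offsets
   4, 8 and 12 in turn, producing ORDER = {2, 0, 3, 1}, i.e. accesses in
   the order 0, 4, 8, 12.  A duplicated offset or a gap other than 4 makes
   us return false.  */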
14584
14585 static bool
14586 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14587 int *unsorted_regs)
14588 {
14589 int i;
14590 for (i = 1; i < nops; i++)
14591 {
14592 int j;
14593
14594 order[i] = order[i - 1];
14595 for (j = 0; j < nops; j++)
14596 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14597 {
14598 /* We must find exactly one offset that is higher than the
14599 previous one by 4. */
14600 if (order[i] != order[i - 1])
14601 return false;
14602 order[i] = j;
14603 }
14604 if (order[i] == order[i - 1])
14605 return false;
14606 /* The register numbers must be ascending. */
14607 if (unsorted_regs != NULL
14608 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14609 return false;
14610 }
14611 return true;
14612 }
14613
14614 /* Used to determine in a peephole whether a sequence of load
14615 instructions can be changed into a load-multiple instruction.
14616 NOPS is the number of separate load instructions we are examining. The
14617 first NOPS entries in OPERANDS are the destination registers, the
14618 next NOPS entries are memory operands. If this function is
14619 successful, *BASE is set to the common base register of the memory
14620 accesses; *LOAD_OFFSET is set to the first memory location's offset
14621 from that base register.
14622 REGS is an array filled in with the destination register numbers.
14623 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14624 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14625 the sequence of registers in REGS matches the loads from ascending memory
14626 locations, and the function verifies that the register numbers are
14627 themselves ascending. If CHECK_REGS is false, the register numbers
14628 are stored in the order they are found in the operands. */
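/* For instance (a sketch, assuming the ldm is deemed profitable for the
   current tuning): the pair "ldr r4, [r2]" / "ldr r5, [r2, #4]" gives
   base = 2, load_offset = 0, regs = {4, 5} and returns 1 (ldmia), while
   offsets starting at 4 give case 2 (ldmib), offsets ending at 0 give
   case 3 (ldmda), offsets ending at -4 give case 4 (ldmdb), and any other
   starting offset reachable with a single add/sub immediate gives
   case 5.  */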
14629 static int
14630 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14631 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14632 {
14633 int unsorted_regs[MAX_LDM_STM_OPS];
14634 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14635 int order[MAX_LDM_STM_OPS];
14636 int base_reg = -1;
14637 int i, ldm_case;
14638
14639 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14640 easily extended if required. */
14641 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14642
14643 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14644
14645 /* Loop over the operands and check that the memory references are
14646 suitable (i.e. immediate offsets from the same base register). At
14647 the same time, extract the target register, and the memory
14648 offsets. */
14649 for (i = 0; i < nops; i++)
14650 {
14651 rtx reg;
14652 rtx offset;
14653
14654 /* Convert a subreg of a mem into the mem itself. */
14655 if (GET_CODE (operands[nops + i]) == SUBREG)
14656 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14657
14658 gcc_assert (MEM_P (operands[nops + i]));
14659
14660 /* Don't reorder volatile memory references; it doesn't seem worth
14661 looking for the case where the order is ok anyway. */
14662 if (MEM_VOLATILE_P (operands[nops + i]))
14663 return 0;
14664
14665 offset = const0_rtx;
14666
14667 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14668 || (SUBREG_P (reg)
14669 && REG_P (reg = SUBREG_REG (reg))))
14670 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14671 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14672 || (SUBREG_P (reg)
14673 && REG_P (reg = SUBREG_REG (reg))))
14674 && (CONST_INT_P (offset
14675 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14676 {
14677 if (i == 0)
14678 {
14679 base_reg = REGNO (reg);
14680 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14681 return 0;
14682 }
14683 else if (base_reg != (int) REGNO (reg))
14684 /* Not addressed from the same base register. */
14685 return 0;
14686
14687 unsorted_regs[i] = (REG_P (operands[i])
14688 ? REGNO (operands[i])
14689 : REGNO (SUBREG_REG (operands[i])));
14690
14691 /* If it isn't an integer register, or if it overwrites the
14692 base register but isn't the last insn in the list, then
14693 we can't do this. */
14694 if (unsorted_regs[i] < 0
14695 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14696 || unsorted_regs[i] > 14
14697 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14698 return 0;
14699
14700 /* Don't allow SP to be loaded unless it is also the base
14701 register. This guarantees that SP is reset correctly when
14702 an LDM instruction is interrupted. Otherwise, we might
14703 end up with a corrupt stack. */
14704 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14705 return 0;
14706
14707 unsorted_offsets[i] = INTVAL (offset);
14708 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14709 order[0] = i;
14710 }
14711 else
14712 /* Not a suitable memory address. */
14713 return 0;
14714 }
14715
14716 /* All the useful information has now been extracted from the
14717 operands into unsorted_regs and unsorted_offsets; additionally,
14718 order[0] has been set to the lowest offset in the list. Sort
14719 the offsets into order, verifying that they are adjacent, and
14720 check that the register numbers are ascending. */
14721 if (!compute_offset_order (nops, unsorted_offsets, order,
14722 check_regs ? unsorted_regs : NULL))
14723 return 0;
14724
14725 if (saved_order)
14726 memcpy (saved_order, order, sizeof order);
14727
14728 if (base)
14729 {
14730 *base = base_reg;
14731
14732 for (i = 0; i < nops; i++)
14733 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14734
14735 *load_offset = unsorted_offsets[order[0]];
14736 }
14737
14738 if (unsorted_offsets[order[0]] == 0)
14739 ldm_case = 1; /* ldmia */
14740 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14741 ldm_case = 2; /* ldmib */
14742 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14743 ldm_case = 3; /* ldmda */
14744 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14745 ldm_case = 4; /* ldmdb */
14746 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14747 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14748 ldm_case = 5;
14749 else
14750 return 0;
14751
14752 if (!multiple_operation_profitable_p (false, nops,
14753 ldm_case == 5
14754 ? unsorted_offsets[order[0]] : 0))
14755 return 0;
14756
14757 return ldm_case;
14758 }
14759
14760 /* Used to determine in a peephole whether a sequence of store instructions can
14761 be changed into a store-multiple instruction.
14762 NOPS is the number of separate store instructions we are examining.
14763 NOPS_TOTAL is the total number of instructions recognized by the peephole
14764 pattern.
14765 The first NOPS entries in OPERANDS are the source registers, the next
14766 NOPS entries are memory operands. If this function is successful, *BASE is
14767 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14768 to the first memory location's offset from that base register. REGS is an
14769 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14770 likewise filled with the corresponding rtx's.
14771 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14772 numbers to an ascending order of stores.
14773 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14774 from ascending memory locations, and the function verifies that the register
14775 numbers are themselves ascending. If CHECK_REGS is false, the register
14776 numbers are stored in the order they are found in the operands. */
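/* The return value mirrors load_multiple_sequence: 1 for stmia (first
   offset 0), 2 for stmib (first offset 4), 3 for stmda (last offset 0)
   and 4 for stmdb (last offset -4); unlike the load case there is no
   "adjust the base first" variant here.  */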
14777 static int
14778 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14779 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14780 HOST_WIDE_INT *load_offset, bool check_regs)
14781 {
14782 int unsorted_regs[MAX_LDM_STM_OPS];
14783 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14784 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14785 int order[MAX_LDM_STM_OPS];
14786 int base_reg = -1;
14787 rtx base_reg_rtx = NULL;
14788 int i, stm_case;
14789
14790 /* Write back of base register is currently only supported for Thumb 1. */
14791 int base_writeback = TARGET_THUMB1;
14792
14793 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14794 easily extended if required. */
14795 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14796
14797 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14798
14799 /* Loop over the operands and check that the memory references are
14800 suitable (i.e. immediate offsets from the same base register). At
14801 the same time, extract the target register, and the memory
14802 offsets. */
14803 for (i = 0; i < nops; i++)
14804 {
14805 rtx reg;
14806 rtx offset;
14807
14808 /* Convert a subreg of a mem into the mem itself. */
14809 if (GET_CODE (operands[nops + i]) == SUBREG)
14810 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14811
14812 gcc_assert (MEM_P (operands[nops + i]));
14813
14814 /* Don't reorder volatile memory references; it doesn't seem worth
14815 looking for the case where the order is ok anyway. */
14816 if (MEM_VOLATILE_P (operands[nops + i]))
14817 return 0;
14818
14819 offset = const0_rtx;
14820
14821 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14822 || (SUBREG_P (reg)
14823 && REG_P (reg = SUBREG_REG (reg))))
14824 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14825 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14826 || (SUBREG_P (reg)
14827 && REG_P (reg = SUBREG_REG (reg))))
14828 && (CONST_INT_P (offset
14829 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14830 {
14831 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14832 ? operands[i] : SUBREG_REG (operands[i]));
14833 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14834
14835 if (i == 0)
14836 {
14837 base_reg = REGNO (reg);
14838 base_reg_rtx = reg;
14839 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14840 return 0;
14841 }
14842 else if (base_reg != (int) REGNO (reg))
14843 /* Not addressed from the same base register. */
14844 return 0;
14845
14846 /* If it isn't an integer register, then we can't do this. */
14847 if (unsorted_regs[i] < 0
14848 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14849 /* The effects are unpredictable if the base register is
14850 both updated and stored. */
14851 || (base_writeback && unsorted_regs[i] == base_reg)
14852 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14853 || unsorted_regs[i] > 14)
14854 return 0;
14855
14856 unsorted_offsets[i] = INTVAL (offset);
14857 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14858 order[0] = i;
14859 }
14860 else
14861 /* Not a suitable memory address. */
14862 return 0;
14863 }
14864
14865 /* All the useful information has now been extracted from the
14866 operands into unsorted_regs and unsorted_offsets; additionally,
14867 order[0] has been set to the lowest offset in the list. Sort
14868 the offsets into order, verifying that they are adjacent, and
14869 check that the register numbers are ascending. */
14870 if (!compute_offset_order (nops, unsorted_offsets, order,
14871 check_regs ? unsorted_regs : NULL))
14872 return 0;
14873
14874 if (saved_order)
14875 memcpy (saved_order, order, sizeof order);
14876
14877 if (base)
14878 {
14879 *base = base_reg;
14880
14881 for (i = 0; i < nops; i++)
14882 {
14883 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14884 if (reg_rtxs)
14885 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14886 }
14887
14888 *load_offset = unsorted_offsets[order[0]];
14889 }
14890
14891 if (TARGET_THUMB1
14892 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14893 return 0;
14894
14895 if (unsorted_offsets[order[0]] == 0)
14896 stm_case = 1; /* stmia */
14897 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14898 stm_case = 2; /* stmib */
14899 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14900 stm_case = 3; /* stmda */
14901 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14902 stm_case = 4; /* stmdb */
14903 else
14904 return 0;
14905
14906 if (!multiple_operation_profitable_p (false, nops, 0))
14907 return 0;
14908
14909 return stm_case;
14910 }
14911 \f
14912 /* Routines for use in generating RTL. */
14913
14914 /* Generate a load-multiple instruction. COUNT is the number of loads in
14915 the instruction; REGS and MEMS are arrays containing the operands.
14916 BASEREG is the base register to be used in addressing the memory operands.
14917 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
14918 update the base register. */
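/* For example (illustrative): with COUNT = 2, REGS = {4, 5}, BASEREG = r0
   and WBACK_OFFSET = 8, and assuming the multiple operation is profitable,
   the result is a PARALLEL whose first element sets r0 to r0 + 8 and whose
   remaining elements set r4 and r5 from MEMS[0] and MEMS[1]; otherwise a
   sequence of individual moves (plus the base update) is returned
   instead.  */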
14919
14920 static rtx
14921 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14922 HOST_WIDE_INT wback_offset)
14923 {
14924 int i = 0, j;
14925 rtx result;
14926
14927 if (!multiple_operation_profitable_p (false, count, 0))
14928 {
14929 rtx seq;
14930
14931 start_sequence ();
14932
14933 for (i = 0; i < count; i++)
14934 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14935
14936 if (wback_offset != 0)
14937 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14938
14939 seq = get_insns ();
14940 end_sequence ();
14941
14942 return seq;
14943 }
14944
14945 result = gen_rtx_PARALLEL (VOIDmode,
14946 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14947 if (wback_offset != 0)
14948 {
14949 XVECEXP (result, 0, 0)
14950 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14951 i = 1;
14952 count++;
14953 }
14954
14955 for (j = 0; i < count; i++, j++)
14956 XVECEXP (result, 0, i)
14957 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14958
14959 return result;
14960 }
14961
14962 /* Generate a store-multiple instruction. COUNT is the number of stores in
14963 the instruction; REGS and MEMS are arrays containing the operands.
14964 BASEREG is the base register to be used in addressing the memory operands.
14965 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
14966 update the base register. */
14967
14968 static rtx
14969 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14970 HOST_WIDE_INT wback_offset)
14971 {
14972 int i = 0, j;
14973 rtx result;
14974
14975 if (GET_CODE (basereg) == PLUS)
14976 basereg = XEXP (basereg, 0);
14977
14978 if (!multiple_operation_profitable_p (false, count, 0))
14979 {
14980 rtx seq;
14981
14982 start_sequence ();
14983
14984 for (i = 0; i < count; i++)
14985 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14986
14987 if (wback_offset != 0)
14988 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14989
14990 seq = get_insns ();
14991 end_sequence ();
14992
14993 return seq;
14994 }
14995
14996 result = gen_rtx_PARALLEL (VOIDmode,
14997 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14998 if (wback_offset != 0)
14999 {
15000 XVECEXP (result, 0, 0)
15001 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
15002 i = 1;
15003 count++;
15004 }
15005
15006 for (j = 0; i < count; i++, j++)
15007 XVECEXP (result, 0, i)
15008 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
15009
15010 return result;
15011 }
15012
15013 /* Generate either a load-multiple or a store-multiple instruction. This
15014 function can be used in situations where we can start with a single MEM
15015 rtx and adjust its address upwards.
15016 COUNT is the number of operations in the instruction, not counting a
15017 possible update of the base register. REGS is an array containing the
15018 register operands.
15019 BASEREG is the base register to be used in addressing the memory operands,
15020 which are constructed from BASEMEM.
15021 WRITE_BACK specifies whether the generated instruction should include an
15022 update of the base register.
15023 OFFSETP is used to pass an offset to and from this function; this offset
15024 is not used when constructing the address (instead BASEMEM should have an
15025 appropriate offset in its address); it is used only for setting
15026 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
15027
15028 static rtx
15029 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
15030 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
15031 {
15032 rtx mems[MAX_LDM_STM_OPS];
15033 HOST_WIDE_INT offset = *offsetp;
15034 int i;
15035
15036 gcc_assert (count <= MAX_LDM_STM_OPS);
15037
15038 if (GET_CODE (basereg) == PLUS)
15039 basereg = XEXP (basereg, 0);
15040
15041 for (i = 0; i < count; i++)
15042 {
15043 rtx addr = plus_constant (Pmode, basereg, i * 4);
15044 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
15045 offset += 4;
15046 }
15047
15048 if (write_back)
15049 *offsetp = offset;
15050
15051 if (is_load)
15052 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
15053 write_back ? 4 * count : 0);
15054 else
15055 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
15056 write_back ? 4 * count : 0);
15057 }
15058
15059 rtx
15060 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
15061 rtx basemem, HOST_WIDE_INT *offsetp)
15062 {
15063 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
15064 offsetp);
15065 }
15066
15067 rtx
15068 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
15069 rtx basemem, HOST_WIDE_INT *offsetp)
15070 {
15071 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
15072 offsetp);
15073 }
15074
15075 /* Called from a peephole2 expander to turn a sequence of loads into an
15076 LDM instruction. OPERANDS are the operands found by the peephole matcher;
15077 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
15078 is true if we can reorder the registers because their subsequent uses are
15079 commutative.
15080 Returns true iff we could generate a new instruction. */
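/* A typical transformation (sketch): the insns "ldr r4, [r0]" and
   "ldr r5, [r0, #4]" become "ldmia r0, {r4, r5}".  With SORT_REGS, the
   reversed pair "ldr r5, [r0]" / "ldr r4, [r0, #4]" can also be combined,
   but the values end up in different registers (r4 gets the word at [r0]),
   which is only safe because the subsequent uses are commutative.  */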
15081
15082 bool
15083 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
15084 {
15085 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15086 rtx mems[MAX_LDM_STM_OPS];
15087 int i, j, base_reg;
15088 rtx base_reg_rtx;
15089 HOST_WIDE_INT offset;
15090 int write_back = FALSE;
15091 int ldm_case;
15092 rtx addr;
15093
15094 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
15095 &base_reg, &offset, !sort_regs);
15096
15097 if (ldm_case == 0)
15098 return false;
15099
15100 if (sort_regs)
15101 for (i = 0; i < nops - 1; i++)
15102 for (j = i + 1; j < nops; j++)
15103 if (regs[i] > regs[j])
15104 {
15105 int t = regs[i];
15106 regs[i] = regs[j];
15107 regs[j] = t;
15108 }
15109 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15110
15111 if (TARGET_THUMB1)
15112 {
15113 gcc_assert (ldm_case == 1 || ldm_case == 5);
15114
15115 /* Thumb-1 ldm uses writeback except if the base is loaded. */
15116 write_back = true;
15117 for (i = 0; i < nops; i++)
15118 if (base_reg == regs[i])
15119 write_back = false;
15120
15121 /* Ensure the base is dead if it is updated. */
15122 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
15123 return false;
15124 }
15125
15126 if (ldm_case == 5)
15127 {
15128 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
15129 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
15130 offset = 0;
15131 base_reg_rtx = newbase;
15132 }
15133
15134 for (i = 0; i < nops; i++)
15135 {
15136 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15137 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15138 SImode, addr, 0);
15139 }
15140 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
15141 write_back ? offset + i * 4 : 0));
15142 return true;
15143 }
15144
15145 /* Called from a peephole2 expander to turn a sequence of stores into an
15146 STM instruction. OPERANDS are the operands found by the peephole matcher;
15147 NOPS indicates how many separate stores we are trying to combine.
15148 Returns true iff we could generate a new instruction. */
15149
15150 bool
15151 gen_stm_seq (rtx *operands, int nops)
15152 {
15153 int i;
15154 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15155 rtx mems[MAX_LDM_STM_OPS];
15156 int base_reg;
15157 rtx base_reg_rtx;
15158 HOST_WIDE_INT offset;
15159 int write_back = FALSE;
15160 int stm_case;
15161 rtx addr;
15162 bool base_reg_dies;
15163
15164 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
15165 mem_order, &base_reg, &offset, true);
15166
15167 if (stm_case == 0)
15168 return false;
15169
15170 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15171
15172 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
15173 if (TARGET_THUMB1)
15174 {
15175 gcc_assert (base_reg_dies);
15176 write_back = TRUE;
15177 }
15178
15179 if (stm_case == 5)
15180 {
15181 gcc_assert (base_reg_dies);
15182 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15183 offset = 0;
15184 }
15185
15186 addr = plus_constant (Pmode, base_reg_rtx, offset);
15187
15188 for (i = 0; i < nops; i++)
15189 {
15190 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15191 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15192 SImode, addr, 0);
15193 }
15194 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
15195 write_back ? offset + i * 4 : 0));
15196 return true;
15197 }
15198
15199 /* Called from a peephole2 expander to turn a sequence of stores that are
15200 preceded by constant loads into an STM instruction. OPERANDS are the
15201 operands found by the peephole matcher; NOPS indicates how many
15202 separate stores we are trying to combine; there are 2 * NOPS
15203 instructions in the peephole.
15204 Returns true iff we could generate a new instruction. */
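/* Sketch of the intended transformation: a sequence such as
     mov r3, #1 ; str r3, [r0]
     mov r3, #2 ; str r3, [r0, #4]
   reuses r3, so one of the constants is moved into a free register found
   by peep2_find_free_register, the constants are re-emitted in memory
   order, and the stores collapse into a single stmia (assuming the base
   and any reused registers are dead where required).  */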
15205
15206 bool
15207 gen_const_stm_seq (rtx *operands, int nops)
15208 {
15209 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
15210 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
15211 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
15212 rtx mems[MAX_LDM_STM_OPS];
15213 int base_reg;
15214 rtx base_reg_rtx;
15215 HOST_WIDE_INT offset;
15216 int write_back = FALSE;
15217 int stm_case;
15218 rtx addr;
15219 bool base_reg_dies;
15220 int i, j;
15221 HARD_REG_SET allocated;
15222
15223 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
15224 mem_order, &base_reg, &offset, false);
15225
15226 if (stm_case == 0)
15227 return false;
15228
15229 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
15230
15231 /* If the same register is used more than once, try to find a free
15232 register. */
15233 CLEAR_HARD_REG_SET (allocated);
15234 for (i = 0; i < nops; i++)
15235 {
15236 for (j = i + 1; j < nops; j++)
15237 if (regs[i] == regs[j])
15238 {
15239 rtx t = peep2_find_free_register (0, nops * 2,
15240 TARGET_THUMB1 ? "l" : "r",
15241 SImode, &allocated);
15242 if (t == NULL_RTX)
15243 return false;
15244 reg_rtxs[i] = t;
15245 regs[i] = REGNO (t);
15246 }
15247 }
15248
15249 /* Compute an ordering that maps the register numbers to an ascending
15250 sequence. */
15251 reg_order[0] = 0;
15252 for (i = 0; i < nops; i++)
15253 if (regs[i] < regs[reg_order[0]])
15254 reg_order[0] = i;
15255
15256 for (i = 1; i < nops; i++)
15257 {
15258 int this_order = reg_order[i - 1];
15259 for (j = 0; j < nops; j++)
15260 if (regs[j] > regs[reg_order[i - 1]]
15261 && (this_order == reg_order[i - 1]
15262 || regs[j] < regs[this_order]))
15263 this_order = j;
15264 reg_order[i] = this_order;
15265 }
15266
15267 /* Ensure that registers that must be live after the instruction end
15268 up with the correct value. */
15269 for (i = 0; i < nops; i++)
15270 {
15271 int this_order = reg_order[i];
15272 if ((this_order != mem_order[i]
15273 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
15274 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
15275 return false;
15276 }
15277
15278 /* Load the constants. */
15279 for (i = 0; i < nops; i++)
15280 {
15281 rtx op = operands[2 * nops + mem_order[i]];
15282 sorted_regs[i] = regs[reg_order[i]];
15283 emit_move_insn (reg_rtxs[reg_order[i]], op);
15284 }
15285
15286 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
15287
15288 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
15289 if (TARGET_THUMB1)
15290 {
15291 gcc_assert (base_reg_dies);
15292 write_back = TRUE;
15293 }
15294
15295 if (stm_case == 5)
15296 {
15297 gcc_assert (base_reg_dies);
15298 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
15299 offset = 0;
15300 }
15301
15302 addr = plus_constant (Pmode, base_reg_rtx, offset);
15303
15304 for (i = 0; i < nops; i++)
15305 {
15306 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
15307 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
15308 SImode, addr, 0);
15309 }
15310 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
15311 write_back ? offset + i * 4 : 0));
15312 return true;
15313 }
15314
15315 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
15316 unaligned copies on processors which support unaligned semantics for those
15317 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
15318 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
15319 An interleave factor of 1 (the minimum) will perform no interleaving.
15320 Load/store multiple are used for aligned addresses where possible. */
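/* For instance (informal): with INTERLEAVE_FACTOR = 2 and both ends
   unaligned, each iteration of the main loop emits two unaligned word
   loads followed by two unaligned word stores (8 bytes per block); a
   trailing 2-byte and/or 1-byte remainder is then handled with
   halfword and byte accesses.  */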
15321
15322 static void
15323 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
15324 HOST_WIDE_INT length,
15325 unsigned int interleave_factor)
15326 {
15327 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
15328 int *regnos = XALLOCAVEC (int, interleave_factor);
15329 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
15330 HOST_WIDE_INT i, j;
15331 HOST_WIDE_INT remaining = length, words;
15332 rtx halfword_tmp = NULL, byte_tmp = NULL;
15333 rtx dst, src;
15334 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
15335 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
15336 HOST_WIDE_INT srcoffset, dstoffset;
15337 HOST_WIDE_INT src_autoinc, dst_autoinc;
15338 rtx mem, addr;
15339
15340 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
15341
15342 /* Use hard registers if we have aligned source or destination so we can use
15343 load/store multiple with contiguous registers. */
15344 if (dst_aligned || src_aligned)
15345 for (i = 0; i < interleave_factor; i++)
15346 regs[i] = gen_rtx_REG (SImode, i);
15347 else
15348 for (i = 0; i < interleave_factor; i++)
15349 regs[i] = gen_reg_rtx (SImode);
15350
15351 dst = copy_addr_to_reg (XEXP (dstbase, 0));
15352 src = copy_addr_to_reg (XEXP (srcbase, 0));
15353
15354 srcoffset = dstoffset = 0;
15355
15356 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15357 For copying the last bytes we want to subtract this offset again. */
15358 src_autoinc = dst_autoinc = 0;
15359
15360 for (i = 0; i < interleave_factor; i++)
15361 regnos[i] = i;
15362
15363 /* Copy BLOCK_SIZE_BYTES chunks. */
15364
15365 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15366 {
15367 /* Load words. */
15368 if (src_aligned && interleave_factor > 1)
15369 {
15370 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15371 TRUE, srcbase, &srcoffset));
15372 src_autoinc += UNITS_PER_WORD * interleave_factor;
15373 }
15374 else
15375 {
15376 for (j = 0; j < interleave_factor; j++)
15377 {
15378 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15379 - src_autoinc));
15380 mem = adjust_automodify_address (srcbase, SImode, addr,
15381 srcoffset + j * UNITS_PER_WORD);
15382 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15383 }
15384 srcoffset += block_size_bytes;
15385 }
15386
15387 /* Store words. */
15388 if (dst_aligned && interleave_factor > 1)
15389 {
15390 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15391 TRUE, dstbase, &dstoffset));
15392 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15393 }
15394 else
15395 {
15396 for (j = 0; j < interleave_factor; j++)
15397 {
15398 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15399 - dst_autoinc));
15400 mem = adjust_automodify_address (dstbase, SImode, addr,
15401 dstoffset + j * UNITS_PER_WORD);
15402 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15403 }
15404 dstoffset += block_size_bytes;
15405 }
15406
15407 remaining -= block_size_bytes;
15408 }
15409
15410 /* Copy any whole words left (note these aren't interleaved with any
15411 subsequent halfword/byte load/stores in the interests of simplicity). */
15412
15413 words = remaining / UNITS_PER_WORD;
15414
15415 gcc_assert (words < interleave_factor);
15416
15417 if (src_aligned && words > 1)
15418 {
15419 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15420 &srcoffset));
15421 src_autoinc += UNITS_PER_WORD * words;
15422 }
15423 else
15424 {
15425 for (j = 0; j < words; j++)
15426 {
15427 addr = plus_constant (Pmode, src,
15428 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15429 mem = adjust_automodify_address (srcbase, SImode, addr,
15430 srcoffset + j * UNITS_PER_WORD);
15431 if (src_aligned)
15432 emit_move_insn (regs[j], mem);
15433 else
15434 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15435 }
15436 srcoffset += words * UNITS_PER_WORD;
15437 }
15438
15439 if (dst_aligned && words > 1)
15440 {
15441 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15442 &dstoffset));
15443 dst_autoinc += words * UNITS_PER_WORD;
15444 }
15445 else
15446 {
15447 for (j = 0; j < words; j++)
15448 {
15449 addr = plus_constant (Pmode, dst,
15450 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15451 mem = adjust_automodify_address (dstbase, SImode, addr,
15452 dstoffset + j * UNITS_PER_WORD);
15453 if (dst_aligned)
15454 emit_move_insn (mem, regs[j]);
15455 else
15456 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15457 }
15458 dstoffset += words * UNITS_PER_WORD;
15459 }
15460
15461 remaining -= words * UNITS_PER_WORD;
15462
15463 gcc_assert (remaining < 4);
15464
15465 /* Copy a halfword if necessary. */
15466
15467 if (remaining >= 2)
15468 {
15469 halfword_tmp = gen_reg_rtx (SImode);
15470
15471 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15472 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15473 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15474
15475 /* Either write out immediately, or delay until we've loaded the last
15476 byte, depending on interleave factor. */
15477 if (interleave_factor == 1)
15478 {
15479 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15480 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15481 emit_insn (gen_unaligned_storehi (mem,
15482 gen_lowpart (HImode, halfword_tmp)));
15483 halfword_tmp = NULL;
15484 dstoffset += 2;
15485 }
15486
15487 remaining -= 2;
15488 srcoffset += 2;
15489 }
15490
15491 gcc_assert (remaining < 2);
15492
15493 /* Copy last byte. */
15494
15495 if ((remaining & 1) != 0)
15496 {
15497 byte_tmp = gen_reg_rtx (SImode);
15498
15499 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15500 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15501 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15502
15503 if (interleave_factor == 1)
15504 {
15505 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15506 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15507 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15508 byte_tmp = NULL;
15509 dstoffset++;
15510 }
15511
15512 remaining--;
15513 srcoffset++;
15514 }
15515
15516 /* Store last halfword if we haven't done so already. */
15517
15518 if (halfword_tmp)
15519 {
15520 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15521 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15522 emit_insn (gen_unaligned_storehi (mem,
15523 gen_lowpart (HImode, halfword_tmp)));
15524 dstoffset += 2;
15525 }
15526
15527 /* Likewise for last byte. */
15528
15529 if (byte_tmp)
15530 {
15531 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15532 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15533 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15534 dstoffset++;
15535 }
15536
15537 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15538 }
15539
15540 /* From mips_adjust_block_mem:
15541
15542 Helper function for doing a loop-based block operation on memory
15543 reference MEM. Each iteration of the loop will operate on LENGTH
15544 bytes of MEM.
15545
15546 Create a new base register for use within the loop and point it to
15547 the start of MEM. Create a new memory reference that uses this
15548 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15549
15550 static void
15551 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15552 rtx *loop_mem)
15553 {
15554 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15555
15556 /* Although the new mem does not refer to a known location,
15557 it does keep up to LENGTH bytes of alignment. */
15558 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15559 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15560 }
15561
15562 /* From mips_block_move_loop:
15563
15564 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15565 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15566 the memory regions do not overlap. */
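/* For example (informal): LENGTH = 100 with BYTES_PER_ITER = 16 copies
   96 bytes in the loop (six iterations) and leaves LEFTOVER = 4 bytes,
   which are then copied by a straight-line sequence after the loop.  */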
15567
15568 static void
15569 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15570 unsigned int interleave_factor,
15571 HOST_WIDE_INT bytes_per_iter)
15572 {
15573 rtx src_reg, dest_reg, final_src, test;
15574 HOST_WIDE_INT leftover;
15575
15576 leftover = length % bytes_per_iter;
15577 length -= leftover;
15578
15579 /* Create registers and memory references for use within the loop. */
15580 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15581 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15582
15583 /* Calculate the value that SRC_REG should have after the last iteration of
15584 the loop. */
15585 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15586 0, 0, OPTAB_WIDEN);
15587
15588 /* Emit the start of the loop. */
15589 rtx_code_label *label = gen_label_rtx ();
15590 emit_label (label);
15591
15592 /* Emit the loop body. */
15593 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15594 interleave_factor);
15595
15596 /* Move on to the next block. */
15597 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15598 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15599
15600 /* Emit the loop condition. */
15601 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15602 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15603
15604 /* Mop up any left-over bytes. */
15605 if (leftover)
15606 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15607 }
15608
15609 /* Emit a block move when either the source or destination is unaligned (not
15610 aligned to a four-byte boundary). This may need further tuning depending on
15611 core type, optimize_size setting, etc. */
15612
15613 static int
15614 arm_cpymemqi_unaligned (rtx *operands)
15615 {
15616 HOST_WIDE_INT length = INTVAL (operands[2]);
15617
15618 if (optimize_size)
15619 {
15620 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15621 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15622 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15623 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15624 or dst_aligned though: allow more interleaving in those cases since the
15625 resulting code can be smaller. */
15626 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15627 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15628
15629 if (length > 12)
15630 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15631 interleave_factor, bytes_per_iter);
15632 else
15633 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15634 interleave_factor);
15635 }
15636 else
15637 {
15638 /* Note that the loop created by arm_block_move_unaligned_loop may be
15639 subject to loop unrolling, which makes tuning this condition a little
15640 redundant. */
15641 if (length > 32)
15642 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15643 else
15644 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15645 }
15646
15647 return 1;
15648 }
15649
15650 int
15651 arm_gen_cpymemqi (rtx *operands)
15652 {
15653 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15654 HOST_WIDE_INT srcoffset, dstoffset;
15655 rtx src, dst, srcbase, dstbase;
15656 rtx part_bytes_reg = NULL;
15657 rtx mem;
15658
15659 if (!CONST_INT_P (operands[2])
15660 || !CONST_INT_P (operands[3])
15661 || INTVAL (operands[2]) > 64)
15662 return 0;
15663
15664 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15665 return arm_cpymemqi_unaligned (operands);
15666
15667 if (INTVAL (operands[3]) & 3)
15668 return 0;
15669
15670 dstbase = operands[0];
15671 srcbase = operands[1];
15672
15673 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15674 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15675
15676 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15677 out_words_to_go = INTVAL (operands[2]) / 4;
15678 last_bytes = INTVAL (operands[2]) & 3;
15679 dstoffset = srcoffset = 0;
15680
15681 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15682 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15683
15684 while (in_words_to_go >= 2)
15685 {
15686 if (in_words_to_go > 4)
15687 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15688 TRUE, srcbase, &srcoffset));
15689 else
15690 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15691 src, FALSE, srcbase,
15692 &srcoffset));
15693
15694 if (out_words_to_go)
15695 {
15696 if (out_words_to_go > 4)
15697 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15698 TRUE, dstbase, &dstoffset));
15699 else if (out_words_to_go != 1)
15700 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15701 out_words_to_go, dst,
15702 (last_bytes == 0
15703 ? FALSE : TRUE),
15704 dstbase, &dstoffset));
15705 else
15706 {
15707 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15708 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15709 if (last_bytes != 0)
15710 {
15711 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15712 dstoffset += 4;
15713 }
15714 }
15715 }
15716
15717 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15718 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15719 }
15720
15721 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15722 if (out_words_to_go)
15723 {
15724 rtx sreg;
15725
15726 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15727 sreg = copy_to_reg (mem);
15728
15729 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15730 emit_move_insn (mem, sreg);
15731 in_words_to_go--;
15732
15733 gcc_assert (!in_words_to_go); /* Sanity check */
15734 }
15735
15736 if (in_words_to_go)
15737 {
15738 gcc_assert (in_words_to_go > 0);
15739
15740 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15741 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15742 }
15743
15744 gcc_assert (!last_bytes || part_bytes_reg);
15745
15746 if (BYTES_BIG_ENDIAN && last_bytes)
15747 {
15748 rtx tmp = gen_reg_rtx (SImode);
15749
15750 /* The bytes we want are in the top end of the word. */
15751 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15752 GEN_INT (8 * (4 - last_bytes))));
15753 part_bytes_reg = tmp;
15754
15755 while (last_bytes)
15756 {
15757 mem = adjust_automodify_address (dstbase, QImode,
15758 plus_constant (Pmode, dst,
15759 last_bytes - 1),
15760 dstoffset + last_bytes - 1);
15761 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15762
15763 if (--last_bytes)
15764 {
15765 tmp = gen_reg_rtx (SImode);
15766 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15767 part_bytes_reg = tmp;
15768 }
15769 }
15770
15771 }
15772 else
15773 {
15774 if (last_bytes > 1)
15775 {
15776 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15777 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15778 last_bytes -= 2;
15779 if (last_bytes)
15780 {
15781 rtx tmp = gen_reg_rtx (SImode);
15782 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15783 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15784 part_bytes_reg = tmp;
15785 dstoffset += 2;
15786 }
15787 }
15788
15789 if (last_bytes)
15790 {
15791 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15792 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15793 }
15794 }
15795
15796 return 1;
15797 }
15798
15799 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
15800 by mode size. */
15801 inline static rtx
15802 next_consecutive_mem (rtx mem)
15803 {
15804 machine_mode mode = GET_MODE (mem);
15805 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15806 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15807
15808 return adjust_automodify_address (mem, mode, addr, offset);
15809 }
15810
15811 /* Copy using LDRD/STRD instructions whenever possible.
15812 Returns true upon success. */
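/* For example (a sketch, assuming both buffers are word-aligned): a
   15-byte copy is emitted as one double-word move for bytes 0-7 (ldrd
   where the alignment permits), one word move for bytes 8-11, one
   halfword move for bytes 12-13 and a final byte move for byte 14.  */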
15813 bool
15814 gen_cpymem_ldrd_strd (rtx *operands)
15815 {
15816 unsigned HOST_WIDE_INT len;
15817 HOST_WIDE_INT align;
15818 rtx src, dst, base;
15819 rtx reg0;
15820 bool src_aligned, dst_aligned;
15821 bool src_volatile, dst_volatile;
15822
15823 gcc_assert (CONST_INT_P (operands[2]));
15824 gcc_assert (CONST_INT_P (operands[3]));
15825
15826 len = UINTVAL (operands[2]);
15827 if (len > 64)
15828 return false;
15829
15830 /* Maximum alignment we can assume for both src and dst buffers. */
15831 align = INTVAL (operands[3]);
15832
15833 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15834 return false;
15835
15836 /* Place src and dst addresses in registers
15837 and update the corresponding mem rtx. */
15838 dst = operands[0];
15839 dst_volatile = MEM_VOLATILE_P (dst);
15840 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15841 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15842 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15843
15844 src = operands[1];
15845 src_volatile = MEM_VOLATILE_P (src);
15846 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15847 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15848 src = adjust_automodify_address (src, VOIDmode, base, 0);
15849
15850 if (!unaligned_access && !(src_aligned && dst_aligned))
15851 return false;
15852
15853 if (src_volatile || dst_volatile)
15854 return false;
15855
15856 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15857 if (!(dst_aligned || src_aligned))
15858 return arm_gen_cpymemqi (operands);
15859
15860 /* If either src or dst is unaligned, we'll be accessing it as pairs
15861 of unaligned SImode accesses. Otherwise we can generate DImode
15862 ldrd/strd instructions. */
15863 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15864 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15865
15866 while (len >= 8)
15867 {
15868 len -= 8;
15869 reg0 = gen_reg_rtx (DImode);
15870 rtx first_reg = NULL_RTX;
15871 rtx second_reg = NULL_RTX;
15872
15873 if (!src_aligned || !dst_aligned)
15874 {
15875 if (BYTES_BIG_ENDIAN)
15876 {
15877 second_reg = gen_lowpart (SImode, reg0);
15878 first_reg = gen_highpart_mode (SImode, DImode, reg0);
15879 }
15880 else
15881 {
15882 first_reg = gen_lowpart (SImode, reg0);
15883 second_reg = gen_highpart_mode (SImode, DImode, reg0);
15884 }
15885 }
15886 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15887 emit_move_insn (reg0, src);
15888 else if (src_aligned)
15889 emit_insn (gen_unaligned_loaddi (reg0, src));
15890 else
15891 {
15892 emit_insn (gen_unaligned_loadsi (first_reg, src));
15893 src = next_consecutive_mem (src);
15894 emit_insn (gen_unaligned_loadsi (second_reg, src));
15895 }
15896
15897 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15898 emit_move_insn (dst, reg0);
15899 else if (dst_aligned)
15900 emit_insn (gen_unaligned_storedi (dst, reg0));
15901 else
15902 {
15903 emit_insn (gen_unaligned_storesi (dst, first_reg));
15904 dst = next_consecutive_mem (dst);
15905 emit_insn (gen_unaligned_storesi (dst, second_reg));
15906 }
15907
15908 src = next_consecutive_mem (src);
15909 dst = next_consecutive_mem (dst);
15910 }
15911
15912 gcc_assert (len < 8);
15913 if (len >= 4)
15914 {
15915 /* At least a word but less than a double-word left to copy. Copy a word. */
15916 reg0 = gen_reg_rtx (SImode);
15917 src = adjust_address (src, SImode, 0);
15918 dst = adjust_address (dst, SImode, 0);
15919 if (src_aligned)
15920 emit_move_insn (reg0, src);
15921 else
15922 emit_insn (gen_unaligned_loadsi (reg0, src));
15923
15924 if (dst_aligned)
15925 emit_move_insn (dst, reg0);
15926 else
15927 emit_insn (gen_unaligned_storesi (dst, reg0));
15928
15929 src = next_consecutive_mem (src);
15930 dst = next_consecutive_mem (dst);
15931 len -= 4;
15932 }
15933
15934 if (len == 0)
15935 return true;
15936
15937 /* Copy the remaining bytes. */
15938 if (len >= 2)
15939 {
15940 dst = adjust_address (dst, HImode, 0);
15941 src = adjust_address (src, HImode, 0);
15942 reg0 = gen_reg_rtx (SImode);
15943 if (src_aligned)
15944 emit_insn (gen_zero_extendhisi2 (reg0, src));
15945 else
15946 emit_insn (gen_unaligned_loadhiu (reg0, src));
15947
15948 if (dst_aligned)
15949 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15950 else
15951 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15952
15953 src = next_consecutive_mem (src);
15954 dst = next_consecutive_mem (dst);
15955 if (len == 2)
15956 return true;
15957 }
15958
15959 dst = adjust_address (dst, QImode, 0);
15960 src = adjust_address (src, QImode, 0);
15961 reg0 = gen_reg_rtx (QImode);
15962 emit_move_insn (reg0, src);
15963 emit_move_insn (dst, reg0);
15964 return true;
15965 }
15966
15967 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15968 into its component 32-bit subregs. OP2 may be an immediate
15969 constant and we want to simplify it in that case. */
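/* For example (illustrative constant), with OP2 equal to
(const_int 0x100000002) this yields *LO_OP2 = (const_int 2) and
*HI_OP2 = (const_int 1), since subreg_highpart_offset selects the
most-significant 32 bits. */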
15970 void
15971 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15972 rtx *lo_op2, rtx *hi_op2)
15973 {
15974 *lo_op1 = gen_lowpart (SImode, op1);
15975 *hi_op1 = gen_highpart (SImode, op1);
15976 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15977 subreg_lowpart_offset (SImode, DImode));
15978 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15979 subreg_highpart_offset (SImode, DImode));
15980 }
15981
15982 /* Select a dominance comparison mode if possible for a test of the general
15983 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15984 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15985 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15986 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15987 In all cases OP will be either EQ or NE, but we don't need to know which
15988 here. If we are unable to support a dominance comparison we return
15989 CC mode. This will then fail to match for the RTL expressions that
15990 generate this call. */
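/* For example (illustrative operands): (ior (lt a b) (le c d)) with
COND_OR == DOM_CC_X_OR_Y gives CC_DLEmode (LT implies LE, so the OR
reduces to an LE test); (and (eq a b) (eq c d)) with DOM_CC_X_AND_Y gives
CC_DEQmode. Mixing, say, LT with GEU yields plain CCmode. */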
15991 machine_mode
15992 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15993 {
15994 enum rtx_code cond1, cond2;
15995 int swapped = 0;
15996
15997 /* Currently we will probably get the wrong result if the individual
15998 comparisons are not simple. This also ensures that it is safe to
15999 reverse a comparison if necessary. */
16000 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
16001 != CCmode)
16002 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
16003 != CCmode))
16004 return CCmode;
16005
16006 /* The if_then_else variant of this tests the second condition if the
16007 first passes, but is true if the first fails. Reverse the first
16008 condition to get a true "inclusive-or" expression. */
16009 if (cond_or == DOM_CC_NX_OR_Y)
16010 cond1 = reverse_condition (cond1);
16011
16012 /* If the comparisons are not equal, and one doesn't dominate the other,
16013 then we can't do this. */
16014 if (cond1 != cond2
16015 && !comparison_dominates_p (cond1, cond2)
16016 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
16017 return CCmode;
16018
16019 if (swapped)
16020 std::swap (cond1, cond2);
16021
16022 switch (cond1)
16023 {
16024 case EQ:
16025 if (cond_or == DOM_CC_X_AND_Y)
16026 return CC_DEQmode;
16027
16028 switch (cond2)
16029 {
16030 case EQ: return CC_DEQmode;
16031 case LE: return CC_DLEmode;
16032 case LEU: return CC_DLEUmode;
16033 case GE: return CC_DGEmode;
16034 case GEU: return CC_DGEUmode;
16035 default: gcc_unreachable ();
16036 }
16037
16038 case LT:
16039 if (cond_or == DOM_CC_X_AND_Y)
16040 return CC_DLTmode;
16041
16042 switch (cond2)
16043 {
16044 case LT:
16045 return CC_DLTmode;
16046 case LE:
16047 return CC_DLEmode;
16048 case NE:
16049 return CC_DNEmode;
16050 default:
16051 gcc_unreachable ();
16052 }
16053
16054 case GT:
16055 if (cond_or == DOM_CC_X_AND_Y)
16056 return CC_DGTmode;
16057
16058 switch (cond2)
16059 {
16060 case GT:
16061 return CC_DGTmode;
16062 case GE:
16063 return CC_DGEmode;
16064 case NE:
16065 return CC_DNEmode;
16066 default:
16067 gcc_unreachable ();
16068 }
16069
16070 case LTU:
16071 if (cond_or == DOM_CC_X_AND_Y)
16072 return CC_DLTUmode;
16073
16074 switch (cond2)
16075 {
16076 case LTU:
16077 return CC_DLTUmode;
16078 case LEU:
16079 return CC_DLEUmode;
16080 case NE:
16081 return CC_DNEmode;
16082 default:
16083 gcc_unreachable ();
16084 }
16085
16086 case GTU:
16087 if (cond_or == DOM_CC_X_AND_Y)
16088 return CC_DGTUmode;
16089
16090 switch (cond2)
16091 {
16092 case GTU:
16093 return CC_DGTUmode;
16094 case GEU:
16095 return CC_DGEUmode;
16096 case NE:
16097 return CC_DNEmode;
16098 default:
16099 gcc_unreachable ();
16100 }
16101
16102 /* The remaining cases only occur when both comparisons are the
16103 same. */
16104 case NE:
16105 gcc_assert (cond1 == cond2);
16106 return CC_DNEmode;
16107
16108 case LE:
16109 gcc_assert (cond1 == cond2);
16110 return CC_DLEmode;
16111
16112 case GE:
16113 gcc_assert (cond1 == cond2);
16114 return CC_DGEmode;
16115
16116 case LEU:
16117 gcc_assert (cond1 == cond2);
16118 return CC_DLEUmode;
16119
16120 case GEU:
16121 gcc_assert (cond1 == cond2);
16122 return CC_DGEUmode;
16123
16124 default:
16125 gcc_unreachable ();
16126 }
16127 }
16128
16129 machine_mode
16130 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
16131 {
16132 /* All floating point compares return CCFP if it is an equality
16133 comparison, and CCFPE otherwise. */
16134 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
16135 {
16136 switch (op)
16137 {
16138 case EQ:
16139 case NE:
16140 case UNORDERED:
16141 case ORDERED:
16142 case UNLT:
16143 case UNLE:
16144 case UNGT:
16145 case UNGE:
16146 case UNEQ:
16147 case LTGT:
16148 return CCFPmode;
16149
16150 case LT:
16151 case LE:
16152 case GT:
16153 case GE:
16154 return CCFPEmode;
16155
16156 default:
16157 gcc_unreachable ();
16158 }
16159 }
16160
16161 /* A compare with a shifted operand. Because of canonicalization, the
16162 comparison will have to be swapped when we emit the assembler. */
16163 if (GET_MODE (y) == SImode
16164 && (REG_P (y) || (SUBREG_P (y)))
16165 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16166 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
16167 || GET_CODE (x) == ROTATERT))
16168 return CC_SWPmode;
16169
16170 /* A widened compare of the sum of a value plus a carry against a
16171 constant. This is a representation of RSC. We want to swap the
16172 result of the comparison at output. Not valid if the Z bit is
16173 needed. */
16174 if (GET_MODE (x) == DImode
16175 && GET_CODE (x) == PLUS
16176 && arm_borrow_operation (XEXP (x, 1), DImode)
16177 && CONST_INT_P (y)
16178 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16179 && (op == LE || op == GT))
16180 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
16181 && (op == LEU || op == GTU))))
16182 return CC_SWPmode;
16183
16184 /* If X is a constant we want to use CC_RSBmode. This is
16185 non-canonical, but arm_gen_compare_reg uses this to generate the
16186 correct canonical form. */
16187 if (GET_MODE (y) == SImode
16188 && (REG_P (y) || SUBREG_P (y))
16189 && CONST_INT_P (x))
16190 return CC_RSBmode;
16191
16192 /* This operation is performed swapped, but since we only rely on the Z
16193 flag we don't need an additional mode. */
16194 if (GET_MODE (y) == SImode
16195 && (REG_P (y) || (SUBREG_P (y)))
16196 && GET_CODE (x) == NEG
16197 && (op == EQ || op == NE))
16198 return CC_Zmode;
16199
16200 /* This is a special case that is used by combine to allow a
16201 comparison of a shifted byte load to be split into a zero-extend
16202 followed by a comparison of the shifted integer (only valid for
16203 equalities and unsigned inequalities). */
16204 if (GET_MODE (x) == SImode
16205 && GET_CODE (x) == ASHIFT
16206 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
16207 && GET_CODE (XEXP (x, 0)) == SUBREG
16208 && MEM_P (SUBREG_REG (XEXP (x, 0)))
16209 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
16210 && (op == EQ || op == NE
16211 || op == GEU || op == GTU || op == LTU || op == LEU)
16212 && CONST_INT_P (y))
16213 return CC_Zmode;
16214
16215 /* A construct for a conditional compare, if the false arm contains
16216 0, then both conditions must be true, otherwise either condition
16217 must be true. Not all conditions are possible, so CCmode is
16218 returned if it can't be done. */
16219 if (GET_CODE (x) == IF_THEN_ELSE
16220 && (XEXP (x, 2) == const0_rtx
16221 || XEXP (x, 2) == const1_rtx)
16222 && COMPARISON_P (XEXP (x, 0))
16223 && COMPARISON_P (XEXP (x, 1)))
16224 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16225 INTVAL (XEXP (x, 2)));
16226
16227 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
16228 if (GET_CODE (x) == AND
16229 && (op == EQ || op == NE)
16230 && COMPARISON_P (XEXP (x, 0))
16231 && COMPARISON_P (XEXP (x, 1)))
16232 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16233 DOM_CC_X_AND_Y);
16234
16235 if (GET_CODE (x) == IOR
16236 && (op == EQ || op == NE)
16237 && COMPARISON_P (XEXP (x, 0))
16238 && COMPARISON_P (XEXP (x, 1)))
16239 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
16240 DOM_CC_X_OR_Y);
16241
16242 /* An operation (on Thumb) where we want to test for a single bit.
16243 This is done by shifting that bit up into the top bit of a
16244 scratch register; we can then branch on the sign bit. */
16245 if (TARGET_THUMB1
16246 && GET_MODE (x) == SImode
16247 && (op == EQ || op == NE)
16248 && GET_CODE (x) == ZERO_EXTRACT
16249 && XEXP (x, 1) == const1_rtx)
16250 return CC_Nmode;
16251
16252 /* An operation that sets the condition codes as a side-effect, the
16253 V flag is not set correctly, so we can only use comparisons where
16254 this doesn't matter. (For LT and GE we can use "mi" and "pl"
16255 instead.) */
16256 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
16257 if (GET_MODE (x) == SImode
16258 && y == const0_rtx
16259 && (op == EQ || op == NE || op == LT || op == GE)
16260 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
16261 || GET_CODE (x) == AND || GET_CODE (x) == IOR
16262 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
16263 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
16264 || GET_CODE (x) == LSHIFTRT
16265 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
16266 || GET_CODE (x) == ROTATERT
16267 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
16268 return CC_NZmode;
16269
16270 /* A comparison of ~reg with a const is really a special
16271 canonicalization of compare (~const, reg), which is a reverse
16272 subtract operation. We may not get here if CONST is 0, but that
16273 doesn't matter because ~0 isn't a valid immediate for RSB. */
16274 if (GET_MODE (x) == SImode
16275 && GET_CODE (x) == NOT
16276 && CONST_INT_P (y))
16277 return CC_RSBmode;
16278
16279 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
16280 return CC_Zmode;
16281
16282 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
16283 && GET_CODE (x) == PLUS
16284 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
16285 return CC_Cmode;
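/* The test above matches the canonical carry-out check for an addition:
e.g. (ltu (plus (reg a) (reg b)) (reg a)) is true exactly when a + b
wrapped around, so only the carry flag needs to be valid and CC_Cmode is
sufficient. */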
16286
16287 if (GET_MODE (x) == DImode
16288 && GET_CODE (x) == PLUS
16289 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
16290 && CONST_INT_P (y)
16291 && UINTVAL (y) == 0x800000000
16292 && (op == GEU || op == LTU))
16293 return CC_ADCmode;
16294
16295 if (GET_MODE (x) == DImode
16296 && (op == GE || op == LT)
16297 && GET_CODE (x) == SIGN_EXTEND
16298 && ((GET_CODE (y) == PLUS
16299 && arm_borrow_operation (XEXP (y, 0), DImode))
16300 || arm_borrow_operation (y, DImode)))
16301 return CC_NVmode;
16302
16303 if (GET_MODE (x) == DImode
16304 && (op == GEU || op == LTU)
16305 && GET_CODE (x) == ZERO_EXTEND
16306 && ((GET_CODE (y) == PLUS
16307 && arm_borrow_operation (XEXP (y, 0), DImode))
16308 || arm_borrow_operation (y, DImode)))
16309 return CC_Bmode;
16310
16311 if (GET_MODE (x) == DImode
16312 && (op == EQ || op == NE)
16313 && (GET_CODE (x) == PLUS
16314 || GET_CODE (x) == MINUS)
16315 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
16316 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
16317 && GET_CODE (y) == SIGN_EXTEND
16318 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
16319 return CC_Vmode;
16320
16321 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
16322 return GET_MODE (x);
16323
16324 return CCmode;
16325 }
16326
16327 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
16328 the sequence of instructions needed to generate a suitable condition
16329 code register. Return the CC register result. */
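/* Sketch of the EQ/NE path below: a compare against zero ORs the two
32-bit halves into a scratch and tests the result against 0 in CC_NZmode;
a general EQ/NE compare instead builds (ior (ne x_lo y_lo) (ne x_hi y_hi))
and lets SELECT_CC_MODE choose the dominance mode for it. */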
16330 static rtx
16331 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16332 {
16333 machine_mode mode;
16334 rtx cc_reg;
16335
16336 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
16337 gcc_assert (TARGET_32BIT);
16338 gcc_assert (!CONST_INT_P (x));
16339
16340 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
16341 subreg_lowpart_offset (SImode, DImode));
16342 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
16343 subreg_highpart_offset (SImode, DImode));
16344 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
16345 subreg_lowpart_offset (SImode, DImode));
16346 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
16347 subreg_highpart_offset (SImode, DImode));
16348 switch (code)
16349 {
16350 case EQ:
16351 case NE:
16352 {
16353 if (y_lo == const0_rtx || y_hi == const0_rtx)
16354 {
16355 if (y_lo != const0_rtx)
16356 {
16357 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16358
16359 gcc_assert (y_hi == const0_rtx);
16360 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
16361 if (!arm_add_operand (y_lo, SImode))
16362 y_lo = force_reg (SImode, y_lo);
16363 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16364 x_lo = scratch2;
16365 }
16366 else if (y_hi != const0_rtx)
16367 {
16368 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16369
16370 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16371 if (!arm_add_operand (y_hi, SImode))
16372 y_hi = force_reg (SImode, y_hi);
16373 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16374 x_hi = scratch2;
16375 }
16376
16377 if (!scratch)
16378 {
16379 gcc_assert (!reload_completed);
16380 scratch = gen_rtx_SCRATCH (SImode);
16381 }
16382
16383 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16384 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16385
16386 rtx set
16387 = gen_rtx_SET (cc_reg,
16388 gen_rtx_COMPARE (CC_NZmode,
16389 gen_rtx_IOR (SImode, x_lo, x_hi),
16390 const0_rtx));
16391 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16392 clobber)));
16393 return cc_reg;
16394 }
16395
16396 if (!arm_add_operand (y_lo, SImode))
16397 y_lo = force_reg (SImode, y_lo);
16398
16399 if (!arm_add_operand (y_hi, SImode))
16400 y_hi = force_reg (SImode, y_hi);
16401
16402 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16403 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16404 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16405 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16406 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16407
16408 emit_insn (gen_rtx_SET (cc_reg,
16409 gen_rtx_COMPARE (mode, conjunction,
16410 const0_rtx)));
16411 return cc_reg;
16412 }
16413
16414 case LT:
16415 case GE:
16416 {
16417 if (y_lo == const0_rtx)
16418 {
16419 /* If the low word of y is 0, then this is simply a normal
16420 compare of the upper words. */
16421 if (!arm_add_operand (y_hi, SImode))
16422 y_hi = force_reg (SImode, y_hi);
16423
16424 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16425 }
16426
16427 if (!arm_add_operand (y_lo, SImode))
16428 y_lo = force_reg (SImode, y_lo);
16429
16430 rtx cmp1
16431 = gen_rtx_LTU (DImode,
16432 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16433 const0_rtx);
16434
16435 if (!scratch)
16436 scratch = gen_rtx_SCRATCH (SImode);
16437
16438 if (!arm_not_operand (y_hi, SImode))
16439 y_hi = force_reg (SImode, y_hi);
16440
16441 rtx_insn *insn;
16442 if (y_hi == const0_rtx)
16443 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16444 cmp1));
16445 else if (CONST_INT_P (y_hi))
16446 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16447 y_hi, cmp1));
16448 else
16449 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16450 cmp1));
16451 return SET_DEST (single_set (insn));
16452 }
16453
16454 case LE:
16455 case GT:
16456 {
16457 /* During expansion, we only expect to get here if y is a
16458 constant that we want to handle, otherwise we should have
16459 swapped the operands already. */
16460 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16461
16462 if (!const_ok_for_arm (INTVAL (y_lo)))
16463 y_lo = force_reg (SImode, y_lo);
16464
16465 /* Perform a reverse subtract and compare. */
16466 rtx cmp1
16467 = gen_rtx_LTU (DImode,
16468 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16469 const0_rtx);
16470 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16471 x_hi, cmp1));
16472 return SET_DEST (single_set (insn));
16473 }
16474
16475 case LTU:
16476 case GEU:
16477 {
16478 if (y_lo == const0_rtx)
16479 {
16480 /* If the low word of y is 0, then this is simply a normal
16481 compare of the upper words. */
16482 if (!arm_add_operand (y_hi, SImode))
16483 y_hi = force_reg (SImode, y_hi);
16484
16485 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16486 }
16487
16488 if (!arm_add_operand (y_lo, SImode))
16489 y_lo = force_reg (SImode, y_lo);
16490
16491 rtx cmp1
16492 = gen_rtx_LTU (DImode,
16493 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16494 const0_rtx);
16495
16496 if (!scratch)
16497 scratch = gen_rtx_SCRATCH (SImode);
16498 if (!arm_not_operand (y_hi, SImode))
16499 y_hi = force_reg (SImode, y_hi);
16500
16501 rtx_insn *insn;
16502 if (y_hi == const0_rtx)
16503 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16504 cmp1));
16505 else if (CONST_INT_P (y_hi))
16506 {
16507 /* Constant is viewed as unsigned when zero-extended. */
16508 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16509 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16510 y_hi, cmp1));
16511 }
16512 else
16513 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16514 cmp1));
16515 return SET_DEST (single_set (insn));
16516 }
16517
16518 case LEU:
16519 case GTU:
16520 {
16521 /* During expansion, we only expect to get here if y is a
16522 constant that we want to handle, otherwise we should have
16523 swapped the operands already. */
16524 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16525
16526 if (!const_ok_for_arm (INTVAL (y_lo)))
16527 y_lo = force_reg (SImode, y_lo);
16528
16529 /* Perform a reverse subtract and compare. */
16530 rtx cmp1
16531 = gen_rtx_LTU (DImode,
16532 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16533 const0_rtx);
16534 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16535 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16536 x_hi, cmp1));
16537 return SET_DEST (single_set (insn));
16538 }
16539
16540 default:
16541 gcc_unreachable ();
16542 }
16543 }
16544
16545 /* X and Y are two things to compare using CODE. Emit the compare insn and
16546 return the rtx for the CC register in the proper mode. */
16547 rtx
16548 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16549 {
16550 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16551 return arm_gen_dicompare_reg (code, x, y, scratch);
16552
16553 machine_mode mode = SELECT_CC_MODE (code, x, y);
16554 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16555 if (mode == CC_RSBmode)
16556 {
16557 if (!scratch)
16558 scratch = gen_rtx_SCRATCH (SImode);
16559 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16560 GEN_INT (~UINTVAL (x)), y));
16561 }
16562 else
16563 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16564
16565 return cc_reg;
16566 }
16567
16568 /* Generate a sequence of insns that will generate the correct return
16569 address mask depending on the physical architecture that the program
16570 is running on. */
16571 rtx
16572 arm_gen_return_addr_mask (void)
16573 {
16574 rtx reg = gen_reg_rtx (Pmode);
16575
16576 emit_insn (gen_return_addr_mask (reg));
16577 return reg;
16578 }
16579
16580 void
16581 arm_reload_in_hi (rtx *operands)
16582 {
16583 rtx ref = operands[1];
16584 rtx base, scratch;
16585 HOST_WIDE_INT offset = 0;
16586
16587 if (SUBREG_P (ref))
16588 {
16589 offset = SUBREG_BYTE (ref);
16590 ref = SUBREG_REG (ref);
16591 }
16592
16593 if (REG_P (ref))
16594 {
16595 /* We have a pseudo which has been spilt onto the stack; there
16596 are two cases here: the first where there is a simple
16597 stack-slot replacement and a second where the stack-slot is
16598 out of range, or is used as a subreg. */
16599 if (reg_equiv_mem (REGNO (ref)))
16600 {
16601 ref = reg_equiv_mem (REGNO (ref));
16602 base = find_replacement (&XEXP (ref, 0));
16603 }
16604 else
16605 /* The slot is out of range, or was dressed up in a SUBREG. */
16606 base = reg_equiv_address (REGNO (ref));
16607
16608 /* PR 62554: If there is no equivalent memory location then just move
16609 the value as an SImode register move. This happens when the target
16610 architecture variant does not have an HImode register move. */
16611 if (base == NULL)
16612 {
16613 gcc_assert (REG_P (operands[0]));
16614 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16615 gen_rtx_SUBREG (SImode, ref, 0)));
16616 return;
16617 }
16618 }
16619 else
16620 base = find_replacement (&XEXP (ref, 0));
16621
16622 /* Handle the case where the address is too complex to be offset by 1. */
16623 if (GET_CODE (base) == MINUS
16624 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16625 {
16626 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16627
16628 emit_set_insn (base_plus, base);
16629 base = base_plus;
16630 }
16631 else if (GET_CODE (base) == PLUS)
16632 {
16633 /* The addend must be CONST_INT, or we would have dealt with it above. */
16634 HOST_WIDE_INT hi, lo;
16635
16636 offset += INTVAL (XEXP (base, 1));
16637 base = XEXP (base, 0);
16638
16639 /* Rework the address into a legal sequence of insns. */
16640 /* Valid range for lo is -4095 -> 4095 */
16641 lo = (offset >= 0
16642 ? (offset & 0xfff)
16643 : -((-offset) & 0xfff));
16644
16645 /* Corner case, if lo is the max offset then we would be out of range
16646 once we have added the additional 1 below, so bump the msb into the
16647 pre-loading insn(s). */
16648 if (lo == 4095)
16649 lo &= 0x7ff;
16650
16651 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16652 ^ (HOST_WIDE_INT) 0x80000000)
16653 - (HOST_WIDE_INT) 0x80000000);
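/* Worked example (offset chosen for illustration): for offset == 0x12345,
lo is 0x345 and hi is 0x12000, so hi + lo == offset. The xor/subtract pair
simply sign-extends (offset - lo) from 32 bits. For offset == 4095 the
corner case above leaves lo == 0x7ff and hi == 0x800. */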
16654
16655 gcc_assert (hi + lo == offset);
16656
16657 if (hi != 0)
16658 {
16659 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16660
16661 /* Get the base address; addsi3 knows how to handle constants
16662 that require more than one insn. */
16663 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16664 base = base_plus;
16665 offset = lo;
16666 }
16667 }
16668
16669 /* Operands[2] may overlap operands[0] (though it won't overlap
16670 operands[1]), that's why we asked for a DImode reg -- so we can
16671 use the half that does not overlap. */
16672 if (REGNO (operands[2]) == REGNO (operands[0]))
16673 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16674 else
16675 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16676
16677 emit_insn (gen_zero_extendqisi2 (scratch,
16678 gen_rtx_MEM (QImode,
16679 plus_constant (Pmode, base,
16680 offset))));
16681 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16682 gen_rtx_MEM (QImode,
16683 plus_constant (Pmode, base,
16684 offset + 1))));
16685 if (!BYTES_BIG_ENDIAN)
16686 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16687 gen_rtx_IOR (SImode,
16688 gen_rtx_ASHIFT
16689 (SImode,
16690 gen_rtx_SUBREG (SImode, operands[0], 0),
16691 GEN_INT (8)),
16692 scratch));
16693 else
16694 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16695 gen_rtx_IOR (SImode,
16696 gen_rtx_ASHIFT (SImode, scratch,
16697 GEN_INT (8)),
16698 gen_rtx_SUBREG (SImode, operands[0], 0)));
16699 }
16700
16701 /* Handle storing a half-word to memory during reload by synthesizing as two
16702 byte stores. Take care not to clobber the input values until after we
16703 have moved them somewhere safe. This code assumes that if the DImode
16704 scratch in operands[2] overlaps either the input value or output address
16705 in some way, then that value must die in this insn (we absolutely need
16706 two scratch registers for some corner cases). */
16707 void
16708 arm_reload_out_hi (rtx *operands)
16709 {
16710 rtx ref = operands[0];
16711 rtx outval = operands[1];
16712 rtx base, scratch;
16713 HOST_WIDE_INT offset = 0;
16714
16715 if (SUBREG_P (ref))
16716 {
16717 offset = SUBREG_BYTE (ref);
16718 ref = SUBREG_REG (ref);
16719 }
16720
16721 if (REG_P (ref))
16722 {
16723 /* We have a pseudo which has been spilt onto the stack; there
16724 are two cases here: the first where there is a simple
16725 stack-slot replacement and a second where the stack-slot is
16726 out of range, or is used as a subreg. */
16727 if (reg_equiv_mem (REGNO (ref)))
16728 {
16729 ref = reg_equiv_mem (REGNO (ref));
16730 base = find_replacement (&XEXP (ref, 0));
16731 }
16732 else
16733 /* The slot is out of range, or was dressed up in a SUBREG. */
16734 base = reg_equiv_address (REGNO (ref));
16735
16736 /* PR 62254: If there is no equivalent memory location then just move
16737 the value as an SImode register move. This happens when the target
16738 architecture variant does not have an HImode register move. */
16739 if (base == NULL)
16740 {
16741 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16742
16743 if (REG_P (outval))
16744 {
16745 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16746 gen_rtx_SUBREG (SImode, outval, 0)));
16747 }
16748 else /* SUBREG_P (outval) */
16749 {
16750 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16751 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16752 SUBREG_REG (outval)));
16753 else
16754 /* FIXME: Handle other cases ? */
16755 gcc_unreachable ();
16756 }
16757 return;
16758 }
16759 }
16760 else
16761 base = find_replacement (&XEXP (ref, 0));
16762
16763 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16764
16765 /* Handle the case where the address is too complex to be offset by 1. */
16766 if (GET_CODE (base) == MINUS
16767 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16768 {
16769 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16770
16771 /* Be careful not to destroy OUTVAL. */
16772 if (reg_overlap_mentioned_p (base_plus, outval))
16773 {
16774 /* Updating base_plus might destroy outval, see if we can
16775 swap the scratch and base_plus. */
16776 if (!reg_overlap_mentioned_p (scratch, outval))
16777 std::swap (scratch, base_plus);
16778 else
16779 {
16780 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16781
16782 /* Be conservative and copy OUTVAL into the scratch now,
16783 this should only be necessary if outval is a subreg
16784 of something larger than a word. */
16785 /* XXX Might this clobber base? I can't see how it can,
16786 since scratch is known to overlap with OUTVAL, and
16787 must be wider than a word. */
16788 emit_insn (gen_movhi (scratch_hi, outval));
16789 outval = scratch_hi;
16790 }
16791 }
16792
16793 emit_set_insn (base_plus, base);
16794 base = base_plus;
16795 }
16796 else if (GET_CODE (base) == PLUS)
16797 {
16798 /* The addend must be CONST_INT, or we would have dealt with it above. */
16799 HOST_WIDE_INT hi, lo;
16800
16801 offset += INTVAL (XEXP (base, 1));
16802 base = XEXP (base, 0);
16803
16804 /* Rework the address into a legal sequence of insns. */
16805 /* Valid range for lo is -4095 -> 4095 */
16806 lo = (offset >= 0
16807 ? (offset & 0xfff)
16808 : -((-offset) & 0xfff));
16809
16810 /* Corner case, if lo is the max offset then we would be out of range
16811 once we have added the additional 1 below, so bump the msb into the
16812 pre-loading insn(s). */
16813 if (lo == 4095)
16814 lo &= 0x7ff;
16815
16816 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16817 ^ (HOST_WIDE_INT) 0x80000000)
16818 - (HOST_WIDE_INT) 0x80000000);
16819
16820 gcc_assert (hi + lo == offset);
16821
16822 if (hi != 0)
16823 {
16824 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16825
16826 /* Be careful not to destroy OUTVAL. */
16827 if (reg_overlap_mentioned_p (base_plus, outval))
16828 {
16829 /* Updating base_plus might destroy outval, see if we
16830 can swap the scratch and base_plus. */
16831 if (!reg_overlap_mentioned_p (scratch, outval))
16832 std::swap (scratch, base_plus);
16833 else
16834 {
16835 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16836
16837 /* Be conservative and copy outval into scratch now,
16838 this should only be necessary if outval is a
16839 subreg of something larger than a word. */
16840 /* XXX Might this clobber base? I can't see how it
16841 can, since scratch is known to overlap with
16842 outval. */
16843 emit_insn (gen_movhi (scratch_hi, outval));
16844 outval = scratch_hi;
16845 }
16846 }
16847
16848 /* Get the base address; addsi3 knows how to handle constants
16849 that require more than one insn. */
16850 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16851 base = base_plus;
16852 offset = lo;
16853 }
16854 }
16855
16856 if (BYTES_BIG_ENDIAN)
16857 {
16858 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16859 plus_constant (Pmode, base,
16860 offset + 1)),
16861 gen_lowpart (QImode, outval)));
16862 emit_insn (gen_lshrsi3 (scratch,
16863 gen_rtx_SUBREG (SImode, outval, 0),
16864 GEN_INT (8)));
16865 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16866 offset)),
16867 gen_lowpart (QImode, scratch)));
16868 }
16869 else
16870 {
16871 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16872 offset)),
16873 gen_lowpart (QImode, outval)));
16874 emit_insn (gen_lshrsi3 (scratch,
16875 gen_rtx_SUBREG (SImode, outval, 0),
16876 GEN_INT (8)));
16877 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16878 plus_constant (Pmode, base,
16879 offset + 1)),
16880 gen_lowpart (QImode, scratch)));
16881 }
16882 }
16883
16884 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16885 (padded to the size of a word) should be passed in a register. */
16886
16887 static bool
16888 arm_must_pass_in_stack (const function_arg_info &arg)
16889 {
16890 if (TARGET_AAPCS_BASED)
16891 return must_pass_in_stack_var_size (arg);
16892 else
16893 return must_pass_in_stack_var_size_or_pad (arg);
16894 }
16895
16896
16897 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16898 byte of a stack argument has useful data. For legacy APCS ABIs we use
16899 the default. For AAPCS based ABIs small aggregate types are placed
16900 in the lowest memory address. */
16901
16902 static pad_direction
16903 arm_function_arg_padding (machine_mode mode, const_tree type)
16904 {
16905 if (!TARGET_AAPCS_BASED)
16906 return default_function_arg_padding (mode, type);
16907
16908 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16909 return PAD_DOWNWARD;
16910
16911 return PAD_UPWARD;
16912 }
16913
16914
16915 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16916 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16917 register has useful data, and return the opposite if the most
16918 significant byte does. */
16919
16920 bool
16921 arm_pad_reg_upward (machine_mode mode,
16922 tree type, int first ATTRIBUTE_UNUSED)
16923 {
16924 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16925 {
16926 /* For AAPCS, small aggregates, small fixed-point types,
16927 and small complex types are always padded upwards. */
16928 if (type)
16929 {
16930 if ((AGGREGATE_TYPE_P (type)
16931 || TREE_CODE (type) == COMPLEX_TYPE
16932 || FIXED_POINT_TYPE_P (type))
16933 && int_size_in_bytes (type) <= 4)
16934 return true;
16935 }
16936 else
16937 {
16938 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16939 && GET_MODE_SIZE (mode) <= 4)
16940 return true;
16941 }
16942 }
16943
16944 /* Otherwise, use default padding. */
16945 return !BYTES_BIG_ENDIAN;
16946 }
16947
16948 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16949 assuming that the address in the base register is word aligned. */
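/* For example (illustrative offsets): in Thumb-2 an offset of 1020 is
accepted but 1024 is not, and any offset that is not a multiple of 4 is
rejected; in ARM state the limit is +/-255 with no multiple-of-4
requirement. */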
16950 bool
16951 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16952 {
16953 HOST_WIDE_INT max_offset;
16954
16955 /* Offset must be a multiple of 4 in Thumb mode. */
16956 if (TARGET_THUMB2 && ((offset & 3) != 0))
16957 return false;
16958
16959 if (TARGET_THUMB2)
16960 max_offset = 1020;
16961 else if (TARGET_ARM)
16962 max_offset = 255;
16963 else
16964 return false;
16965
16966 return ((offset <= max_offset) && (offset >= -max_offset));
16967 }
16968
16969 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16970 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16971 Assumes that the address in the base register RN is word aligned. Pattern
16972 guarantees that both memory accesses use the same base register,
16973 the offsets are constants within the range, and the gap between the offsets is 4.
16974 If reload is complete then check that the registers are legal. WBACK indicates whether
16975 address is updated. LOAD indicates whether memory access is load or store. */
16976 bool
16977 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16978 bool wback, bool load)
16979 {
16980 unsigned int t, t2, n;
16981
16982 if (!reload_completed)
16983 return true;
16984
16985 if (!offset_ok_for_ldrd_strd (offset))
16986 return false;
16987
16988 t = REGNO (rt);
16989 t2 = REGNO (rt2);
16990 n = REGNO (rn);
16991
16992 if ((TARGET_THUMB2)
16993 && ((wback && (n == t || n == t2))
16994 || (t == SP_REGNUM)
16995 || (t == PC_REGNUM)
16996 || (t2 == SP_REGNUM)
16997 || (t2 == PC_REGNUM)
16998 || (!load && (n == PC_REGNUM))
16999 || (load && (t == t2))
17000 /* Triggers Cortex-M3 LDRD errata. */
17001 || (!wback && load && fix_cm3_ldrd && (n == t))))
17002 return false;
17003
17004 if ((TARGET_ARM)
17005 && ((wback && (n == t || n == t2))
17006 || (t2 == PC_REGNUM)
17007 || (t % 2 != 0) /* First destination register is not even. */
17008 || (t2 != t + 1)
17009 /* PC can be used as base register (for offset addressing only),
17010 but it is deprecated. */
17011 || (n == PC_REGNUM)))
17012 return false;
17013
17014 return true;
17015 }
17016
17017 /* Return true if a 64-bit access with alignment ALIGN and with a
17018 constant offset OFFSET from the base pointer is permitted on this
17019 architecture. */
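/* For example (illustrative values): with unaligned access enabled, word
(32-bit) alignment and an offset that is a multiple of 4 suffice; without
it, the access must be doubleword (64-bit) aligned with an offset that is
a multiple of 8. */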
17020 static bool
17021 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
17022 {
17023 return (unaligned_access
17024 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
17025 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
17026 }
17027
17028 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
17029 operand MEM's address contains an immediate offset from the base
17030 register and has no side effects, in which case it sets BASE,
17031 OFFSET and ALIGN accordingly. */
17032 static bool
17033 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
17034 {
17035 rtx addr;
17036
17037 gcc_assert (base != NULL && offset != NULL);
17038
17039 /* TODO: Handle more general memory operand patterns, such as
17040 PRE_DEC and PRE_INC. */
17041
17042 if (side_effects_p (mem))
17043 return false;
17044
17045 /* Can't deal with subregs. */
17046 if (SUBREG_P (mem))
17047 return false;
17048
17049 gcc_assert (MEM_P (mem));
17050
17051 *offset = const0_rtx;
17052 *align = MEM_ALIGN (mem);
17053
17054 addr = XEXP (mem, 0);
17055
17056 /* If addr isn't valid for DImode, then we can't handle it. */
17057 if (!arm_legitimate_address_p (DImode, addr,
17058 reload_in_progress || reload_completed))
17059 return false;
17060
17061 if (REG_P (addr))
17062 {
17063 *base = addr;
17064 return true;
17065 }
17066 else if (GET_CODE (addr) == PLUS)
17067 {
17068 *base = XEXP (addr, 0);
17069 *offset = XEXP (addr, 1);
17070 return (REG_P (*base) && CONST_INT_P (*offset));
17071 }
17072
17073 return false;
17074 }
17075
17076 /* Called from a peephole2 to replace two word-size accesses with a
17077 single LDRD/STRD instruction. Returns true iff we can generate a
17078 new instruction sequence. That is, both accesses use the same base
17079 register and the gap between constant offsets is 4. This function
17080 may reorder its operands to match ldrd/strd RTL templates.
17081 OPERANDS are the operands found by the peephole matcher;
17082 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
17083 corresponding memory operands. LOAD indicates whether the access
17084 is a load or a store. CONST_STORE indicates a store of constant
17085 integer values held in OPERANDS[4,5] and assumes that the pattern
17086 is 4 insns long, for the purpose of checking dead registers.
17087 COMMUTE indicates that register operands may be reordered. */
17088 bool
17089 gen_operands_ldrd_strd (rtx *operands, bool load,
17090 bool const_store, bool commute)
17091 {
17092 int nops = 2;
17093 HOST_WIDE_INT offsets[2], offset, align[2];
17094 rtx base = NULL_RTX;
17095 rtx cur_base, cur_offset, tmp;
17096 int i, gap;
17097 HARD_REG_SET regset;
17098
17099 gcc_assert (!const_store || !load);
17100 /* Check that the memory references are immediate offsets from the
17101 same base register. Extract the base register, the destination
17102 registers, and the corresponding memory offsets. */
17103 for (i = 0; i < nops; i++)
17104 {
17105 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17106 &align[i]))
17107 return false;
17108
17109 if (i == 0)
17110 base = cur_base;
17111 else if (REGNO (base) != REGNO (cur_base))
17112 return false;
17113
17114 offsets[i] = INTVAL (cur_offset);
17115 if (GET_CODE (operands[i]) == SUBREG)
17116 {
17117 tmp = SUBREG_REG (operands[i]);
17118 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
17119 operands[i] = tmp;
17120 }
17121 }
17122
17123 /* Make sure there is no dependency between the individual loads. */
17124 if (load && REGNO (operands[0]) == REGNO (base))
17125 return false; /* RAW */
17126
17127 if (load && REGNO (operands[0]) == REGNO (operands[1]))
17128 return false; /* WAW */
17129
17130 /* If the same input register is used in both stores
17131 when storing different constants, try to find a free register.
17132 For example, the code
17133 mov r0, 0
17134 str r0, [r2]
17135 mov r0, 1
17136 str r0, [r2, #4]
17137 can be transformed into
17138 mov r1, 0
17139 mov r0, 1
17140 strd r1, r0, [r2]
17141 in Thumb mode assuming that r1 is free.
17142 For ARM mode do the same but only if the starting register
17143 can be made to be even. */
17144 if (const_store
17145 && REGNO (operands[0]) == REGNO (operands[1])
17146 && INTVAL (operands[4]) != INTVAL (operands[5]))
17147 {
17148 if (TARGET_THUMB2)
17149 {
17150 CLEAR_HARD_REG_SET (regset);
17151 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17152 if (tmp == NULL_RTX)
17153 return false;
17154
17155 /* Use the new register in the first load to ensure that
17156 if the original input register is not dead after peephole,
17157 then it will have the correct constant value. */
17158 operands[0] = tmp;
17159 }
17160 else if (TARGET_ARM)
17161 {
17162 int regno = REGNO (operands[0]);
17163 if (!peep2_reg_dead_p (4, operands[0]))
17164 {
17165 /* When the input register is even and is not dead after the
17166 pattern, it has to hold the second constant but we cannot
17167 form a legal STRD in ARM mode with this register as the second
17168 register. */
17169 if (regno % 2 == 0)
17170 return false;
17171
17172 /* Is regno-1 free? */
17173 SET_HARD_REG_SET (regset);
17174 CLEAR_HARD_REG_BIT(regset, regno - 1);
17175 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17176 if (tmp == NULL_RTX)
17177 return false;
17178
17179 operands[0] = tmp;
17180 }
17181 else
17182 {
17183 /* Find a DImode register. */
17184 CLEAR_HARD_REG_SET (regset);
17185 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17186 if (tmp != NULL_RTX)
17187 {
17188 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17189 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17190 }
17191 else
17192 {
17193 /* Can we use the input register to form a DI register? */
17194 SET_HARD_REG_SET (regset);
17195 CLEAR_HARD_REG_BIT(regset,
17196 regno % 2 == 0 ? regno + 1 : regno - 1);
17197 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
17198 if (tmp == NULL_RTX)
17199 return false;
17200 operands[regno % 2 == 1 ? 0 : 1] = tmp;
17201 }
17202 }
17203
17204 gcc_assert (operands[0] != NULL_RTX);
17205 gcc_assert (operands[1] != NULL_RTX);
17206 gcc_assert (REGNO (operands[0]) % 2 == 0);
17207 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
17208 }
17209 }
17210
17211 /* Make sure the instructions are ordered with lower memory access first. */
17212 if (offsets[0] > offsets[1])
17213 {
17214 gap = offsets[0] - offsets[1];
17215 offset = offsets[1];
17216
17217 /* Swap the instructions such that lower memory is accessed first. */
17218 std::swap (operands[0], operands[1]);
17219 std::swap (operands[2], operands[3]);
17220 std::swap (align[0], align[1]);
17221 if (const_store)
17222 std::swap (operands[4], operands[5]);
17223 }
17224 else
17225 {
17226 gap = offsets[1] - offsets[0];
17227 offset = offsets[0];
17228 }
17229
17230 /* Make sure accesses are to consecutive memory locations. */
17231 if (gap != GET_MODE_SIZE (SImode))
17232 return false;
17233
17234 if (!align_ok_ldrd_strd (align[0], offset))
17235 return false;
17236
17237 /* Make sure we generate legal instructions. */
17238 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17239 false, load))
17240 return true;
17241
17242 /* In Thumb state, where registers are almost unconstrained, there
17243 is little hope of fixing it. */
17244 if (TARGET_THUMB2)
17245 return false;
17246
17247 if (load && commute)
17248 {
17249 /* Try reordering registers. */
17250 std::swap (operands[0], operands[1]);
17251 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17252 false, load))
17253 return true;
17254 }
17255
17256 if (const_store)
17257 {
17258 /* If input registers are dead after this pattern, they can be
17259 reordered or replaced by other registers that are free in the
17260 current pattern. */
17261 if (!peep2_reg_dead_p (4, operands[0])
17262 || !peep2_reg_dead_p (4, operands[1]))
17263 return false;
17264
17265 /* Try to reorder the input registers. */
17266 /* For example, the code
17267 mov r0, 0
17268 mov r1, 1
17269 str r1, [r2]
17270 str r0, [r2, #4]
17271 can be transformed into
17272 mov r1, 0
17273 mov r0, 1
17274 strd r0, [r2]
17275 */
17276 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
17277 false, false))
17278 {
17279 std::swap (operands[0], operands[1]);
17280 return true;
17281 }
17282
17283 /* Try to find a free DI register. */
17284 CLEAR_HARD_REG_SET (regset);
17285 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
17286 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
17287 while (true)
17288 {
17289 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
17290 if (tmp == NULL_RTX)
17291 return false;
17292
17293 /* DREG must be an even-numbered register in DImode.
17294 Split it into SI registers. */
17295 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
17296 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
17297 gcc_assert (operands[0] != NULL_RTX);
17298 gcc_assert (operands[1] != NULL_RTX);
17299 gcc_assert (REGNO (operands[0]) % 2 == 0);
17300 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
17301
17302 return (operands_ok_ldrd_strd (operands[0], operands[1],
17303 base, offset,
17304 false, load));
17305 }
17306 }
17307
17308 return false;
17309 }
17310
17311
17312 /* Return true if parallel execution of the two word-size accesses provided
17313 could be satisfied with a single LDRD/STRD instruction. Two word-size
17314 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
17315 register operands and OPERANDS[2,3] are the corresponding memory operands.
17316 */
17317 bool
17318 valid_operands_ldrd_strd (rtx *operands, bool load)
17319 {
17320 int nops = 2;
17321 HOST_WIDE_INT offsets[2], offset, align[2];
17322 rtx base = NULL_RTX;
17323 rtx cur_base, cur_offset;
17324 int i, gap;
17325
17326 /* Check that the memory references are immediate offsets from the
17327 same base register. Extract the base register, the destination
17328 registers, and the corresponding memory offsets. */
17329 for (i = 0; i < nops; i++)
17330 {
17331 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
17332 &align[i]))
17333 return false;
17334
17335 if (i == 0)
17336 base = cur_base;
17337 else if (REGNO (base) != REGNO (cur_base))
17338 return false;
17339
17340 offsets[i] = INTVAL (cur_offset);
17341 if (GET_CODE (operands[i]) == SUBREG)
17342 return false;
17343 }
17344
17345 if (offsets[0] > offsets[1])
17346 return false;
17347
17348 gap = offsets[1] - offsets[0];
17349 offset = offsets[0];
17350
17351 /* Make sure accesses are to consecutive memory locations. */
17352 if (gap != GET_MODE_SIZE (SImode))
17353 return false;
17354
17355 if (!align_ok_ldrd_strd (align[0], offset))
17356 return false;
17357
17358 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
17359 false, load);
17360 }
17361
17362 \f
17363 /* Print a symbolic form of X to the debug file, F. */
17364 static void
17365 arm_print_value (FILE *f, rtx x)
17366 {
17367 switch (GET_CODE (x))
17368 {
17369 case CONST_INT:
17370 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17371 return;
17372
17373 case CONST_DOUBLE:
17374 {
17375 char fpstr[20];
17376 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17377 sizeof (fpstr), 0, 1);
17378 fputs (fpstr, f);
17379 }
17380 return;
17381
17382 case CONST_VECTOR:
17383 {
17384 int i;
17385
17386 fprintf (f, "<");
17387 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17388 {
17389 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17390 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17391 fputc (',', f);
17392 }
17393 fprintf (f, ">");
17394 }
17395 return;
17396
17397 case CONST_STRING:
17398 fprintf (f, "\"%s\"", XSTR (x, 0));
17399 return;
17400
17401 case SYMBOL_REF:
17402 fprintf (f, "`%s'", XSTR (x, 0));
17403 return;
17404
17405 case LABEL_REF:
17406 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17407 return;
17408
17409 case CONST:
17410 arm_print_value (f, XEXP (x, 0));
17411 return;
17412
17413 case PLUS:
17414 arm_print_value (f, XEXP (x, 0));
17415 fprintf (f, "+");
17416 arm_print_value (f, XEXP (x, 1));
17417 return;
17418
17419 case PC:
17420 fprintf (f, "pc");
17421 return;
17422
17423 default:
17424 fprintf (f, "????");
17425 return;
17426 }
17427 }
17428 \f
17429 /* Routines for manipulation of the constant pool. */
17430
17431 /* Arm instructions cannot load a large constant directly into a
17432 register; they have to come from a pc relative load. The constant
17433 must therefore be placed in the addressable range of the pc
17434 relative load. Depending on the precise pc relative load
17435 instruction the range is somewhere between 256 bytes and 4k. This
17436 means that we often have to dump a constant inside a function, and
17437 generate code to branch around it.
17438
17439 It is important to minimize this, since the branches will slow
17440 things down and make the code larger.
17441
17442 Normally we can hide the table after an existing unconditional
17443 branch so that there is no interruption of the flow, but in the
17444 worst case the code looks like this:
17445
17446 ldr rn, L1
17447 ...
17448 b L2
17449 align
17450 L1: .long value
17451 L2:
17452 ...
17453
17454 ldr rn, L3
17455 ...
17456 b L4
17457 align
17458 L3: .long value
17459 L4:
17460 ...
17461
17462 We fix this by performing a scan after scheduling, which notices
17463 which instructions need to have their operands fetched from the
17464 constant table and builds the table.
17465
17466 The algorithm starts by building a table of all the constants that
17467 need fixing up and all the natural barriers in the function (places
17468 where a constant table can be dropped without breaking the flow).
17469 For each fixup we note how far the pc-relative replacement will be
17470 able to reach and the offset of the instruction into the function.
17471
17472 Having built the table we then group the fixes together to form
17473 tables that are as large as possible (subject to addressing
17474 constraints) and emit each table of constants after the last
17475 barrier that is within range of all the instructions in the group.
17476 If a group does not contain a barrier, then we forcibly create one
17477 by inserting a jump instruction into the flow. Once the table has
17478 been inserted, the insns are then modified to reference the
17479 relevant entry in the pool.
17480
17481 Possible enhancements to the algorithm (not implemented) are:
17482
17483 1) For some processors and object formats, there may be benefit in
17484 aligning the pools to the start of cache lines; this alignment
17485 would need to be taken into account when calculating addressability
17486 of a pool. */
17487
17488 /* These typedefs are located at the start of this file, so that
17489 they can be used in the prototypes there. This comment is to
17490 remind readers of that fact so that the following structures
17491 can be understood more easily.
17492
17493 typedef struct minipool_node Mnode;
17494 typedef struct minipool_fixup Mfix; */
17495
17496 struct minipool_node
17497 {
17498 /* Doubly linked chain of entries. */
17499 Mnode * next;
17500 Mnode * prev;
17501 /* The maximum offset into the code at which this entry can be placed. While
17502 pushing fixes for forward references, all entries are sorted in order
17503 of increasing max_address. */
17504 HOST_WIDE_INT max_address;
17505 /* Similarly for an entry inserted for a backwards ref. */
17506 HOST_WIDE_INT min_address;
17507 /* The number of fixes referencing this entry. This can become zero
17508 if we "unpush" an entry. In this case we ignore the entry when we
17509 come to emit the code. */
17510 int refcount;
17511 /* The offset from the start of the minipool. */
17512 HOST_WIDE_INT offset;
17513 /* The value in the table. */
17514 rtx value;
17515 /* The mode of value. */
17516 machine_mode mode;
17517 /* The size of the value. With iWMMXt enabled
17518 sizes > 4 also imply an alignment of 8 bytes. */
17519 int fix_size;
17520 };
17521
17522 struct minipool_fixup
17523 {
17524 Mfix * next;
17525 rtx_insn * insn;
17526 HOST_WIDE_INT address;
17527 rtx * loc;
17528 machine_mode mode;
17529 int fix_size;
17530 rtx value;
17531 Mnode * minipool;
17532 HOST_WIDE_INT forwards;
17533 HOST_WIDE_INT backwards;
17534 };
17535
17536 /* Fixes less than a word need padding out to a word boundary. */
17537 #define MINIPOOL_FIX_SIZE(mode) \
17538 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
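/* For example, an HImode fix (2 bytes) is counted as 4 bytes of pool
space, while a DImode fix keeps its natural size of 8 bytes. */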
17539
17540 static Mnode * minipool_vector_head;
17541 static Mnode * minipool_vector_tail;
17542 static rtx_code_label *minipool_vector_label;
17543 static int minipool_pad;
17544
17545 /* The linked list of all minipool fixes required for this function. */
17546 Mfix * minipool_fix_head;
17547 Mfix * minipool_fix_tail;
17548 /* The fix entry for the current minipool, once it has been placed. */
17549 Mfix * minipool_barrier;
17550
17551 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17552 #define JUMP_TABLES_IN_TEXT_SECTION 0
17553 #endif
17554
17555 static HOST_WIDE_INT
17556 get_jump_table_size (rtx_jump_table_data *insn)
17557 {
17558 /* ADDR_VECs only take room if read-only data goes into the text
17559 section. */
17560 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17561 {
17562 rtx body = PATTERN (insn);
17563 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17564 HOST_WIDE_INT size;
17565 HOST_WIDE_INT modesize;
17566
17567 modesize = GET_MODE_SIZE (GET_MODE (body));
17568 size = modesize * XVECLEN (body, elt);
17569 switch (modesize)
17570 {
17571 case 1:
17572 /* Round up size of TBB table to a halfword boundary. */
17573 size = (size + 1) & ~HOST_WIDE_INT_1;
17574 break;
17575 case 2:
17576 /* No padding necessary for TBH. */
17577 break;
17578 case 4:
17579 /* Add two bytes for alignment on Thumb. */
17580 if (TARGET_THUMB)
17581 size += 2;
17582 break;
17583 default:
17584 gcc_unreachable ();
17585 }
17586 return size;
17587 }
17588
17589 return 0;
17590 }
17591
17592 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17593 function descriptor) into a register and the GOT address into the
17594 FDPIC register, returning an rtx for the register holding the
17595 function address. */
17596
17597 rtx
17598 arm_load_function_descriptor (rtx funcdesc)
17599 {
17600 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17601 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17602 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17603 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17604
17605 emit_move_insn (fnaddr_reg, fnaddr);
17606
17607 /* The ABI requires the entry point address to be loaded first, but
17608 since we cannot support lazy binding for lack of atomic load of
17609 two 32-bits values, we do not need to bother to prevent the
17610 previous load from being moved after that of the GOT address. */
17611 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17612
17613 return fnaddr_reg;
17614 }
17615
17616 /* Return the maximum amount of padding that will be inserted before
17617 label LABEL. */
17618 static HOST_WIDE_INT
17619 get_label_padding (rtx label)
17620 {
17621 HOST_WIDE_INT align, min_insn_size;
17622
17623 align = 1 << label_to_alignment (label).levels[0].log;
17624 min_insn_size = TARGET_THUMB ? 2 : 4;
17625 return align > min_insn_size ? align - min_insn_size : 0;
17626 }
17627
17628 /* Move a minipool fix MP from its current location to before MAX_MP.
17629 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17630 constraints may need updating. */
17631 static Mnode *
17632 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17633 HOST_WIDE_INT max_address)
17634 {
17635 /* The code below assumes these are different. */
17636 gcc_assert (mp != max_mp);
17637
17638 if (max_mp == NULL)
17639 {
17640 if (max_address < mp->max_address)
17641 mp->max_address = max_address;
17642 }
17643 else
17644 {
17645 if (max_address > max_mp->max_address - mp->fix_size)
17646 mp->max_address = max_mp->max_address - mp->fix_size;
17647 else
17648 mp->max_address = max_address;
17649
17650 /* Unlink MP from its current position. Since max_mp is non-null,
17651 mp->prev must be non-null. */
17652 mp->prev->next = mp->next;
17653 if (mp->next != NULL)
17654 mp->next->prev = mp->prev;
17655 else
17656 minipool_vector_tail = mp->prev;
17657
17658 /* Re-insert it before MAX_MP. */
17659 mp->next = max_mp;
17660 mp->prev = max_mp->prev;
17661 max_mp->prev = mp;
17662
17663 if (mp->prev != NULL)
17664 mp->prev->next = mp;
17665 else
17666 minipool_vector_head = mp;
17667 }
17668
17669 /* Save the new entry. */
17670 max_mp = mp;
17671
17672 /* Scan over the preceding entries and adjust their addresses as
17673 required. */
17674 while (mp->prev != NULL
17675 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17676 {
17677 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17678 mp = mp->prev;
17679 }
17680
17681 return max_mp;
17682 }
17683
17684 /* Add a constant to the minipool for a forward reference. Returns the
17685 node added or NULL if the constant will not fit in this pool. */
17686 static Mnode *
17687 add_minipool_forward_ref (Mfix *fix)
17688 {
17689 /* If set, max_mp is the first pool_entry that has a lower
17690 constraint than the one we are trying to add. */
17691 Mnode * max_mp = NULL;
17692 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17693 Mnode * mp;
17694
17695 /* If the minipool starts before the end of FIX->INSN then this FIX
17696 cannot be placed into the current pool. Furthermore, adding the
17697 new constant pool entry may cause the pool to start FIX_SIZE bytes
17698 earlier. */
17699 if (minipool_vector_head &&
17700 (fix->address + get_attr_length (fix->insn)
17701 >= minipool_vector_head->max_address - fix->fix_size))
17702 return NULL;
17703
17704 /* Scan the pool to see if a constant with the same value has
17705 already been added. While we are doing this, also note the
17706 location where we must insert the constant if it doesn't already
17707 exist. */
17708 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17709 {
17710 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17711 && fix->mode == mp->mode
17712 && (!LABEL_P (fix->value)
17713 || (CODE_LABEL_NUMBER (fix->value)
17714 == CODE_LABEL_NUMBER (mp->value)))
17715 && rtx_equal_p (fix->value, mp->value))
17716 {
17717 /* More than one fix references this entry. */
17718 mp->refcount++;
17719 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17720 }
17721
17722 /* Note the insertion point if necessary. */
17723 if (max_mp == NULL
17724 && mp->max_address > max_address)
17725 max_mp = mp;
17726
17727 /* If we are inserting an 8-byte aligned quantity and
17728 we have not already found an insertion point, then
17729 make sure that all such 8-byte aligned quantities are
17730 placed at the start of the pool. */
17731 if (ARM_DOUBLEWORD_ALIGN
17732 && max_mp == NULL
17733 && fix->fix_size >= 8
17734 && mp->fix_size < 8)
17735 {
17736 max_mp = mp;
17737 max_address = mp->max_address;
17738 }
17739 }
17740
17741 /* The value is not currently in the minipool, so we need to create
17742 a new entry for it. If MAX_MP is NULL, the entry will be put on
17743 the end of the list since the placement is less constrained than
17744 any existing entry. Otherwise, we insert the new fix before
17745 MAX_MP and, if necessary, adjust the constraints on the other
17746 entries. */
17747 mp = XNEW (Mnode);
17748 mp->fix_size = fix->fix_size;
17749 mp->mode = fix->mode;
17750 mp->value = fix->value;
17751 mp->refcount = 1;
17752 /* Not yet required for a backwards ref. */
17753 mp->min_address = -65536;
17754
17755 if (max_mp == NULL)
17756 {
17757 mp->max_address = max_address;
17758 mp->next = NULL;
17759 mp->prev = minipool_vector_tail;
17760
17761 if (mp->prev == NULL)
17762 {
17763 minipool_vector_head = mp;
17764 minipool_vector_label = gen_label_rtx ();
17765 }
17766 else
17767 mp->prev->next = mp;
17768
17769 minipool_vector_tail = mp;
17770 }
17771 else
17772 {
17773 if (max_address > max_mp->max_address - mp->fix_size)
17774 mp->max_address = max_mp->max_address - mp->fix_size;
17775 else
17776 mp->max_address = max_address;
17777
17778 mp->next = max_mp;
17779 mp->prev = max_mp->prev;
17780 max_mp->prev = mp;
17781 if (mp->prev != NULL)
17782 mp->prev->next = mp;
17783 else
17784 minipool_vector_head = mp;
17785 }
17786
17787 /* Save the new entry. */
17788 max_mp = mp;
17789
17790 /* Scan over the preceding entries and adjust their addresses as
17791 required. */
17792 while (mp->prev != NULL
17793 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17794 {
17795 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17796 mp = mp->prev;
17797 }
17798
17799 return max_mp;
17800 }
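/* A small numeric sketch of the range computation above (all values made
   up): for a fix at address 0x100 whose insn has a 4 KB forward pool
   range and with minipool_pad == 4, max_address is
   0x100 + 4096 - 4 = 0x10FC.  Once the new entry is linked in, the scan
   at the end tightens the max_address of any preceding entries so that
   they still fit in front of it.  */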
17801
17802 static Mnode *
17803 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17804 HOST_WIDE_INT min_address)
17805 {
17806 HOST_WIDE_INT offset;
17807
17808 /* The code below assumes these are different. */
17809 gcc_assert (mp != min_mp);
17810
17811 if (min_mp == NULL)
17812 {
17813 if (min_address > mp->min_address)
17814 mp->min_address = min_address;
17815 }
17816 else
17817 {
17818 /* We will adjust this below if it is too loose. */
17819 mp->min_address = min_address;
17820
17821 /* Unlink MP from its current position. Since min_mp is non-null,
17822 mp->next must be non-null. */
17823 mp->next->prev = mp->prev;
17824 if (mp->prev != NULL)
17825 mp->prev->next = mp->next;
17826 else
17827 minipool_vector_head = mp->next;
17828
17829 /* Reinsert it after MIN_MP. */
17830 mp->prev = min_mp;
17831 mp->next = min_mp->next;
17832 min_mp->next = mp;
17833 if (mp->next != NULL)
17834 mp->next->prev = mp;
17835 else
17836 minipool_vector_tail = mp;
17837 }
17838
17839 min_mp = mp;
17840
17841 offset = 0;
17842 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17843 {
17844 mp->offset = offset;
17845 if (mp->refcount > 0)
17846 offset += mp->fix_size;
17847
17848 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17849 mp->next->min_address = mp->min_address + mp->fix_size;
17850 }
17851
17852 return min_mp;
17853 }
17854
17855 /* Add a constant to the minipool for a backward reference. Returns the
17856 node added or NULL if the constant will not fit in this pool.
17857
17858 Note that the insertion code for a backwards reference can be
17859 somewhat confusing because the calculated offsets for each fix do
17860 not take into account the size of the pool (which is still under
17861 construction). */
17862 static Mnode *
17863 add_minipool_backward_ref (Mfix *fix)
17864 {
17865 /* If set, min_mp is the last pool_entry that has a lower constraint
17866 than the one we are trying to add. */
17867 Mnode *min_mp = NULL;
17868 /* This can be negative, since it is only a constraint. */
17869 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17870 Mnode *mp;
17871
17872 /* If we can't reach the current pool from this insn, or if we can't
17873 insert this entry at the end of the pool without pushing other
17874 fixes out of range, then we don't try. This ensures that we
17875 can't fail later on. */
17876 if (min_address >= minipool_barrier->address
17877 || (minipool_vector_tail->min_address + fix->fix_size
17878 >= minipool_barrier->address))
17879 return NULL;
17880
17881 /* Scan the pool to see if a constant with the same value has
17882 already been added. While we are doing this, also note the
17883 location where we must insert the constant if it doesn't already
17884 exist. */
17885 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17886 {
17887 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17888 && fix->mode == mp->mode
17889 && (!LABEL_P (fix->value)
17890 || (CODE_LABEL_NUMBER (fix->value)
17891 == CODE_LABEL_NUMBER (mp->value)))
17892 && rtx_equal_p (fix->value, mp->value)
17893 /* Check that there is enough slack to move this entry to the
17894 end of the table (this is conservative). */
17895 && (mp->max_address
17896 > (minipool_barrier->address
17897 + minipool_vector_tail->offset
17898 + minipool_vector_tail->fix_size)))
17899 {
17900 mp->refcount++;
17901 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17902 }
17903
17904 if (min_mp != NULL)
17905 mp->min_address += fix->fix_size;
17906 else
17907 {
17908 /* Note the insertion point if necessary. */
17909 if (mp->min_address < min_address)
17910 {
17911 /* For now, we do not allow the insertion of nodes requiring 8-byte
17912 alignment anywhere but at the start of the pool. */
17913 if (ARM_DOUBLEWORD_ALIGN
17914 && fix->fix_size >= 8 && mp->fix_size < 8)
17915 return NULL;
17916 else
17917 min_mp = mp;
17918 }
17919 else if (mp->max_address
17920 < minipool_barrier->address + mp->offset + fix->fix_size)
17921 {
17922 /* Inserting before this entry would push the fix beyond
17923 its maximum address (which can happen if we have
17924 re-located a forwards fix); force the new fix to come
17925 after it. */
17926 if (ARM_DOUBLEWORD_ALIGN
17927 && fix->fix_size >= 8 && mp->fix_size < 8)
17928 return NULL;
17929 else
17930 {
17931 min_mp = mp;
17932 min_address = mp->min_address + fix->fix_size;
17933 }
17934 }
17935 /* Do not insert a non-8-byte aligned quantity before 8-byte
17936 aligned quantities. */
17937 else if (ARM_DOUBLEWORD_ALIGN
17938 && fix->fix_size < 8
17939 && mp->fix_size >= 8)
17940 {
17941 min_mp = mp;
17942 min_address = mp->min_address + fix->fix_size;
17943 }
17944 }
17945 }
17946
17947 /* We need to create a new entry. */
17948 mp = XNEW (Mnode);
17949 mp->fix_size = fix->fix_size;
17950 mp->mode = fix->mode;
17951 mp->value = fix->value;
17952 mp->refcount = 1;
17953 mp->max_address = minipool_barrier->address + 65536;
17954
17955 mp->min_address = min_address;
17956
17957 if (min_mp == NULL)
17958 {
17959 mp->prev = NULL;
17960 mp->next = minipool_vector_head;
17961
17962 if (mp->next == NULL)
17963 {
17964 minipool_vector_tail = mp;
17965 minipool_vector_label = gen_label_rtx ();
17966 }
17967 else
17968 mp->next->prev = mp;
17969
17970 minipool_vector_head = mp;
17971 }
17972 else
17973 {
17974 mp->next = min_mp->next;
17975 mp->prev = min_mp;
17976 min_mp->next = mp;
17977
17978 if (mp->next != NULL)
17979 mp->next->prev = mp;
17980 else
17981 minipool_vector_tail = mp;
17982 }
17983
17984 /* Save the new entry. */
17985 min_mp = mp;
17986
17987 if (mp->prev)
17988 mp = mp->prev;
17989 else
17990 mp->offset = 0;
17991
17992 /* Scan over the following entries and adjust their offsets. */
17993 while (mp->next != NULL)
17994 {
17995 if (mp->next->min_address < mp->min_address + mp->fix_size)
17996 mp->next->min_address = mp->min_address + mp->fix_size;
17997
17998 if (mp->refcount)
17999 mp->next->offset = mp->offset + mp->fix_size;
18000 else
18001 mp->next->offset = mp->offset;
18002
18003 mp = mp->next;
18004 }
18005
18006 return min_mp;
18007 }
18008
18009 static void
18010 assign_minipool_offsets (Mfix *barrier)
18011 {
18012 HOST_WIDE_INT offset = 0;
18013 Mnode *mp;
18014
18015 minipool_barrier = barrier;
18016
18017 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18018 {
18019 mp->offset = offset;
18020
18021 if (mp->refcount > 0)
18022 offset += mp->fix_size;
18023 }
18024 }
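/* Example of the offset assignment above, with made-up entries: for a
   pool containing a live 4-byte entry, an unreferenced 4-byte entry
   (refcount == 0) and a live 8-byte entry, the offsets assigned are 0, 4
   and 4 respectively -- the dead entry keeps an offset but does not
   advance it, since it will not be emitted.  */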
18025
18026 /* Output the literal table */
18027 static void
18028 dump_minipool (rtx_insn *scan)
18029 {
18030 Mnode * mp;
18031 Mnode * nmp;
18032 int align64 = 0;
18033
18034 if (ARM_DOUBLEWORD_ALIGN)
18035 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
18036 if (mp->refcount > 0 && mp->fix_size >= 8)
18037 {
18038 align64 = 1;
18039 break;
18040 }
18041
18042 if (dump_file)
18043 fprintf (dump_file,
18044 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
18045 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
18046
18047 scan = emit_label_after (gen_label_rtx (), scan);
18048 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
18049 scan = emit_label_after (minipool_vector_label, scan);
18050
18051 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
18052 {
18053 if (mp->refcount > 0)
18054 {
18055 if (dump_file)
18056 {
18057 fprintf (dump_file,
18058 ";; Offset %u, min %ld, max %ld ",
18059 (unsigned) mp->offset, (unsigned long) mp->min_address,
18060 (unsigned long) mp->max_address);
18061 arm_print_value (dump_file, mp->value);
18062 fputc ('\n', dump_file);
18063 }
18064
18065 rtx val = copy_rtx (mp->value);
18066
18067 switch (GET_MODE_SIZE (mp->mode))
18068 {
18069 #ifdef HAVE_consttable_1
18070 case 1:
18071 scan = emit_insn_after (gen_consttable_1 (val), scan);
18072 break;
18073
18074 #endif
18075 #ifdef HAVE_consttable_2
18076 case 2:
18077 scan = emit_insn_after (gen_consttable_2 (val), scan);
18078 break;
18079
18080 #endif
18081 #ifdef HAVE_consttable_4
18082 case 4:
18083 scan = emit_insn_after (gen_consttable_4 (val), scan);
18084 break;
18085
18086 #endif
18087 #ifdef HAVE_consttable_8
18088 case 8:
18089 scan = emit_insn_after (gen_consttable_8 (val), scan);
18090 break;
18091
18092 #endif
18093 #ifdef HAVE_consttable_16
18094 case 16:
18095 scan = emit_insn_after (gen_consttable_16 (val), scan);
18096 break;
18097
18098 #endif
18099 default:
18100 gcc_unreachable ();
18101 }
18102 }
18103
18104 nmp = mp->next;
18105 free (mp);
18106 }
18107
18108 minipool_vector_head = minipool_vector_tail = NULL;
18109 scan = emit_insn_after (gen_consttable_end (), scan);
18110 scan = emit_barrier_after (scan);
18111 }
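/* The emitted sequence is therefore, schematically:

       a fresh label
       4-byte alignment (8-byte if any live entry needs doubleword alignment)
       minipool_vector_label:
       one consttable_<size> insn per live entry, in pool order
       consttable_end
       barrier

   Dead entries (refcount == 0) are freed without emitting anything.  */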
18112
18113 /* Return the cost of forcibly inserting a barrier after INSN. */
18114 static int
18115 arm_barrier_cost (rtx_insn *insn)
18116 {
18117 /* Basing the location of the pool on the loop depth is preferable,
18118 but at the moment, the basic block information seems to be
18119 corrupted by this stage of the compilation. */
18120 int base_cost = 50;
18121 rtx_insn *next = next_nonnote_insn (insn);
18122
18123 if (next != NULL && LABEL_P (next))
18124 base_cost -= 20;
18125
18126 switch (GET_CODE (insn))
18127 {
18128 case CODE_LABEL:
18129 /* It will always be better to place the table before the label, rather
18130 than after it. */
18131 return 50;
18132
18133 case INSN:
18134 case CALL_INSN:
18135 return base_cost;
18136
18137 case JUMP_INSN:
18138 return base_cost - 10;
18139
18140 default:
18141 return base_cost + 10;
18142 }
18143 }
18144
18145 /* Find the best place in the insn stream in the range
18146 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
18147 Create the barrier by inserting a jump and add a new fix entry for
18148 it. */
18149 static Mfix *
18150 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
18151 {
18152 HOST_WIDE_INT count = 0;
18153 rtx_barrier *barrier;
18154 rtx_insn *from = fix->insn;
18155 /* The instruction after which we will insert the jump. */
18156 rtx_insn *selected = NULL;
18157 int selected_cost;
18158 /* The address at which the jump instruction will be placed. */
18159 HOST_WIDE_INT selected_address;
18160 Mfix * new_fix;
18161 HOST_WIDE_INT max_count = max_address - fix->address;
18162 rtx_code_label *label = gen_label_rtx ();
18163
18164 selected_cost = arm_barrier_cost (from);
18165 selected_address = fix->address;
18166
18167 while (from && count < max_count)
18168 {
18169 rtx_jump_table_data *tmp;
18170 int new_cost;
18171
18172 /* This code shouldn't have been called if there was a natural barrier
18173 within range. */
18174 gcc_assert (!BARRIER_P (from));
18175
18176 /* Count the length of this insn. This must stay in sync with the
18177 code that pushes minipool fixes. */
18178 if (LABEL_P (from))
18179 count += get_label_padding (from);
18180 else
18181 count += get_attr_length (from);
18182
18183 /* If there is a jump table, add its length. */
18184 if (tablejump_p (from, NULL, &tmp))
18185 {
18186 count += get_jump_table_size (tmp);
18187
18188 /* Jump tables aren't in a basic block, so base the cost on
18189 the dispatch insn. If we select this location, we will
18190 still put the pool after the table. */
18191 new_cost = arm_barrier_cost (from);
18192
18193 if (count < max_count
18194 && (!selected || new_cost <= selected_cost))
18195 {
18196 selected = tmp;
18197 selected_cost = new_cost;
18198 selected_address = fix->address + count;
18199 }
18200
18201 /* Continue after the dispatch table. */
18202 from = NEXT_INSN (tmp);
18203 continue;
18204 }
18205
18206 new_cost = arm_barrier_cost (from);
18207
18208 if (count < max_count
18209 && (!selected || new_cost <= selected_cost))
18210 {
18211 selected = from;
18212 selected_cost = new_cost;
18213 selected_address = fix->address + count;
18214 }
18215
18216 from = NEXT_INSN (from);
18217 }
18218
18219 /* Make sure that we found a place to insert the jump. */
18220 gcc_assert (selected);
18221
18222 /* Create a new JUMP_INSN that branches around a barrier. */
18223 from = emit_jump_insn_after (gen_jump (label), selected);
18224 JUMP_LABEL (from) = label;
18225 barrier = emit_barrier_after (from);
18226 emit_label_after (label, barrier);
18227
18228 /* Create a minipool barrier entry for the new barrier. */
18229 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
18230 new_fix->insn = barrier;
18231 new_fix->address = selected_address;
18232 new_fix->next = fix->next;
18233 fix->next = new_fix;
18234
18235 return new_fix;
18236 }
18237
18238 /* Record that there is a natural barrier in the insn stream at
18239 ADDRESS. */
18240 static void
18241 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
18242 {
18243 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18244
18245 fix->insn = insn;
18246 fix->address = address;
18247
18248 fix->next = NULL;
18249 if (minipool_fix_head != NULL)
18250 minipool_fix_tail->next = fix;
18251 else
18252 minipool_fix_head = fix;
18253
18254 minipool_fix_tail = fix;
18255 }
18256
18257 /* Record INSN, which will need fixing up to load a value from the
18258 minipool. ADDRESS is the offset of the insn since the start of the
18259 function; LOC is a pointer to the part of the insn which requires
18260 fixing; VALUE is the constant that must be loaded, which is of type
18261 MODE. */
18262 static void
18263 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
18264 machine_mode mode, rtx value)
18265 {
18266 gcc_assert (!arm_disable_literal_pool);
18267 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
18268
18269 fix->insn = insn;
18270 fix->address = address;
18271 fix->loc = loc;
18272 fix->mode = mode;
18273 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
18274 fix->value = value;
18275 fix->forwards = get_attr_pool_range (insn);
18276 fix->backwards = get_attr_neg_pool_range (insn);
18277 fix->minipool = NULL;
18278
18279 /* If an insn doesn't have a range defined for it, then it isn't
18280 expecting to be reworked by this code. Better to stop now than
18281 to generate duff assembly code. */
18282 gcc_assert (fix->forwards || fix->backwards);
18283
18284 /* If an entry requires 8-byte alignment then assume all constant pools
18285 require 4 bytes of padding. Trying to do this later on a per-pool
18286 basis is awkward because existing pool entries have to be modified. */
18287 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
18288 minipool_pad = 4;
18289
18290 if (dump_file)
18291 {
18292 fprintf (dump_file,
18293 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
18294 GET_MODE_NAME (mode),
18295 INSN_UID (insn), (unsigned long) address,
18296 -1 * (long)fix->backwards, (long)fix->forwards);
18297 arm_print_value (dump_file, fix->value);
18298 fprintf (dump_file, "\n");
18299 }
18300
18301 /* Add it to the chain of fixes. */
18302 fix->next = NULL;
18303
18304 if (minipool_fix_head != NULL)
18305 minipool_fix_tail->next = fix;
18306 else
18307 minipool_fix_head = fix;
18308
18309 minipool_fix_tail = fix;
18310 }
18311
18312 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
18313 constant inline; constants whose synthesis cost exceeds this are
18314 better left to the literal pool. */
18315 int
18316 arm_max_const_double_inline_cost ()
18317 {
18318 return ((optimize_size || arm_ld_sched) ? 3 : 4);
18319 }
18320
18321 /* Return the cost of synthesizing a 64-bit constant VAL inline.
18322 Returns the number of insns needed, or 99 if we don't know how to
18323 do it. */
18324 int
18325 arm_const_double_inline_cost (rtx val)
18326 {
18327 rtx lowpart, highpart;
18328 machine_mode mode;
18329
18330 mode = GET_MODE (val);
18331
18332 if (mode == VOIDmode)
18333 mode = DImode;
18334
18335 gcc_assert (GET_MODE_SIZE (mode) == 8);
18336
18337 lowpart = gen_lowpart (SImode, val);
18338 highpart = gen_highpart_mode (SImode, mode, val);
18339
18340 gcc_assert (CONST_INT_P (lowpart));
18341 gcc_assert (CONST_INT_P (highpart));
18342
18343 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
18344 NULL_RTX, NULL_RTX, 0, 0)
18345 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
18346 NULL_RTX, NULL_RTX, 0, 0));
18347 }
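/* A quick illustration (constant chosen arbitrarily): for the DImode
   value 0x0000000100000001 both 32-bit halves are the immediate 1, which
   arm_gen_constant can produce with a single MOV each, so the returned
   cost is 1 + 1 = 2 and the value is cheap enough to build inline under
   either threshold returned by arm_max_const_double_inline_cost.  */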
18348
18349 /* Cost of loading a SImode constant. */
18350 static inline int
18351 arm_const_inline_cost (enum rtx_code code, rtx val)
18352 {
18353 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
18354 NULL_RTX, NULL_RTX, 1, 0);
18355 }
18356
18357 /* Return true if it is worthwhile to split a 64-bit constant into two
18358 32-bit operations. This is the case if optimizing for size, or
18359 if we have load delay slots, or if one 32-bit part can be done with
18360 a single data operation. */
18361 bool
18362 arm_const_double_by_parts (rtx val)
18363 {
18364 machine_mode mode = GET_MODE (val);
18365 rtx part;
18366
18367 if (optimize_size || arm_ld_sched)
18368 return true;
18369
18370 if (mode == VOIDmode)
18371 mode = DImode;
18372
18373 part = gen_highpart_mode (SImode, mode, val);
18374
18375 gcc_assert (CONST_INT_P (part));
18376
18377 if (const_ok_for_arm (INTVAL (part))
18378 || const_ok_for_arm (~INTVAL (part)))
18379 return true;
18380
18381 part = gen_lowpart (SImode, val);
18382
18383 gcc_assert (CONST_INT_P (part));
18384
18385 if (const_ok_for_arm (INTVAL (part))
18386 || const_ok_for_arm (~INTVAL (part)))
18387 return true;
18388
18389 return false;
18390 }
18391
18392 /* Return true if it is possible to inline both the high and low parts
18393 of a 64-bit constant into 32-bit data processing instructions. */
18394 bool
18395 arm_const_double_by_immediates (rtx val)
18396 {
18397 machine_mode mode = GET_MODE (val);
18398 rtx part;
18399
18400 if (mode == VOIDmode)
18401 mode = DImode;
18402
18403 part = gen_highpart_mode (SImode, mode, val);
18404
18405 gcc_assert (CONST_INT_P (part));
18406
18407 if (!const_ok_for_arm (INTVAL (part)))
18408 return false;
18409
18410 part = gen_lowpart (SImode, val);
18411
18412 gcc_assert (CONST_INT_P (part));
18413
18414 if (!const_ok_for_arm (INTVAL (part)))
18415 return false;
18416
18417 return true;
18418 }
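/* For example (values made up): 0xFF000000000000FF splits into a high
   part of 0xFF000000 and a low part of 0x000000FF, both valid ARM
   rotated-immediate operands, so the function returns true.  Something
   like 0x1234567800000001 fails because 0x12345678 cannot be encoded as
   a single immediate.  */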
18419
18420 /* Scan INSN and note any of its operands that need fixing.
18421 If DO_PUSHES is false we do not actually push any of the fixups
18422 needed. */
18423 static void
18424 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18425 {
18426 int opno;
18427
18428 extract_constrain_insn (insn);
18429
18430 if (recog_data.n_alternatives == 0)
18431 return;
18432
18433 /* Fill in recog_op_alt with information about the constraints of
18434 this insn. */
18435 preprocess_constraints (insn);
18436
18437 const operand_alternative *op_alt = which_op_alt ();
18438 for (opno = 0; opno < recog_data.n_operands; opno++)
18439 {
18440 /* Things we need to fix can only occur in inputs. */
18441 if (recog_data.operand_type[opno] != OP_IN)
18442 continue;
18443
18444 /* If this alternative is a memory reference, then any mention
18445 of constants in this alternative is really to fool reload
18446 into allowing us to accept one there. We need to fix them up
18447 now so that we output the right code. */
18448 if (op_alt[opno].memory_ok)
18449 {
18450 rtx op = recog_data.operand[opno];
18451
18452 if (CONSTANT_P (op))
18453 {
18454 if (do_pushes)
18455 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18456 recog_data.operand_mode[opno], op);
18457 }
18458 else if (MEM_P (op)
18459 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18460 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18461 {
18462 if (do_pushes)
18463 {
18464 rtx cop = avoid_constant_pool_reference (op);
18465
18466 /* Casting the address of something to a mode narrower
18467 than a word can cause avoid_constant_pool_reference()
18468 to return the pool reference itself. That's no good to
18469 us here. Let's just hope that we can use the
18470 constant pool value directly. */
18471 if (op == cop)
18472 cop = get_pool_constant (XEXP (op, 0));
18473
18474 push_minipool_fix (insn, address,
18475 recog_data.operand_loc[opno],
18476 recog_data.operand_mode[opno], cop);
18477 }
18478
18479 }
18480 }
18481 }
18482
18483 return;
18484 }
18485
18486 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18487 and unions in the context of ARMv8-M Security Extensions. It is used as a
18488 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18489 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18490 or four masks, depending on whether it is being computed for a
18491 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18492 respectively. The tree for the type of the argument or a field within an
18493 argument is passed in ARG_TYPE, the current register this argument or field
18494 starts in is kept in the pointer REGNO and updated accordingly, the bit this
18495 argument or field starts at is passed in STARTING_BIT and the last used bit
18496 is kept in LAST_USED_BIT which is also updated accordingly. */
18497
18498 static unsigned HOST_WIDE_INT
18499 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18500 uint32_t * padding_bits_to_clear,
18501 unsigned starting_bit, int * last_used_bit)
18502
18503 {
18504 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18505
18506 if (TREE_CODE (arg_type) == RECORD_TYPE)
18507 {
18508 unsigned current_bit = starting_bit;
18509 tree field;
18510 long int offset, size;
18511
18512
18513 field = TYPE_FIELDS (arg_type);
18514 while (field)
18515 {
18516 /* The offset within a structure is always an offset from
18517 the start of that structure. Make sure we take that into account
18518 in the calculation of the register-based offset we use here. */
18519 offset = starting_bit;
18520 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18521 offset %= 32;
18522
18523 /* This is the actual size of the field, for bitfields this is the
18524 bitfield width and not the container size. */
18525 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18526
18527 if (*last_used_bit != offset)
18528 {
18529 if (offset < *last_used_bit)
18530 {
18531 /* This field's offset is before the 'last_used_bit', that
18532 means this field goes on the next register. So we need to
18533 pad the rest of the current register and increase the
18534 register number. */
18535 uint32_t mask;
18536 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18537 mask++;
18538
18539 padding_bits_to_clear[*regno] |= mask;
18540 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18541 (*regno)++;
18542 }
18543 else
18544 {
18545 /* Otherwise we pad the bits between the last field's end and
18546 the start of the new field. */
18547 uint32_t mask;
18548
18549 mask = ((uint32_t)-1) >> (32 - offset);
18550 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18551 padding_bits_to_clear[*regno] |= mask;
18552 }
18553 current_bit = offset;
18554 }
18555
18556 /* Calculate further padding bits for inner structs/unions too. */
18557 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18558 {
18559 *last_used_bit = current_bit;
18560 not_to_clear_reg_mask
18561 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18562 padding_bits_to_clear, offset,
18563 last_used_bit);
18564 }
18565 else
18566 {
18567 /* Update 'current_bit' with this field's size. If the
18568 'current_bit' lies in a subsequent register, update 'regno' and
18569 reset 'current_bit' to point to the current bit in that new
18570 register. */
18571 current_bit += size;
18572 while (current_bit >= 32)
18573 {
18574 current_bit-=32;
18575 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18576 (*regno)++;
18577 }
18578 *last_used_bit = current_bit;
18579 }
18580
18581 field = TREE_CHAIN (field);
18582 }
18583 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18584 }
18585 else if (TREE_CODE (arg_type) == UNION_TYPE)
18586 {
18587 tree field, field_t;
18588 int i, regno_t, field_size;
18589 int max_reg = -1;
18590 int max_bit = -1;
18591 uint32_t mask;
18592 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18593 = {-1, -1, -1, -1};
18594
18595 /* To compute the padding bits in a union we only consider bits as
18596 padding bits if they are always either a padding bit or fall outside a
18597 field's size for all fields in the union. */
18598 field = TYPE_FIELDS (arg_type);
18599 while (field)
18600 {
18601 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18602 = {0U, 0U, 0U, 0U};
18603 int last_used_bit_t = *last_used_bit;
18604 regno_t = *regno;
18605 field_t = TREE_TYPE (field);
18606
18607 /* If the field's type is either a record or a union, make sure to
18608 compute its padding bits too. */
18609 if (RECORD_OR_UNION_TYPE_P (field_t))
18610 not_to_clear_reg_mask
18611 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18612 &padding_bits_to_clear_t[0],
18613 starting_bit, &last_used_bit_t);
18614 else
18615 {
18616 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18617 regno_t = (field_size / 32) + *regno;
18618 last_used_bit_t = (starting_bit + field_size) % 32;
18619 }
18620
18621 for (i = *regno; i < regno_t; i++)
18622 {
18623 /* For all but the last register used by this field only keep the
18624 padding bits that were padding bits in this field. */
18625 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18626 }
18627
18628 /* For the last register, keep all padding bits that were padding
18629 bits in this field and any padding bits that are still valid
18630 as padding bits but fall outside of this field's size. */
18631 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18632 padding_bits_to_clear_res[regno_t]
18633 &= padding_bits_to_clear_t[regno_t] | mask;
18634
18635 /* Update the maximum size of the fields in terms of registers used
18636 ('max_reg') and the 'last_used_bit' in said register. */
18637 if (max_reg < regno_t)
18638 {
18639 max_reg = regno_t;
18640 max_bit = last_used_bit_t;
18641 }
18642 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18643 max_bit = last_used_bit_t;
18644
18645 field = TREE_CHAIN (field);
18646 }
18647
18648 /* Update the current padding_bits_to_clear using the intersection of the
18649 padding bits of all the fields. */
18650 for (i=*regno; i < max_reg; i++)
18651 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18652
18653 /* Do not keep trailing padding bits; we do not know yet whether this
18654 is the end of the argument. */
18655 mask = ((uint32_t) 1 << max_bit) - 1;
18656 padding_bits_to_clear[max_reg]
18657 |= padding_bits_to_clear_res[max_reg] & mask;
18658
18659 *regno = max_reg;
18660 *last_used_bit = max_bit;
18661 }
18662 else
18663 /* This function should only be used for structs and unions. */
18664 gcc_unreachable ();
18665
18666 return not_to_clear_reg_mask;
18667 }
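/* The two padding-mask constructions used above are simple bit-range
   selections: ((uint32_t) -1 >> (32 - offset)) minus
   (((uint32_t) 1 << last_used_bit) - 1) sets exactly the bits
   [last_used_bit, offset - 1], i.e. the gap between the end of the
   previous field and the start of the next one, while
   ((uint32_t) -1) - ((uint32_t) 1 << last_used_bit) + 1 sets the bits
   [last_used_bit, 31], i.e. the unused tail of the current register.  */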
18668
18669 /* In the context of ARMv8-M Security Extensions, this function is used for both
18670 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18671 registers are used when returning or passing arguments, which is then
18672 returned as a mask. It will also compute a mask to indicate padding/unused
18673 bits for each of these registers, and passes this through the
18674 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18675 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18676 the starting register used to pass this argument or return value is passed
18677 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18678 for struct and union types. */
18679
18680 static unsigned HOST_WIDE_INT
18681 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18682 uint32_t * padding_bits_to_clear)
18683
18684 {
18685 int last_used_bit = 0;
18686 unsigned HOST_WIDE_INT not_to_clear_mask;
18687
18688 if (RECORD_OR_UNION_TYPE_P (arg_type))
18689 {
18690 not_to_clear_mask
18691 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18692 padding_bits_to_clear, 0,
18693 &last_used_bit);
18694
18695
18696 /* If the 'last_used_bit' is not zero, that means we are still using a
18697 part of the last 'regno'. In such cases we must clear the trailing
18698 bits. Otherwise we are not using regno and we should mark it as
18699 needing to be cleared. */
18700 if (last_used_bit != 0)
18701 padding_bits_to_clear[regno]
18702 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18703 else
18704 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18705 }
18706 else
18707 {
18708 not_to_clear_mask = 0;
18709 /* We are not dealing with structs or unions, so these arguments may
18710 also be passed in floating-point registers. In some cases a BLKmode is
18711 used when returning or passing arguments in multiple VFP registers. */
18712 if (GET_MODE (arg_rtx) == BLKmode)
18713 {
18714 int i, arg_regs;
18715 rtx reg;
18716
18717 /* This should really only occur when dealing with the hard-float
18718 ABI. */
18719 gcc_assert (TARGET_HARD_FLOAT_ABI);
18720
18721 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18722 {
18723 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18724 gcc_assert (REG_P (reg));
18725
18726 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18727
18728 /* If we are dealing with DF mode, make sure we don't
18729 clear either of the registers it addresses. */
18730 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18731 if (arg_regs > 1)
18732 {
18733 unsigned HOST_WIDE_INT mask;
18734 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18735 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18736 not_to_clear_mask |= mask;
18737 }
18738 }
18739 }
18740 else
18741 {
18742 /* Otherwise we can rely on the MODE to determine how many registers
18743 are being used by this argument. */
18744 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18745 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18746 if (arg_regs > 1)
18747 {
18748 unsigned HOST_WIDE_INT
18749 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18750 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18751 not_to_clear_mask |= mask;
18752 }
18753 }
18754 }
18755
18756 return not_to_clear_mask;
18757 }
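/* A worked example (the argument type is hypothetical): for a value of
   type

       struct { unsigned char a; unsigned short b; }

   passed in r0, field A occupies bits 0-7 and field B, aligned to a
   halfword, occupies bits 16-31, so bits 8-15 are padding.  The helper
   records padding_bits_to_clear[0] == 0x0000ff00, LAST_USED_BIT ends up
   as 0 (the register is fully used), and the mask returned here
   therefore covers only r0.  */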
18758
18759 /* Clear registers that may hold secrets before doing a cmse_nonsecure_call or
18760 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
18761 which registers are to be fully cleared, using the value in register
18762 CLEARING_REG if more efficient. The PADDING_BITS_LEN-entry array
18763 PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in caller-saved
18764 core registers, with SCRATCH_REG used as a scratch register for that clearing.
18765
18766 NOTE: one of three following assertions must hold:
18767 - SCRATCH_REG is a low register
18768 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
18769 in TO_CLEAR_BITMAP)
18770 - CLEARING_REG is a low register. */
18771
18772 static void
18773 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18774 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18775 {
18776 bool saved_clearing = false;
18777 rtx saved_clearing_reg = NULL_RTX;
18778 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18779
18780 gcc_assert (arm_arch_cmse);
18781
18782 if (!bitmap_empty_p (to_clear_bitmap))
18783 {
18784 minregno = bitmap_first_set_bit (to_clear_bitmap);
18785 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18786 }
18787 clearing_regno = REGNO (clearing_reg);
18788
18789 /* Clear padding bits. */
18790 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18791 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18792 {
18793 uint64_t mask;
18794 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18795
18796 if (padding_bits_to_clear[i] == 0)
18797 continue;
18798
18799 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18800 CLEARING_REG as scratch. */
18801 if (TARGET_THUMB1
18802 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18803 {
18804 /* clearing_reg is not to be cleared; copy its value into scratch_reg
18805 so that we can use clearing_reg to clear the unused bits in the
18806 arguments. */
18807 if ((clearing_regno > maxregno
18808 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18809 && !saved_clearing)
18810 {
18811 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18812 emit_move_insn (scratch_reg, clearing_reg);
18813 saved_clearing = true;
18814 saved_clearing_reg = scratch_reg;
18815 }
18816 scratch_reg = clearing_reg;
18817 }
18818
18819 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18820 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18821 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18822
18823 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18824 mask = (~padding_bits_to_clear[i]) >> 16;
18825 rtx16 = gen_int_mode (16, SImode);
18826 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18827 if (mask)
18828 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18829
18830 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18831 }
18832 if (saved_clearing)
18833 emit_move_insn (clearing_reg, saved_clearing_reg);
18834
18835
18836 /* Clear full registers. */
18837
18838 if (TARGET_HAVE_FPCXT_CMSE)
18839 {
18840 rtvec vunspec_vec;
18841 int i, j, k, nb_regs;
18842 rtx use_seq, par, reg, set, vunspec;
18843 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18844 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18845 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18846
18847 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18848 {
18849 /* Find next register to clear and exit if none. */
18850 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18851 if (i > maxregno)
18852 break;
18853
18854 /* Compute number of consecutive registers to clear. */
18855 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18856 j++);
18857 nb_regs = j - i;
18858
18859 /* Create VSCCLRM RTX pattern. */
18860 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18861 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18862 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18863 VUNSPEC_VSCCLRM_VPR);
18864 XVECEXP (par, 0, 0) = vunspec;
18865
18866 /* Insert VFP register clearing RTX in the pattern. */
18867 start_sequence ();
18868 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18869 {
18870 if (!bitmap_bit_p (to_clear_bitmap, j))
18871 continue;
18872
18873 reg = gen_rtx_REG (SFmode, j);
18874 set = gen_rtx_SET (reg, const0_rtx);
18875 XVECEXP (par, 0, k++) = set;
18876 emit_use (reg);
18877 }
18878 use_seq = get_insns ();
18879 end_sequence ();
18880
18881 emit_insn_after (use_seq, emit_insn (par));
18882 }
18883
18884 /* Get set of core registers to clear. */
18885 bitmap_clear (core_regs_bitmap);
18886 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18887 IP_REGNUM - R0_REGNUM + 1);
18888 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18889 core_regs_bitmap);
18890 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18891
18892 if (bitmap_empty_p (to_clear_core_bitmap))
18893 return;
18894
18895 /* Create clrm RTX pattern. */
18896 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18897 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18898
18899 /* Insert core register clearing RTX in the pattern. */
18900 start_sequence ();
18901 for (j = 0, i = minregno; j < nb_regs; i++)
18902 {
18903 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18904 continue;
18905
18906 reg = gen_rtx_REG (SImode, i);
18907 set = gen_rtx_SET (reg, const0_rtx);
18908 XVECEXP (par, 0, j++) = set;
18909 emit_use (reg);
18910 }
18911
18912 /* Insert APSR register clearing RTX in the pattern
18913 along with clobbering CC. */
18914 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18915 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18916 VUNSPEC_CLRM_APSR);
18917
18918 XVECEXP (par, 0, j++) = vunspec;
18919
18920 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18921 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18922 XVECEXP (par, 0, j) = clobber;
18923
18924 use_seq = get_insns ();
18925 end_sequence ();
18926
18927 emit_insn_after (use_seq, emit_insn (par));
18928 }
18929 else
18930 {
18931 /* If not marked for clearing, clearing_reg already does not contain
18932 any secret. */
18933 if (clearing_regno <= maxregno
18934 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18935 {
18936 emit_move_insn (clearing_reg, const0_rtx);
18937 emit_use (clearing_reg);
18938 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18939 }
18940
18941 for (regno = minregno; regno <= maxregno; regno++)
18942 {
18943 if (!bitmap_bit_p (to_clear_bitmap, regno))
18944 continue;
18945
18946 if (IS_VFP_REGNUM (regno))
18947 {
18948 /* If regno is an even vfp register and its successor is also to
18949 be cleared, use vmov. */
18950 if (TARGET_VFP_DOUBLE
18951 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18952 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18953 {
18954 emit_move_insn (gen_rtx_REG (DFmode, regno),
18955 CONST1_RTX (DFmode));
18956 emit_use (gen_rtx_REG (DFmode, regno));
18957 regno++;
18958 }
18959 else
18960 {
18961 emit_move_insn (gen_rtx_REG (SFmode, regno),
18962 CONST1_RTX (SFmode));
18963 emit_use (gen_rtx_REG (SFmode, regno));
18964 }
18965 }
18966 else
18967 {
18968 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18969 emit_use (gen_rtx_REG (SImode, regno));
18970 }
18971 }
18972 }
18973 }
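/* Sketch of the padding-clearing sequence generated above for a single
   argument register (mask value made up): with
   padding_bits_to_clear[0] == 0x0000ff00 the negated mask is 0xffff00ff,
   so the scratch register is first loaded with 0x00ff, its top half is
   then set to 0xffff via the ZERO_EXTRACT (a MOVT-style insert), and
   finally r0 is ANDed with the scratch register, zeroing exactly bits
   8-15 while preserving the argument bits.  */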
18974
18975 /* Clear core and caller-saved VFP registers not used to pass arguments before
18976 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18977 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18978 libgcc/config/arm/cmse_nonsecure_call.S. */
18979
18980 static void
18981 cmse_nonsecure_call_inline_register_clear (void)
18982 {
18983 basic_block bb;
18984
18985 FOR_EACH_BB_FN (bb, cfun)
18986 {
18987 rtx_insn *insn;
18988
18989 FOR_BB_INSNS (bb, insn)
18990 {
18991 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18992 /* frame = VFP regs + FPSCR + VPR. */
18993 unsigned lazy_store_stack_frame_size
18994 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18995 unsigned long callee_saved_mask
18996 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18997 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18998 unsigned address_regnum, regno;
18999 unsigned max_int_regno
19000 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
19001 unsigned max_fp_regno
19002 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
19003 unsigned maxregno
19004 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
19005 auto_sbitmap to_clear_bitmap (maxregno + 1);
19006 rtx_insn *seq;
19007 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
19008 rtx address;
19009 CUMULATIVE_ARGS args_so_far_v;
19010 cumulative_args_t args_so_far;
19011 tree arg_type, fntype;
19012 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
19013 function_args_iterator args_iter;
19014 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
19015
19016 if (!NONDEBUG_INSN_P (insn))
19017 continue;
19018
19019 if (!CALL_P (insn))
19020 continue;
19021
19022 pat = PATTERN (insn);
19023 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
19024 call = XVECEXP (pat, 0, 0);
19025
19026 /* Get the real call RTX if the insn sets a value, ie. returns. */
19027 if (GET_CODE (call) == SET)
19028 call = SET_SRC (call);
19029
19030 /* Check if it is a cmse_nonsecure_call. */
19031 unspec = XEXP (call, 0);
19032 if (GET_CODE (unspec) != UNSPEC
19033 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
19034 continue;
19035
19036 /* Mark registers that need to be cleared. Those that hold a
19037 parameter are removed from the set further below. */
19038 bitmap_clear (to_clear_bitmap);
19039 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
19040 max_int_regno - R0_REGNUM + 1);
19041
19042 /* Only look at the caller-saved floating point registers in case of
19043 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
19044 lazy store and loads which clear both caller- and callee-saved
19045 registers. */
19046 if (!lazy_fpclear)
19047 {
19048 auto_sbitmap float_bitmap (maxregno + 1);
19049
19050 bitmap_clear (float_bitmap);
19051 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
19052 max_fp_regno - FIRST_VFP_REGNUM + 1);
19053 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
19054 }
19055
19056 /* Make sure the register used to hold the function address is not
19057 cleared. */
19058 address = RTVEC_ELT (XVEC (unspec, 0), 0);
19059 gcc_assert (MEM_P (address));
19060 gcc_assert (REG_P (XEXP (address, 0)));
19061 address_regnum = REGNO (XEXP (address, 0));
19062 if (address_regnum <= max_int_regno)
19063 bitmap_clear_bit (to_clear_bitmap, address_regnum);
19064
19065 /* Set basic block of call insn so that df rescan is performed on
19066 insns inserted here. */
19067 set_block_for_insn (insn, bb);
19068 df_set_flags (DF_DEFER_INSN_RESCAN);
19069 start_sequence ();
19070
19071 /* Make sure the scheduler doesn't schedule other insns beyond
19072 here. */
19073 emit_insn (gen_blockage ());
19074
19075 /* Walk through all arguments and clear registers appropriately.
19076 */
19077 fntype = TREE_TYPE (MEM_EXPR (address));
19078 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
19079 NULL_TREE);
19080 args_so_far = pack_cumulative_args (&args_so_far_v);
19081 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
19082 {
19083 rtx arg_rtx;
19084 uint64_t to_clear_args_mask;
19085
19086 if (VOID_TYPE_P (arg_type))
19087 continue;
19088
19089 function_arg_info arg (arg_type, /*named=*/true);
19090 if (!first_param)
19091 /* ??? We should advance after processing the argument and pass
19092 the argument we're advancing past. */
19093 arm_function_arg_advance (args_so_far, arg);
19094
19095 arg_rtx = arm_function_arg (args_so_far, arg);
19096 gcc_assert (REG_P (arg_rtx));
19097 to_clear_args_mask
19098 = compute_not_to_clear_mask (arg_type, arg_rtx,
19099 REGNO (arg_rtx),
19100 &padding_bits_to_clear[0]);
19101 if (to_clear_args_mask)
19102 {
19103 for (regno = R0_REGNUM; regno <= maxregno; regno++)
19104 {
19105 if (to_clear_args_mask & (1ULL << regno))
19106 bitmap_clear_bit (to_clear_bitmap, regno);
19107 }
19108 }
19109
19110 first_param = false;
19111 }
19112
19113 /* We use right shift and left shift to clear the LSB of the address
19114 we jump to instead of using bic, to avoid having to use an extra
19115 register on Thumb-1. */
19116 clearing_reg = XEXP (address, 0);
19117 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
19118 emit_insn (gen_rtx_SET (clearing_reg, shift));
19119 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
19120 emit_insn (gen_rtx_SET (clearing_reg, shift));
19121
19122 if (clear_callee_saved)
19123 {
19124 rtx push_insn =
19125 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
19126 /* Disable frame debug info in push because it needs to be
19127 disabled for pop (see below). */
19128 RTX_FRAME_RELATED_P (push_insn) = 0;
19129
19130 /* Lazy store multiple. */
19131 if (lazy_fpclear)
19132 {
19133 rtx imm;
19134 rtx_insn *add_insn;
19135
19136 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
19137 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19138 stack_pointer_rtx, imm));
19139 /* If we have the frame pointer, then it will be the
19140 CFA reg. Otherwise, the stack pointer is the CFA
19141 reg, so we need to emit a CFA adjust. */
19142 if (!frame_pointer_needed)
19143 arm_add_cfa_adjust_cfa_note (add_insn,
19144 - lazy_store_stack_frame_size,
19145 stack_pointer_rtx,
19146 stack_pointer_rtx);
19147 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
19148 }
19149 /* Save VFP callee-saved registers. */
19150 else
19151 {
19152 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
19153 (max_fp_regno - D7_VFP_REGNUM) / 2);
19154 /* Disable frame debug info in push because it needs to be
19155 disabled for vpop (see below). */
19156 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19157 }
19158 }
19159
19160 /* Clear caller-saved registers that could leak secrets before doing a
19161 non-secure call. */
19162 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
19163 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
19164 NUM_ARG_REGS, ip_reg, clearing_reg);
19165
19166 seq = get_insns ();
19167 end_sequence ();
19168 emit_insn_before (seq, insn);
19169
19170 if (TARGET_HAVE_FPCXT_CMSE)
19171 {
19172 rtx_insn *last, *pop_insn, *after = insn;
19173
19174 start_sequence ();
19175
19176 /* Lazy load multiple done as part of libcall in Armv8-M. */
19177 if (lazy_fpclear)
19178 {
19179 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
19180 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
19181 rtx_insn *add_insn =
19182 emit_insn (gen_addsi3 (stack_pointer_rtx,
19183 stack_pointer_rtx, imm));
19184 if (!frame_pointer_needed)
19185 arm_add_cfa_adjust_cfa_note (add_insn,
19186 lazy_store_stack_frame_size,
19187 stack_pointer_rtx,
19188 stack_pointer_rtx);
19189 }
19190 /* Restore VFP callee-saved registers. */
19191 else
19192 {
19193 int nb_callee_saved_vfp_regs =
19194 (max_fp_regno - D7_VFP_REGNUM) / 2;
19195 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
19196 nb_callee_saved_vfp_regs,
19197 stack_pointer_rtx);
19198 /* Disable frame debug info in vpop because the SP adjustment
19199 is made using a CFA adjustment note while the register used as
19200 the CFA is sometimes R7. This then causes an assert failure in the
19201 CFI note creation code. */
19202 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
19203 }
19204
19205 arm_emit_multi_reg_pop (callee_saved_mask);
19206 pop_insn = get_last_insn ();
19207
19208 /* Disable frame debug info in the pop because its CFI notes reset the
19209 state of the popped registers to what it was at the beginning of the
19210 function, before the prologue. This leads to incorrect state
19211 when doing the pop after the nonsecure call for registers that
19212 are pushed both in prologue and before the nonsecure call.
19213
19214 It also occasionally triggers an assert failure in CFI note
19215 creation code when there are two codepaths to the epilogue,
19216 one of which does not go through the nonsecure call.
19217 Obviously this means that debugging between the push and pop is
19218 not reliable. */
19219 RTX_FRAME_RELATED_P (pop_insn) = 0;
19220
19221 seq = get_insns ();
19222 last = get_last_insn ();
19223 end_sequence ();
19224
19225 emit_insn_after (seq, after);
19226
19227 /* Skip the pop we have just inserted after the nonsecure call; we know
19228 it does not contain a nonsecure call. */
19229 insn = last;
19230 }
19231 }
19232 }
19233 }
19234
19235 /* Rewrite a move insn into a subtract of 0 if the condition codes will
19236 be useful in the next conditional jump insn. */
19237
19238 static void
19239 thumb1_reorg (void)
19240 {
19241 basic_block bb;
19242
19243 FOR_EACH_BB_FN (bb, cfun)
19244 {
19245 rtx dest, src;
19246 rtx cmp, op0, op1, set = NULL;
19247 rtx_insn *prev, *insn = BB_END (bb);
19248 bool insn_clobbered = false;
19249
19250 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
19251 insn = PREV_INSN (insn);
19252
19253 /* Find the last cbranchsi4_insn in basic block BB. */
19254 if (insn == BB_HEAD (bb)
19255 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
19256 continue;
19257
19258 /* Get the register with which we are comparing. */
19259 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
19260 op0 = XEXP (cmp, 0);
19261 op1 = XEXP (cmp, 1);
19262
19263 /* Check that comparison is against ZERO. */
19264 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
19265 continue;
19266
19267 /* Find the first flag setting insn before INSN in basic block BB. */
19268 gcc_assert (insn != BB_HEAD (bb));
19269 for (prev = PREV_INSN (insn);
19270 (!insn_clobbered
19271 && prev != BB_HEAD (bb)
19272 && (NOTE_P (prev)
19273 || DEBUG_INSN_P (prev)
19274 || ((set = single_set (prev)) != NULL
19275 && get_attr_conds (prev) == CONDS_NOCOND)));
19276 prev = PREV_INSN (prev))
19277 {
19278 if (reg_set_p (op0, prev))
19279 insn_clobbered = true;
19280 }
19281
19282 /* Skip if op0 is clobbered by an insn other than prev. */
19283 if (insn_clobbered)
19284 continue;
19285
19286 if (!set)
19287 continue;
19288
19289 dest = SET_DEST (set);
19290 src = SET_SRC (set);
19291 if (!low_register_operand (dest, SImode)
19292 || !low_register_operand (src, SImode))
19293 continue;
19294
19295 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
19296 in INSN. Both src and dest of the move insn are checked. */
19297 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
19298 {
19299 dest = copy_rtx (dest);
19300 src = copy_rtx (src);
19301 src = gen_rtx_MINUS (SImode, src, const0_rtx);
19302 PATTERN (prev) = gen_rtx_SET (dest, src);
19303 INSN_CODE (prev) = -1;
19304 /* Set test register in INSN to dest. */
19305 XEXP (cmp, 0) = copy_rtx (dest);
19306 INSN_CODE (insn) = -1;
19307 }
19308 }
19309 }
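/* Schematically, the transformation above turns

       (set (reg:SI Rd) (reg:SI Rs))
       ...
       (cbranchsi4_insn comparing (reg:SI Rs) with 0)

   into

       (set (reg:SI Rd) (minus:SI (reg:SI Rs) (const_int 0)))
       ...
       (cbranchsi4_insn comparing (reg:SI Rd) with 0)

   so that the flag-setting SUBS form can feed the conditional branch
   directly (the register names are placeholders).  */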
19310
19311 /* Convert instructions to their cc-clobbering variant if possible, since
19312 that allows us to use smaller encodings. */
19313
19314 static void
19315 thumb2_reorg (void)
19316 {
19317 basic_block bb;
19318 regset_head live;
19319
19320 INIT_REG_SET (&live);
19321
19322 /* We are freeing block_for_insn in the toplev to keep compatibility
19323 with old MDEP_REORGS that are not CFG based. Recompute it now. */
19324 compute_bb_for_insn ();
19325 df_analyze ();
19326
19327 enum Convert_Action {SKIP, CONV, SWAP_CONV};
19328
19329 FOR_EACH_BB_FN (bb, cfun)
19330 {
19331 if ((current_tune->disparage_flag_setting_t16_encodings
19332 == tune_params::DISPARAGE_FLAGS_ALL)
19333 && optimize_bb_for_speed_p (bb))
19334 continue;
19335
19336 rtx_insn *insn;
19337 Convert_Action action = SKIP;
19338 Convert_Action action_for_partial_flag_setting
19339 = ((current_tune->disparage_flag_setting_t16_encodings
19340 != tune_params::DISPARAGE_FLAGS_NEITHER)
19341 && optimize_bb_for_speed_p (bb))
19342 ? SKIP : CONV;
19343
19344 COPY_REG_SET (&live, DF_LR_OUT (bb));
19345 df_simulate_initialize_backwards (bb, &live);
19346 FOR_BB_INSNS_REVERSE (bb, insn)
19347 {
19348 if (NONJUMP_INSN_P (insn)
19349 && !REGNO_REG_SET_P (&live, CC_REGNUM)
19350 && GET_CODE (PATTERN (insn)) == SET)
19351 {
19352 action = SKIP;
19353 rtx pat = PATTERN (insn);
19354 rtx dst = XEXP (pat, 0);
19355 rtx src = XEXP (pat, 1);
19356 rtx op0 = NULL_RTX, op1 = NULL_RTX;
19357
19358 if (UNARY_P (src) || BINARY_P (src))
19359 op0 = XEXP (src, 0);
19360
19361 if (BINARY_P (src))
19362 op1 = XEXP (src, 1);
19363
19364 if (low_register_operand (dst, SImode))
19365 {
19366 switch (GET_CODE (src))
19367 {
19368 case PLUS:
19369 /* Adding two registers and storing the result
19370 in the first source is already a 16-bit
19371 operation. */
19372 if (rtx_equal_p (dst, op0)
19373 && register_operand (op1, SImode))
19374 break;
19375
19376 if (low_register_operand (op0, SImode))
19377 {
19378 /* ADDS <Rd>,<Rn>,<Rm> */
19379 if (low_register_operand (op1, SImode))
19380 action = CONV;
19381 /* ADDS <Rdn>,#<imm8> */
19382 /* SUBS <Rdn>,#<imm8> */
19383 else if (rtx_equal_p (dst, op0)
19384 && CONST_INT_P (op1)
19385 && IN_RANGE (INTVAL (op1), -255, 255))
19386 action = CONV;
19387 /* ADDS <Rd>,<Rn>,#<imm3> */
19388 /* SUBS <Rd>,<Rn>,#<imm3> */
19389 else if (CONST_INT_P (op1)
19390 && IN_RANGE (INTVAL (op1), -7, 7))
19391 action = CONV;
19392 }
19393 /* ADCS <Rd>, <Rn> */
19394 else if (GET_CODE (XEXP (src, 0)) == PLUS
19395 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19396 && low_register_operand (XEXP (XEXP (src, 0), 1),
19397 SImode)
19398 && COMPARISON_P (op1)
19399 && cc_register (XEXP (op1, 0), VOIDmode)
19400 && maybe_get_arm_condition_code (op1) == ARM_CS
19401 && XEXP (op1, 1) == const0_rtx)
19402 action = CONV;
19403 break;
19404
19405 case MINUS:
19406 /* RSBS <Rd>,<Rn>,#0
19407 Not handled here: see NEG below. */
19408 /* SUBS <Rd>,<Rn>,#<imm3>
19409 SUBS <Rdn>,#<imm8>
19410 Not handled here: see PLUS above. */
19411 /* SUBS <Rd>,<Rn>,<Rm> */
19412 if (low_register_operand (op0, SImode)
19413 && low_register_operand (op1, SImode))
19414 action = CONV;
19415 break;
19416
19417 case MULT:
19418 /* MULS <Rdm>,<Rn>,<Rdm>
19419 As an exception to the rule, this is only used
19420 when optimizing for size since MULS is slow on all
19421 known implementations. We do not even want to use
19422 MULS in cold code, if optimizing for speed, so we
19423 test the global flag here. */
19424 if (!optimize_size)
19425 break;
19426 /* Fall through. */
19427 case AND:
19428 case IOR:
19429 case XOR:
19430 /* ANDS <Rdn>,<Rm> */
19431 if (rtx_equal_p (dst, op0)
19432 && low_register_operand (op1, SImode))
19433 action = action_for_partial_flag_setting;
19434 else if (rtx_equal_p (dst, op1)
19435 && low_register_operand (op0, SImode))
19436 action = action_for_partial_flag_setting == SKIP
19437 ? SKIP : SWAP_CONV;
19438 break;
19439
19440 case ASHIFTRT:
19441 case ASHIFT:
19442 case LSHIFTRT:
19443 /* ASRS <Rdn>,<Rm> */
19444 /* LSRS <Rdn>,<Rm> */
19445 /* LSLS <Rdn>,<Rm> */
19446 if (rtx_equal_p (dst, op0)
19447 && low_register_operand (op1, SImode))
19448 action = action_for_partial_flag_setting;
19449 /* ASRS <Rd>,<Rm>,#<imm5> */
19450 /* LSRS <Rd>,<Rm>,#<imm5> */
19451 /* LSLS <Rd>,<Rm>,#<imm5> */
19452 else if (low_register_operand (op0, SImode)
19453 && CONST_INT_P (op1)
19454 && IN_RANGE (INTVAL (op1), 0, 31))
19455 action = action_for_partial_flag_setting;
19456 break;
19457
19458 case ROTATERT:
19459 /* RORS <Rdn>,<Rm> */
19460 if (rtx_equal_p (dst, op0)
19461 && low_register_operand (op1, SImode))
19462 action = action_for_partial_flag_setting;
19463 break;
19464
19465 case NOT:
19466 /* MVNS <Rd>,<Rm> */
19467 if (low_register_operand (op0, SImode))
19468 action = action_for_partial_flag_setting;
19469 break;
19470
19471 case NEG:
19472 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19473 if (low_register_operand (op0, SImode))
19474 action = CONV;
19475 break;
19476
19477 case CONST_INT:
19478 /* MOVS <Rd>,#<imm8> */
19479 if (CONST_INT_P (src)
19480 && IN_RANGE (INTVAL (src), 0, 255))
19481 action = action_for_partial_flag_setting;
19482 break;
19483
19484 case REG:
19485 /* MOVS and MOV<c> with registers have different
19486 encodings, so are not relevant here. */
19487 break;
19488
19489 default:
19490 break;
19491 }
19492 }
19493
19494 if (action != SKIP)
19495 {
19496 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19497 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19498 rtvec vec;
19499
19500 if (action == SWAP_CONV)
19501 {
19502 src = copy_rtx (src);
19503 XEXP (src, 0) = op1;
19504 XEXP (src, 1) = op0;
19505 pat = gen_rtx_SET (dst, src);
19506 vec = gen_rtvec (2, pat, clobber);
19507 }
19508 else /* action == CONV */
19509 vec = gen_rtvec (2, pat, clobber);
19510
19511 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19512 INSN_CODE (insn) = -1;
19513 }
19514 }
19515
19516 if (NONDEBUG_INSN_P (insn))
19517 df_simulate_one_insn_backwards (bb, insn, &live);
19518 }
19519 }
19520
19521 CLEAR_REG_SET (&live);
19522 }
19523
19524 /* GCC puts the constant pool in the wrong place for ARM, since we can
19525 only load addresses a limited distance around the PC. We do some
19526 special munging to move the constant pool values to the correct
19527 point in the code. */
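/* Illustrative note on the distances involved: an ARM-state LDR of a pool
   entry reaches only roughly +/-4KB from the PC, Thumb-1 PC-relative loads
   reach only about 1KB forwards, and VFP loads roughly +/-1KB, so each
   minipool must be dumped close to the instructions that reference it. */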
19528 static void
19529 arm_reorg (void)
19530 {
19531 rtx_insn *insn;
19532 HOST_WIDE_INT address = 0;
19533 Mfix * fix;
19534
19535 if (use_cmse)
19536 cmse_nonsecure_call_inline_register_clear ();
19537
19538 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19539 if (cfun->is_thunk)
19540 ;
19541 else if (TARGET_THUMB1)
19542 thumb1_reorg ();
19543 else if (TARGET_THUMB2)
19544 thumb2_reorg ();
19545
19546 /* Ensure all insns that must be split have been split at this point.
19547 Otherwise, the pool placement code below may compute incorrect
19548 insn lengths. Note that when optimizing, all insns have already
19549 been split at this point. */
19550 if (!optimize)
19551 split_all_insns_noflow ();
19552
19553 /* Make sure we do not attempt to create a literal pool even though none
19554 should be needed any longer. */
19555 if (arm_disable_literal_pool)
19556 return;
19557
19558 minipool_fix_head = minipool_fix_tail = NULL;
19559
19560 /* The first insn must always be a note, or the code below won't
19561 scan it properly. */
19562 insn = get_insns ();
19563 gcc_assert (NOTE_P (insn));
19564 minipool_pad = 0;
19565
19566 /* Scan all the insns and record the operands that will need fixing. */
19567 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19568 {
19569 if (BARRIER_P (insn))
19570 push_minipool_barrier (insn, address);
19571 else if (INSN_P (insn))
19572 {
19573 rtx_jump_table_data *table;
19574
19575 note_invalid_constants (insn, address, true);
19576 address += get_attr_length (insn);
19577
19578 /* If the insn is a vector jump, add the size of the table
19579 and skip the table. */
19580 if (tablejump_p (insn, NULL, &table))
19581 {
19582 address += get_jump_table_size (table);
19583 insn = table;
19584 }
19585 }
19586 else if (LABEL_P (insn))
19587 /* Add the worst-case padding due to alignment. We don't add
19588 the _current_ padding because the minipool insertions
19589 themselves might change it. */
19590 address += get_label_padding (insn);
19591 }
19592
19593 fix = minipool_fix_head;
19594
19595 /* Now scan the fixups and perform the required changes. */
19596 while (fix)
19597 {
19598 Mfix * ftmp;
19599 Mfix * fdel;
19600 Mfix * last_added_fix;
19601 Mfix * last_barrier = NULL;
19602 Mfix * this_fix;
19603
19604 /* Skip any further barriers before the next fix. */
19605 while (fix && BARRIER_P (fix->insn))
19606 fix = fix->next;
19607
19608 /* No more fixes. */
19609 if (fix == NULL)
19610 break;
19611
19612 last_added_fix = NULL;
19613
19614 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19615 {
19616 if (BARRIER_P (ftmp->insn))
19617 {
19618 if (ftmp->address >= minipool_vector_head->max_address)
19619 break;
19620
19621 last_barrier = ftmp;
19622 }
19623 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19624 break;
19625
19626 last_added_fix = ftmp; /* Keep track of the last fix added. */
19627 }
19628
19629 /* If we found a barrier, drop back to that; any fixes that we
19630 could have reached but come after the barrier will now go in
19631 the next mini-pool. */
19632 if (last_barrier != NULL)
19633 {
19634 /* Reduce the refcount for those fixes that won't go into this
19635 pool after all. */
19636 for (fdel = last_barrier->next;
19637 fdel && fdel != ftmp;
19638 fdel = fdel->next)
19639 {
19640 fdel->minipool->refcount--;
19641 fdel->minipool = NULL;
19642 }
19643
19644 ftmp = last_barrier;
19645 }
19646 else
19647 {
19648 /* ftmp is the first fix that we can't fit into this pool and
19649 there are no natural barriers that we could use. Insert a
19650 new barrier in the code somewhere between the previous
19651 fix and this one, and arrange to jump around it. */
19652 HOST_WIDE_INT max_address;
19653
19654 /* The last item on the list of fixes must be a barrier, so
19655 we can never run off the end of the list of fixes without
19656 last_barrier being set. */
19657 gcc_assert (ftmp);
19658
19659 max_address = minipool_vector_head->max_address;
19660 /* Check that there isn't another fix that is in range that
19661 we couldn't fit into this pool because the pool was
19662 already too large: we need to put the pool before such an
19663 instruction. The pool itself may come just after the
19664 fix because create_fix_barrier also allows space for a
19665 jump instruction. */
19666 if (ftmp->address < max_address)
19667 max_address = ftmp->address + 1;
19668
19669 last_barrier = create_fix_barrier (last_added_fix, max_address);
19670 }
19671
19672 assign_minipool_offsets (last_barrier);
19673
19674 while (ftmp)
19675 {
19676 if (!BARRIER_P (ftmp->insn)
19677 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19678 == NULL))
19679 break;
19680
19681 ftmp = ftmp->next;
19682 }
19683
19684 /* Scan over the fixes we have identified for this pool, fixing them
19685 up and adding the constants to the pool itself. */
19686 for (this_fix = fix; this_fix && ftmp != this_fix;
19687 this_fix = this_fix->next)
19688 if (!BARRIER_P (this_fix->insn))
19689 {
19690 rtx addr
19691 = plus_constant (Pmode,
19692 gen_rtx_LABEL_REF (VOIDmode,
19693 minipool_vector_label),
19694 this_fix->minipool->offset);
19695 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19696 }
19697
19698 dump_minipool (last_barrier->insn);
19699 fix = ftmp;
19700 }
19701
19702 /* From now on we must synthesize any constants that we can't handle
19703 directly. This can happen if the RTL gets split during final
19704 instruction generation. */
19705 cfun->machine->after_arm_reorg = 1;
19706
19707 /* Free the minipool memory. */
19708 obstack_free (&minipool_obstack, minipool_startobj);
19709 }
19710 \f
19711 /* Routines to output assembly language. */
19712
19713 /* Return string representation of passed in real value. */
19714 static const char *
19715 fp_const_from_val (REAL_VALUE_TYPE *r)
19716 {
19717 if (!fp_consts_inited)
19718 init_fp_table ();
19719
19720 gcc_assert (real_equal (r, &value_fp0));
19721 return "0";
19722 }
19723
19724 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19725 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
19726 is in the list, and UPDATE is true iff the list contains an explicit
19727 update of the base register. */
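/* For example (illustrative): popping {r4, r5, pc} off SP with writeback
   prints "pop {r4, r5, pc}", while the same list loaded from r7 without
   writeback prints "ldm r7, {r4, r5, pc}"; a return from an interrupt
   appends "^" so that SPSR is restored into CPSR. */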
19728 void
19729 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19730 bool update)
19731 {
19732 int i;
19733 char pattern[100];
19734 int offset;
19735 const char *conditional;
19736 int num_saves = XVECLEN (operands[0], 0);
19737 unsigned int regno;
19738 unsigned int regno_base = REGNO (operands[1]);
19739 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19740
19741 offset = 0;
19742 offset += update ? 1 : 0;
19743 offset += return_pc ? 1 : 0;
19744
19745 /* Is the base register in the list? */
19746 for (i = offset; i < num_saves; i++)
19747 {
19748 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19749 /* If SP is in the list, then the base register must be SP. */
19750 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19751 /* If base register is in the list, there must be no explicit update. */
19752 if (regno == regno_base)
19753 gcc_assert (!update);
19754 }
19755
19756 conditional = reverse ? "%?%D0" : "%?%d0";
19757 /* Can't use POP if returning from an interrupt. */
19758 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19759 sprintf (pattern, "pop%s\t{", conditional);
19760 else
19761 {
19762 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19763 It's just a convention; their semantics are identical. */
19764 if (regno_base == SP_REGNUM)
19765 sprintf (pattern, "ldmfd%s\t", conditional);
19766 else if (update)
19767 sprintf (pattern, "ldmia%s\t", conditional);
19768 else
19769 sprintf (pattern, "ldm%s\t", conditional);
19770
19771 strcat (pattern, reg_names[regno_base]);
19772 if (update)
19773 strcat (pattern, "!, {");
19774 else
19775 strcat (pattern, ", {");
19776 }
19777
19778 /* Output the first destination register. */
19779 strcat (pattern,
19780 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19781
19782 /* Output the rest of the destination registers. */
19783 for (i = offset + 1; i < num_saves; i++)
19784 {
19785 strcat (pattern, ", ");
19786 strcat (pattern,
19787 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19788 }
19789
19790 strcat (pattern, "}");
19791
19792 if (interrupt_p && return_pc)
19793 strcat (pattern, "^");
19794
19795 output_asm_insn (pattern, &cond);
19796 }
19797
19798
19799 /* Output the assembly for a store multiple. */
19800
19801 const char *
19802 vfp_output_vstmd (rtx * operands)
19803 {
19804 char pattern[100];
19805 int p;
19806 int base;
19807 int i;
19808 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19809 ? XEXP (operands[0], 0)
19810 : XEXP (XEXP (operands[0], 0), 0);
19811 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19812
19813 if (push_p)
19814 strcpy (pattern, "vpush%?.64\t{%P1");
19815 else
19816 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19817
19818 p = strlen (pattern);
19819
19820 gcc_assert (REG_P (operands[1]));
19821
19822 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19823 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19824 {
19825 p += sprintf (&pattern[p], ", d%d", base + i);
19826 }
19827 strcpy (&pattern[p], "}");
19828
19829 output_asm_insn (pattern, operands);
19830 return "";
19831 }
19832
19833
19834 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
19835 number of bytes pushed. */
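/* Illustrative example: a call with BASE_REG naming d8 and COUNT == 3
   emits a single store-multiple that pre-decrements SP by 24, stores
   d8-d10, and returns 24. */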
19836
19837 static int
19838 vfp_emit_fstmd (int base_reg, int count)
19839 {
19840 rtx par;
19841 rtx dwarf;
19842 rtx tmp, reg;
19843 int i;
19844
19845 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
19846 register pairs are stored by a store multiple insn. We avoid this
19847 by pushing an extra pair. */
19848 if (count == 2 && !arm_arch6)
19849 {
19850 if (base_reg == LAST_VFP_REGNUM - 3)
19851 base_reg -= 2;
19852 count++;
19853 }
19854
19855 /* FSTMD may not store more than 16 doubleword registers at once. Split
19856 larger stores into multiple parts (up to a maximum of two, in
19857 practice). */
19858 if (count > 16)
19859 {
19860 int saved;
19861 /* NOTE: base_reg is an internal register number, so each D register
19862 counts as 2. */
19863 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19864 saved += vfp_emit_fstmd (base_reg, 16);
19865 return saved;
19866 }
19867
19868 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19869 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19870
19871 reg = gen_rtx_REG (DFmode, base_reg);
19872 base_reg += 2;
19873
19874 XVECEXP (par, 0, 0)
19875 = gen_rtx_SET (gen_frame_mem
19876 (BLKmode,
19877 gen_rtx_PRE_MODIFY (Pmode,
19878 stack_pointer_rtx,
19879 plus_constant
19880 (Pmode, stack_pointer_rtx,
19881 - (count * 8)))
19882 ),
19883 gen_rtx_UNSPEC (BLKmode,
19884 gen_rtvec (1, reg),
19885 UNSPEC_PUSH_MULT));
19886
19887 tmp = gen_rtx_SET (stack_pointer_rtx,
19888 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19889 RTX_FRAME_RELATED_P (tmp) = 1;
19890 XVECEXP (dwarf, 0, 0) = tmp;
19891
19892 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19893 RTX_FRAME_RELATED_P (tmp) = 1;
19894 XVECEXP (dwarf, 0, 1) = tmp;
19895
19896 for (i = 1; i < count; i++)
19897 {
19898 reg = gen_rtx_REG (DFmode, base_reg);
19899 base_reg += 2;
19900 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19901
19902 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19903 plus_constant (Pmode,
19904 stack_pointer_rtx,
19905 i * 8)),
19906 reg);
19907 RTX_FRAME_RELATED_P (tmp) = 1;
19908 XVECEXP (dwarf, 0, i + 1) = tmp;
19909 }
19910
19911 par = emit_insn (par);
19912 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19913 RTX_FRAME_RELATED_P (par) = 1;
19914
19915 return count * 8;
19916 }
19917
19918 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
19919 has the cmse_nonsecure_call attribute; return false otherwise. */
19920
19921 bool
19922 detect_cmse_nonsecure_call (tree addr)
19923 {
19924 if (!addr)
19925 return FALSE;
19926
19927 tree fntype = TREE_TYPE (addr);
19928 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19929 TYPE_ATTRIBUTES (fntype)))
19930 return TRUE;
19931 return FALSE;
19932 }
19933
19934
19935 /* Emit a call instruction with pattern PAT. ADDR is the address of
19936 the call target. */
19937
19938 void
19939 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19940 {
19941 rtx insn;
19942
19943 insn = emit_call_insn (pat);
19944
19945 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19946 If the call might use such an entry, add a use of the PIC register
19947 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19948 if (TARGET_VXWORKS_RTP
19949 && flag_pic
19950 && !sibcall
19951 && SYMBOL_REF_P (addr)
19952 && (SYMBOL_REF_DECL (addr)
19953 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19954 : !SYMBOL_REF_LOCAL_P (addr)))
19955 {
19956 require_pic_register (NULL_RTX, false /*compute_now*/);
19957 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19958 }
19959
19960 if (TARGET_FDPIC)
19961 {
19962 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19963 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19964 }
19965
19966 if (TARGET_AAPCS_BASED)
19967 {
19968 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19969 linker. We need to add an IP clobber to allow setting
19970 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19971 is not needed since it's a fixed register. */
19972 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19973 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19974 }
19975 }
19976
19977 /* Output a 'call' insn. */
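/* For example (illustrative): a call through r2 is emitted as
   "mov lr, pc" followed by "bx r2" when interworking or ARMv4T is
   available, and by "mov pc, r2" otherwise; a call through lr first
   copies lr into ip and then calls through ip. */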
19978 const char *
19979 output_call (rtx *operands)
19980 {
19981 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19982
19983 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19984 if (REGNO (operands[0]) == LR_REGNUM)
19985 {
19986 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19987 output_asm_insn ("mov%?\t%0, %|lr", operands);
19988 }
19989
19990 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19991
19992 if (TARGET_INTERWORK || arm_arch4t)
19993 output_asm_insn ("bx%?\t%0", operands);
19994 else
19995 output_asm_insn ("mov%?\t%|pc, %0", operands);
19996
19997 return "";
19998 }
19999
20000 /* Output a move of a long double from ARM registers to ARM registers.
20001 OPERANDS[0] is the destination.
20002 OPERANDS[1] is the source. */
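/* For example (illustrative): moving {r2, r3, r4} into {r1, r2, r3}
   copies r1 <- r2, r2 <- r3, r3 <- r4 in ascending order so that no
   source register is overwritten before it has been read; when the
   destination starts above the source the loop runs downwards instead. */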
20003 const char *
20004 output_mov_long_double_arm_from_arm (rtx *operands)
20005 {
20006 /* We have to be careful here because the two might overlap. */
20007 int dest_start = REGNO (operands[0]);
20008 int src_start = REGNO (operands[1]);
20009 rtx ops[2];
20010 int i;
20011
20012 if (dest_start < src_start)
20013 {
20014 for (i = 0; i < 3; i++)
20015 {
20016 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20017 ops[1] = gen_rtx_REG (SImode, src_start + i);
20018 output_asm_insn ("mov%?\t%0, %1", ops);
20019 }
20020 }
20021 else
20022 {
20023 for (i = 2; i >= 0; i--)
20024 {
20025 ops[0] = gen_rtx_REG (SImode, dest_start + i);
20026 ops[1] = gen_rtx_REG (SImode, src_start + i);
20027 output_asm_insn ("mov%?\t%0, %1", ops);
20028 }
20029 }
20030
20031 return "";
20032 }
20033
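/* Emit a move of SRC into DEST as a pair of 16-bit sets. For a constant
   SRC this is a set of the low 16 bits followed, when the upper half is
   nonzero, by a ZERO_EXTRACT set of the upper 16 bits (a movw/movt style
   sequence); for a symbolic SRC a HIGH/LO_SUM pair is used and a
   REG_EQUAL note records the full value. */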
20034 void
20035 arm_emit_movpair (rtx dest, rtx src)
20036 {
20037 /* If the src is an immediate, simplify it. */
20038 if (CONST_INT_P (src))
20039 {
20040 HOST_WIDE_INT val = INTVAL (src);
20041 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
20042 if ((val >> 16) & 0x0000ffff)
20043 {
20044 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
20045 GEN_INT (16)),
20046 GEN_INT ((val >> 16) & 0x0000ffff));
20047 rtx_insn *insn = get_last_insn ();
20048 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20049 }
20050 return;
20051 }
20052 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
20053 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
20054 rtx_insn *insn = get_last_insn ();
20055 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
20056 }
20057
20058 /* Output a move between double words. It must be REG<-MEM
20059 or MEM<-REG. */
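/* For example (illustrative): a load of r0/r1 from (mem (reg r2)) is
   emitted as "ldrd r0, [r2]" when LDRD is usable and as
   "ldmia r2, {r0, r1}" otherwise; stores use STRD/STM in the same way. */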
20060 const char *
20061 output_move_double (rtx *operands, bool emit, int *count)
20062 {
20063 enum rtx_code code0 = GET_CODE (operands[0]);
20064 enum rtx_code code1 = GET_CODE (operands[1]);
20065 rtx otherops[3];
20066 if (count)
20067 *count = 1;
20068
20069 /* The only case when this might happen is when
20070 you are looking at the length of a DImode instruction
20071 that has an invalid constant in it. */
20072 if (code0 == REG && code1 != MEM)
20073 {
20074 gcc_assert (!emit);
20075 *count = 2;
20076 return "";
20077 }
20078
20079 if (code0 == REG)
20080 {
20081 unsigned int reg0 = REGNO (operands[0]);
20082 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
20083
20084 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
20085
20086 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
20087
20088 switch (GET_CODE (XEXP (operands[1], 0)))
20089 {
20090 case REG:
20091
20092 if (emit)
20093 {
20094 if (can_ldrd
20095 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
20096 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
20097 else
20098 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20099 }
20100 break;
20101
20102 case PRE_INC:
20103 gcc_assert (can_ldrd);
20104 if (emit)
20105 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
20106 break;
20107
20108 case PRE_DEC:
20109 if (emit)
20110 {
20111 if (can_ldrd)
20112 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
20113 else
20114 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
20115 }
20116 break;
20117
20118 case POST_INC:
20119 if (emit)
20120 {
20121 if (can_ldrd)
20122 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
20123 else
20124 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
20125 }
20126 break;
20127
20128 case POST_DEC:
20129 gcc_assert (can_ldrd);
20130 if (emit)
20131 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
20132 break;
20133
20134 case PRE_MODIFY:
20135 case POST_MODIFY:
20136 /* Autoincrement addressing modes should never have overlapping
20137 base and destination registers, and overlapping index registers
20138 are already prohibited, so this doesn't need to worry about
20139 fix_cm3_ldrd. */
20140 otherops[0] = operands[0];
20141 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
20142 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
20143
20144 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
20145 {
20146 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
20147 {
20148 /* Registers overlap so split out the increment. */
20149 if (emit)
20150 {
20151 gcc_assert (can_ldrd);
20152 output_asm_insn ("add%?\t%1, %1, %2", otherops);
20153 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
20154 }
20155 if (count)
20156 *count = 2;
20157 }
20158 else
20159 {
20160 /* Use a single insn if we can.
20161 FIXME: IWMMXT allows offsets larger than ldrd can
20162 handle, fix these up with a pair of ldr. */
20163 if (can_ldrd
20164 && (TARGET_THUMB2
20165 || !CONST_INT_P (otherops[2])
20166 || (INTVAL (otherops[2]) > -256
20167 && INTVAL (otherops[2]) < 256)))
20168 {
20169 if (emit)
20170 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
20171 }
20172 else
20173 {
20174 if (emit)
20175 {
20176 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
20177 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20178 }
20179 if (count)
20180 *count = 2;
20181
20182 }
20183 }
20184 }
20185 else
20186 {
20187 /* Use a single insn if we can.
20188 FIXME: IWMMXT allows offsets larger than ldrd can handle,
20189 fix these up with a pair of ldr. */
20190 if (can_ldrd
20191 && (TARGET_THUMB2
20192 || !CONST_INT_P (otherops[2])
20193 || (INTVAL (otherops[2]) > -256
20194 && INTVAL (otherops[2]) < 256)))
20195 {
20196 if (emit)
20197 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
20198 }
20199 else
20200 {
20201 if (emit)
20202 {
20203 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
20204 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
20205 }
20206 if (count)
20207 *count = 2;
20208 }
20209 }
20210 break;
20211
20212 case LABEL_REF:
20213 case CONST:
20214 /* We might be able to use ldrd %0, %1 here. However the range is
20215 different to ldr/adr, and it is broken on some ARMv7-M
20216 implementations. */
20217 /* Use the second register of the pair to avoid problematic
20218 overlap. */
20219 otherops[1] = operands[1];
20220 if (emit)
20221 output_asm_insn ("adr%?\t%0, %1", otherops);
20222 operands[1] = otherops[0];
20223 if (emit)
20224 {
20225 if (can_ldrd)
20226 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20227 else
20228 output_asm_insn ("ldmia%?\t%1, %M0", operands);
20229 }
20230
20231 if (count)
20232 *count = 2;
20233 break;
20234
20235 /* ??? This needs checking for thumb2. */
20236 default:
20237 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
20238 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
20239 {
20240 otherops[0] = operands[0];
20241 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
20242 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
20243
20244 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
20245 {
20246 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20247 {
20248 switch ((int) INTVAL (otherops[2]))
20249 {
20250 case -8:
20251 if (emit)
20252 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
20253 return "";
20254 case -4:
20255 if (TARGET_THUMB2)
20256 break;
20257 if (emit)
20258 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
20259 return "";
20260 case 4:
20261 if (TARGET_THUMB2)
20262 break;
20263 if (emit)
20264 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
20265 return "";
20266 }
20267 }
20268 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
20269 operands[1] = otherops[0];
20270 if (can_ldrd
20271 && (REG_P (otherops[2])
20272 || TARGET_THUMB2
20273 || (CONST_INT_P (otherops[2])
20274 && INTVAL (otherops[2]) > -256
20275 && INTVAL (otherops[2]) < 256)))
20276 {
20277 if (reg_overlap_mentioned_p (operands[0],
20278 otherops[2]))
20279 {
20280 /* Swap base and index registers over to
20281 avoid a conflict. */
20282 std::swap (otherops[1], otherops[2]);
20283 }
20284 /* If both registers conflict, it will usually
20285 have been fixed by a splitter. */
20286 if (reg_overlap_mentioned_p (operands[0], otherops[2])
20287 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
20288 {
20289 if (emit)
20290 {
20291 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20292 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
20293 }
20294 if (count)
20295 *count = 2;
20296 }
20297 else
20298 {
20299 otherops[0] = operands[0];
20300 if (emit)
20301 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
20302 }
20303 return "";
20304 }
20305
20306 if (CONST_INT_P (otherops[2]))
20307 {
20308 if (emit)
20309 {
20310 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
20311 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
20312 else
20313 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20314 }
20315 }
20316 else
20317 {
20318 if (emit)
20319 output_asm_insn ("add%?\t%0, %1, %2", otherops);
20320 }
20321 }
20322 else
20323 {
20324 if (emit)
20325 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
20326 }
20327
20328 if (count)
20329 *count = 2;
20330
20331 if (can_ldrd)
20332 return "ldrd%?\t%0, [%1]";
20333
20334 return "ldmia%?\t%1, %M0";
20335 }
20336 else
20337 {
20338 otherops[1] = adjust_address (operands[1], SImode, 4);
20339 /* Take care of overlapping base/data reg. */
20340 if (reg_mentioned_p (operands[0], operands[1]))
20341 {
20342 if (emit)
20343 {
20344 output_asm_insn ("ldr%?\t%0, %1", otherops);
20345 output_asm_insn ("ldr%?\t%0, %1", operands);
20346 }
20347 if (count)
20348 *count = 2;
20349
20350 }
20351 else
20352 {
20353 if (emit)
20354 {
20355 output_asm_insn ("ldr%?\t%0, %1", operands);
20356 output_asm_insn ("ldr%?\t%0, %1", otherops);
20357 }
20358 if (count)
20359 *count = 2;
20360 }
20361 }
20362 }
20363 }
20364 else
20365 {
20366 /* Constraints should ensure this. */
20367 gcc_assert (code0 == MEM && code1 == REG);
20368 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20369 || (TARGET_ARM && TARGET_LDRD));
20370
20371 /* For TARGET_ARM the first source register of an STRD
20372 must be even. This is usually the case for double-word
20373 values but user assembly constraints can force an odd
20374 starting register. */
20375 bool allow_strd = TARGET_LDRD
20376 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20377 switch (GET_CODE (XEXP (operands[0], 0)))
20378 {
20379 case REG:
20380 if (emit)
20381 {
20382 if (allow_strd)
20383 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20384 else
20385 output_asm_insn ("stm%?\t%m0, %M1", operands);
20386 }
20387 break;
20388
20389 case PRE_INC:
20390 gcc_assert (allow_strd);
20391 if (emit)
20392 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20393 break;
20394
20395 case PRE_DEC:
20396 if (emit)
20397 {
20398 if (allow_strd)
20399 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20400 else
20401 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20402 }
20403 break;
20404
20405 case POST_INC:
20406 if (emit)
20407 {
20408 if (allow_strd)
20409 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20410 else
20411 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20412 }
20413 break;
20414
20415 case POST_DEC:
20416 gcc_assert (allow_strd);
20417 if (emit)
20418 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20419 break;
20420
20421 case PRE_MODIFY:
20422 case POST_MODIFY:
20423 otherops[0] = operands[1];
20424 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20425 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20426
20427 /* IWMMXT allows offsets larger than strd can handle, so
20428 fix these up with a pair of str. */
20429 if (!TARGET_THUMB2
20430 && CONST_INT_P (otherops[2])
20431 && (INTVAL(otherops[2]) <= -256
20432 || INTVAL(otherops[2]) >= 256))
20433 {
20434 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20435 {
20436 if (emit)
20437 {
20438 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20439 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20440 }
20441 if (count)
20442 *count = 2;
20443 }
20444 else
20445 {
20446 if (emit)
20447 {
20448 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20449 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20450 }
20451 if (count)
20452 *count = 2;
20453 }
20454 }
20455 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20456 {
20457 if (emit)
20458 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20459 }
20460 else
20461 {
20462 if (emit)
20463 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20464 }
20465 break;
20466
20467 case PLUS:
20468 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20469 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20470 {
20471 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20472 {
20473 case -8:
20474 if (emit)
20475 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20476 return "";
20477
20478 case -4:
20479 if (TARGET_THUMB2)
20480 break;
20481 if (emit)
20482 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20483 return "";
20484
20485 case 4:
20486 if (TARGET_THUMB2)
20487 break;
20488 if (emit)
20489 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20490 return "";
20491 }
20492 }
20493 if (allow_strd
20494 && (REG_P (otherops[2])
20495 || TARGET_THUMB2
20496 || (CONST_INT_P (otherops[2])
20497 && INTVAL (otherops[2]) > -256
20498 && INTVAL (otherops[2]) < 256)))
20499 {
20500 otherops[0] = operands[1];
20501 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20502 if (emit)
20503 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20504 return "";
20505 }
20506 /* Fall through */
20507
20508 default:
20509 otherops[0] = adjust_address (operands[0], SImode, 4);
20510 otherops[1] = operands[1];
20511 if (emit)
20512 {
20513 output_asm_insn ("str%?\t%1, %0", operands);
20514 output_asm_insn ("str%?\t%H1, %0", otherops);
20515 }
20516 if (count)
20517 *count = 2;
20518 }
20519 }
20520
20521 return "";
20522 }
20523
20524 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20525 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
20526
20527 const char *
20528 output_move_quad (rtx *operands)
20529 {
20530 if (REG_P (operands[0]))
20531 {
20532 /* Load, or reg->reg move. */
20533
20534 if (MEM_P (operands[1]))
20535 {
20536 switch (GET_CODE (XEXP (operands[1], 0)))
20537 {
20538 case REG:
20539 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20540 break;
20541
20542 case LABEL_REF:
20543 case CONST:
20544 output_asm_insn ("adr%?\t%0, %1", operands);
20545 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20546 break;
20547
20548 default:
20549 gcc_unreachable ();
20550 }
20551 }
20552 else
20553 {
20554 rtx ops[2];
20555 int dest, src, i;
20556
20557 gcc_assert (REG_P (operands[1]));
20558
20559 dest = REGNO (operands[0]);
20560 src = REGNO (operands[1]);
20561
20562 /* This seems pretty dumb, but hopefully GCC won't try to do it
20563 very often. */
20564 if (dest < src)
20565 for (i = 0; i < 4; i++)
20566 {
20567 ops[0] = gen_rtx_REG (SImode, dest + i);
20568 ops[1] = gen_rtx_REG (SImode, src + i);
20569 output_asm_insn ("mov%?\t%0, %1", ops);
20570 }
20571 else
20572 for (i = 3; i >= 0; i--)
20573 {
20574 ops[0] = gen_rtx_REG (SImode, dest + i);
20575 ops[1] = gen_rtx_REG (SImode, src + i);
20576 output_asm_insn ("mov%?\t%0, %1", ops);
20577 }
20578 }
20579 }
20580 else
20581 {
20582 gcc_assert (MEM_P (operands[0]));
20583 gcc_assert (REG_P (operands[1]));
20584 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20585
20586 switch (GET_CODE (XEXP (operands[0], 0)))
20587 {
20588 case REG:
20589 output_asm_insn ("stm%?\t%m0, %M1", operands);
20590 break;
20591
20592 default:
20593 gcc_unreachable ();
20594 }
20595 }
20596
20597 return "";
20598 }
20599
20600 /* Output a VFP load or store instruction. */
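/* For example (illustrative): a DFmode load from (mem (reg r0)) prints as
   "vldr.64 dN, [r0]", an SFmode store to the same address prints as
   "vstr.32 sN, [r0]", and a DFmode store to (mem (post_inc (reg r0)))
   prints as "vstmia.64 r0!, {dN}". */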
20601
20602 const char *
20603 output_move_vfp (rtx *operands)
20604 {
20605 rtx reg, mem, addr, ops[2];
20606 int load = REG_P (operands[0]);
20607 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20608 int sp = (!TARGET_VFP_FP16INST
20609 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20610 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20611 const char *templ;
20612 char buff[50];
20613 machine_mode mode;
20614
20615 reg = operands[!load];
20616 mem = operands[load];
20617
20618 mode = GET_MODE (reg);
20619
20620 gcc_assert (REG_P (reg));
20621 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20622 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20623 || mode == SFmode
20624 || mode == DFmode
20625 || mode == HImode
20626 || mode == SImode
20627 || mode == DImode
20628 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20629 gcc_assert (MEM_P (mem));
20630
20631 addr = XEXP (mem, 0);
20632
20633 switch (GET_CODE (addr))
20634 {
20635 case PRE_DEC:
20636 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20637 ops[0] = XEXP (addr, 0);
20638 ops[1] = reg;
20639 break;
20640
20641 case POST_INC:
20642 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20643 ops[0] = XEXP (addr, 0);
20644 ops[1] = reg;
20645 break;
20646
20647 default:
20648 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20649 ops[0] = reg;
20650 ops[1] = mem;
20651 break;
20652 }
20653
20654 sprintf (buff, templ,
20655 load ? "ld" : "st",
20656 dp ? "64" : sp ? "32" : "16",
20657 dp ? "P" : "",
20658 integer_p ? "\t%@ int" : "");
20659 output_asm_insn (buff, ops);
20660
20661 return "";
20662 }
20663
20664 /* Output a Neon double-word or quad-word load or store, or a load
20665 or store for larger structure modes.
20666
20667 WARNING: The ordering of elements is weird in big-endian mode,
20668 because the EABI requires that vectors stored in memory appear
20669 as though they were stored by a VSTM instruction.
20670 GCC RTL defines element ordering based on in-memory order.
20671 This can be different from the architectural ordering of elements
20672 within a NEON register. The intrinsics defined in arm_neon.h use the
20673 NEON register element ordering, not the GCC RTL element ordering.
20674
20675 For example, the in-memory ordering of a big-endian quadword
20676 vector with 16-bit elements when stored from register pair {d0,d1}
20677 will be (lowest address first, d0[N] is NEON register element N):
20678
20679 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20680
20681 When necessary, quadword registers (dN, dN+1) are moved to ARM
20682 registers from rN in the order:
20683
20684 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20685
20686 So that STM/LDM can be used on vectors in ARM registers, and the
20687 same memory layout will result as if VSTM/VLDM were used.
20688
20689 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20690 possible, which allows use of appropriate alignment tags.
20691 Note that the choice of "64" is independent of the actual vector
20692 element size; this size simply ensures that the behavior is
20693 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20694
20695 Due to limitations of those instructions, use of VST1.64/VLD1.64
20696 is not possible if:
20697 - the address contains PRE_DEC, or
20698 - the mode refers to more than 4 double-word registers
20699
20700 In those cases, it would be possible to replace VSTM/VLDM by a
20701 sequence of instructions; this is not currently implemented since
20702 this is not certain to actually improve performance. */
20703
20704 const char *
20705 output_move_neon (rtx *operands)
20706 {
20707 rtx reg, mem, addr, ops[2];
20708 int regno, nregs, load = REG_P (operands[0]);
20709 const char *templ;
20710 char buff[50];
20711 machine_mode mode;
20712
20713 reg = operands[!load];
20714 mem = operands[load];
20715
20716 mode = GET_MODE (reg);
20717
20718 gcc_assert (REG_P (reg));
20719 regno = REGNO (reg);
20720 nregs = REG_NREGS (reg) / 2;
20721 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20722 || NEON_REGNO_OK_FOR_QUAD (regno));
20723 gcc_assert (VALID_NEON_DREG_MODE (mode)
20724 || VALID_NEON_QREG_MODE (mode)
20725 || VALID_NEON_STRUCT_MODE (mode));
20726 gcc_assert (MEM_P (mem));
20727
20728 addr = XEXP (mem, 0);
20729
20730 /* Strip off const from addresses like (const (plus (...))). */
20731 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20732 addr = XEXP (addr, 0);
20733
20734 switch (GET_CODE (addr))
20735 {
20736 case POST_INC:
20737 /* We have to use vldm / vstm for too-large modes. */
20738 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20739 {
20740 templ = "v%smia%%?\t%%0!, %%h1";
20741 ops[0] = XEXP (addr, 0);
20742 }
20743 else
20744 {
20745 templ = "v%s1.64\t%%h1, %%A0";
20746 ops[0] = mem;
20747 }
20748 ops[1] = reg;
20749 break;
20750
20751 case PRE_DEC:
20752 /* We have to use vldm / vstm in this case, since there is no
20753 pre-decrement form of the vld1 / vst1 instructions. */
20754 templ = "v%smdb%%?\t%%0!, %%h1";
20755 ops[0] = XEXP (addr, 0);
20756 ops[1] = reg;
20757 break;
20758
20759 case POST_MODIFY:
20760 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20761 gcc_unreachable ();
20762
20763 case REG:
20764 /* We have to use vldm / vstm for too-large modes. */
20765 if (nregs > 1)
20766 {
20767 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20768 templ = "v%smia%%?\t%%m0, %%h1";
20769 else
20770 templ = "v%s1.64\t%%h1, %%A0";
20771
20772 ops[0] = mem;
20773 ops[1] = reg;
20774 break;
20775 }
20776 /* Fall through. */
20777 case PLUS:
20778 if (GET_CODE (addr) == PLUS)
20779 addr = XEXP (addr, 0);
20780 /* Fall through. */
20781 case LABEL_REF:
20782 {
20783 int i;
20784 int overlap = -1;
20785 for (i = 0; i < nregs; i++)
20786 {
20787 /* We're only using DImode here because it's a convenient
20788 size. */
20789 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20790 ops[1] = adjust_address (mem, DImode, 8 * i);
20791 if (reg_overlap_mentioned_p (ops[0], mem))
20792 {
20793 gcc_assert (overlap == -1);
20794 overlap = i;
20795 }
20796 else
20797 {
20798 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20799 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20800 else
20801 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20802 output_asm_insn (buff, ops);
20803 }
20804 }
20805 if (overlap != -1)
20806 {
20807 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20808 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20809 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20810 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20811 else
20812 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20813 output_asm_insn (buff, ops);
20814 }
20815
20816 return "";
20817 }
20818
20819 default:
20820 gcc_unreachable ();
20821 }
20822
20823 sprintf (buff, templ, load ? "ld" : "st");
20824 output_asm_insn (buff, ops);
20825
20826 return "";
20827 }
20828
20829 /* Compute and return the length of neon_mov<mode>, where <mode> is
20830 one of VSTRUCT modes: EI, OI, CI or XI. */
20831 int
20832 arm_attr_length_move_neon (rtx_insn *insn)
20833 {
20834 rtx reg, mem, addr;
20835 int load;
20836 machine_mode mode;
20837
20838 extract_insn_cached (insn);
20839
20840 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20841 {
20842 mode = GET_MODE (recog_data.operand[0]);
20843 switch (mode)
20844 {
20845 case E_EImode:
20846 case E_OImode:
20847 return 8;
20848 case E_CImode:
20849 return 12;
20850 case E_XImode:
20851 return 16;
20852 default:
20853 gcc_unreachable ();
20854 }
20855 }
20856
20857 load = REG_P (recog_data.operand[0]);
20858 reg = recog_data.operand[!load];
20859 mem = recog_data.operand[load];
20860
20861 gcc_assert (MEM_P (mem));
20862
20863 addr = XEXP (mem, 0);
20864
20865 /* Strip off const from addresses like (const (plus (...))). */
20866 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20867 addr = XEXP (addr, 0);
20868
20869 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20870 {
20871 int insns = REG_NREGS (reg) / 2;
20872 return insns * 4;
20873 }
20874 else
20875 return 4;
20876 }
20877
20878 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20879 return zero. */
20880
20881 int
20882 arm_address_offset_is_imm (rtx_insn *insn)
20883 {
20884 rtx mem, addr;
20885
20886 extract_insn_cached (insn);
20887
20888 if (REG_P (recog_data.operand[0]))
20889 return 0;
20890
20891 mem = recog_data.operand[0];
20892
20893 gcc_assert (MEM_P (mem));
20894
20895 addr = XEXP (mem, 0);
20896
20897 if (REG_P (addr)
20898 || (GET_CODE (addr) == PLUS
20899 && REG_P (XEXP (addr, 0))
20900 && CONST_INT_P (XEXP (addr, 1))))
20901 return 1;
20902 else
20903 return 0;
20904 }
20905
20906 /* Output an ADD r, s, #n where n may be too big for one instruction.
20907 If adding zero and the destination is the same as the source, output nothing. */
20908 const char *
20909 output_add_immediate (rtx *operands)
20910 {
20911 HOST_WIDE_INT n = INTVAL (operands[2]);
20912
20913 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20914 {
20915 if (n < 0)
20916 output_multi_immediate (operands,
20917 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20918 -n);
20919 else
20920 output_multi_immediate (operands,
20921 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20922 n);
20923 }
20924
20925 return "";
20926 }
20927
20928 /* Output a multiple immediate operation.
20929 OPERANDS is the vector of operands referred to in the output patterns.
20930 INSTR1 is the output pattern to use for the first constant.
20931 INSTR2 is the output pattern to use for subsequent constants.
20932 IMMED_OP is the index of the constant slot in OPERANDS.
20933 N is the constant value. */
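/* Illustrative example: with INSTR1 == "add%?\t%0, %1, %2",
   INSTR2 == "add%?\t%0, %0, %2" and N == 0x10004, and assuming operand 0
   is r0 and operand 1 is r1, the loop below emits "add r0, r1, #4"
   followed by "add r0, r0, #65536", carving N into 8-bit chunks that are
   each valid ARM immediates. */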
20934 static const char *
20935 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20936 int immed_op, HOST_WIDE_INT n)
20937 {
20938 #if HOST_BITS_PER_WIDE_INT > 32
20939 n &= 0xffffffff;
20940 #endif
20941
20942 if (n == 0)
20943 {
20944 /* Quick and easy output. */
20945 operands[immed_op] = const0_rtx;
20946 output_asm_insn (instr1, operands);
20947 }
20948 else
20949 {
20950 int i;
20951 const char * instr = instr1;
20952
20953 /* Note that n is never zero here (which would give no output). */
20954 for (i = 0; i < 32; i += 2)
20955 {
20956 if (n & (3 << i))
20957 {
20958 operands[immed_op] = GEN_INT (n & (255 << i));
20959 output_asm_insn (instr, operands);
20960 instr = instr2;
20961 i += 6;
20962 }
20963 }
20964 }
20965
20966 return "";
20967 }
20968
20969 /* Return the name of a shifter operation. */
20970 static const char *
20971 arm_shift_nmem (enum rtx_code code)
20972 {
20973 switch (code)
20974 {
20975 case ASHIFT:
20976 return ARM_LSL_NAME;
20977
20978 case ASHIFTRT:
20979 return "asr";
20980
20981 case LSHIFTRT:
20982 return "lsr";
20983
20984 case ROTATERT:
20985 return "ror";
20986
20987 default:
20988 abort();
20989 }
20990 }
20991
20992 /* Return the appropriate ARM instruction for the operation code.
20993 The returned result should not be overwritten. OP is the rtx of the
20994 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20995 was shifted. */
20996 const char *
20997 arithmetic_instr (rtx op, int shift_first_arg)
20998 {
20999 switch (GET_CODE (op))
21000 {
21001 case PLUS:
21002 return "add";
21003
21004 case MINUS:
21005 return shift_first_arg ? "rsb" : "sub";
21006
21007 case IOR:
21008 return "orr";
21009
21010 case XOR:
21011 return "eor";
21012
21013 case AND:
21014 return "and";
21015
21016 case ASHIFT:
21017 case ASHIFTRT:
21018 case LSHIFTRT:
21019 case ROTATERT:
21020 return arm_shift_nmem (GET_CODE (op));
21021
21022 default:
21023 gcc_unreachable ();
21024 }
21025 }
21026
21027 /* Ensure valid constant shifts and return the appropriate shift mnemonic
21028 for the operation code. The returned result should not be overwritten.
21029 OP is the rtx of the shift.
21030 On exit, *AMOUNTP will be -1 if the shift is by a register, and will
21031 contain the constant shift amount otherwise. */
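/* For example (illustrative): (ashiftrt (reg) (const_int 5)) yields "asr"
   with *AMOUNTP set to 5, while (mult (reg) (const_int 8)) is printed as
   a shift and yields "lsl" with *AMOUNTP set to 3. */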
21032 static const char *
21033 shift_op (rtx op, HOST_WIDE_INT *amountp)
21034 {
21035 const char * mnem;
21036 enum rtx_code code = GET_CODE (op);
21037
21038 switch (code)
21039 {
21040 case ROTATE:
21041 if (!CONST_INT_P (XEXP (op, 1)))
21042 {
21043 output_operand_lossage ("invalid shift operand");
21044 return NULL;
21045 }
21046
21047 code = ROTATERT;
21048 *amountp = 32 - INTVAL (XEXP (op, 1));
21049 mnem = "ror";
21050 break;
21051
21052 case ASHIFT:
21053 case ASHIFTRT:
21054 case LSHIFTRT:
21055 case ROTATERT:
21056 mnem = arm_shift_nmem(code);
21057 if (CONST_INT_P (XEXP (op, 1)))
21058 {
21059 *amountp = INTVAL (XEXP (op, 1));
21060 }
21061 else if (REG_P (XEXP (op, 1)))
21062 {
21063 *amountp = -1;
21064 return mnem;
21065 }
21066 else
21067 {
21068 output_operand_lossage ("invalid shift operand");
21069 return NULL;
21070 }
21071 break;
21072
21073 case MULT:
21074 /* We never have to worry about the amount being other than a
21075 power of 2, since this case can never be reloaded from a reg. */
21076 if (!CONST_INT_P (XEXP (op, 1)))
21077 {
21078 output_operand_lossage ("invalid shift operand");
21079 return NULL;
21080 }
21081
21082 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
21083
21084 /* Amount must be a power of two. */
21085 if (*amountp & (*amountp - 1))
21086 {
21087 output_operand_lossage ("invalid shift operand");
21088 return NULL;
21089 }
21090
21091 *amountp = exact_log2 (*amountp);
21092 gcc_assert (IN_RANGE (*amountp, 0, 31));
21093 return ARM_LSL_NAME;
21094
21095 default:
21096 output_operand_lossage ("invalid shift operand");
21097 return NULL;
21098 }
21099
21100 /* This is not 100% correct, but follows from the desire to merge
21101 multiplication by a power of 2 with the recognizer for a
21102 shift. >=32 is not a valid shift for "lsl", so we must try and
21103 output a shift that produces the correct arithmetical result.
21104 Using lsr #32 is identical except for the fact that the carry bit
21105 is not set correctly if we set the flags; but we never use the
21106 carry bit from such an operation, so we can ignore that. */
21107 if (code == ROTATERT)
21108 /* Rotate is just modulo 32. */
21109 *amountp &= 31;
21110 else if (*amountp != (*amountp & 31))
21111 {
21112 if (code == ASHIFT)
21113 mnem = "lsr";
21114 *amountp = 32;
21115 }
21116
21117 /* Shifts of 0 are no-ops. */
21118 if (*amountp == 0)
21119 return NULL;
21120
21121 return mnem;
21122 }
21123
21124 /* Output a .ascii pseudo-op, keeping track of lengths. This is
21125 because /bin/as is horribly restrictive. The judgement about
21126 whether or not each character is 'printable' (and can be output as
21127 is) or not (and must be printed with an octal escape) must be made
21128 with reference to the *host* character set -- the situation is
21129 similar to that discussed in the comments above pp_c_char in
21130 c-pretty-print.cc. */
21131
21132 #define MAX_ASCII_LEN 51
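/* For example (illustrative): the four input bytes 'o', 'k', '"', '\n'
   are emitted as
       .ascii "ok\"\012"
   and the directive is restarted whenever a chunk reaches MAX_ASCII_LEN
   characters. */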
21133
21134 void
21135 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
21136 {
21137 int i;
21138 int len_so_far = 0;
21139
21140 fputs ("\t.ascii\t\"", stream);
21141
21142 for (i = 0; i < len; i++)
21143 {
21144 int c = p[i];
21145
21146 if (len_so_far >= MAX_ASCII_LEN)
21147 {
21148 fputs ("\"\n\t.ascii\t\"", stream);
21149 len_so_far = 0;
21150 }
21151
21152 if (ISPRINT (c))
21153 {
21154 if (c == '\\' || c == '\"')
21155 {
21156 putc ('\\', stream);
21157 len_so_far++;
21158 }
21159 putc (c, stream);
21160 len_so_far++;
21161 }
21162 else
21163 {
21164 fprintf (stream, "\\%03o", c);
21165 len_so_far += 4;
21166 }
21167 }
21168
21169 fputs ("\"\n", stream);
21170 }
21171 \f
21172
21173 /* Compute the register save mask for registers 0 through 12
21174 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
21175
21176 static unsigned long
21177 arm_compute_save_reg0_reg12_mask (void)
21178 {
21179 unsigned long func_type = arm_current_func_type ();
21180 unsigned long save_reg_mask = 0;
21181 unsigned int reg;
21182
21183 if (IS_INTERRUPT (func_type))
21184 {
21185 unsigned int max_reg;
21186 /* Interrupt functions must not corrupt any registers,
21187 even call clobbered ones. If this is a leaf function
21188 we can just examine the registers used by the RTL, but
21189 otherwise we have to assume that whatever function is
21190 called might clobber anything, and so we have to save
21191 all the call-clobbered registers as well. */
21192 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
21193 /* FIQ handlers have registers r8 - r12 banked, so
21194 we only need to check r0 - r7. Normal ISRs only
21195 bank r14 and r15, so we must check up to r12.
21196 r13 is the stack pointer which is always preserved,
21197 so we do not need to consider it here. */
21198 max_reg = 7;
21199 else
21200 max_reg = 12;
21201
21202 for (reg = 0; reg <= max_reg; reg++)
21203 if (reg_needs_saving_p (reg))
21204 save_reg_mask |= (1 << reg);
21205
21206 /* Also save the pic base register if necessary. */
21207 if (PIC_REGISTER_MAY_NEED_SAVING
21208 && crtl->uses_pic_offset_table)
21209 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21210 }
21211 else if (IS_VOLATILE(func_type))
21212 {
21213 /* For noreturn functions we historically omitted register saves
21214 altogether. However this really messes up debugging. As a
21215 compromise save just the frame pointers. Combined with the link
21216 register saved elsewhere this should be sufficient to get
21217 a backtrace. */
21218 if (frame_pointer_needed)
21219 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21220 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
21221 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21222 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
21223 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
21224 }
21225 else
21226 {
21227 /* In the normal case we only need to save those registers
21228 which are call saved and which are used by this function. */
21229 for (reg = 0; reg <= 11; reg++)
21230 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21231 save_reg_mask |= (1 << reg);
21232
21233 /* Handle the frame pointer as a special case. */
21234 if (frame_pointer_needed)
21235 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21236
21237 /* If we aren't loading the PIC register,
21238 don't stack it even though it may be live. */
21239 if (PIC_REGISTER_MAY_NEED_SAVING
21240 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
21241 || crtl->uses_pic_offset_table))
21242 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21243
21244 /* The prologue will copy SP into R0, so save it. */
21245 if (IS_STACKALIGN (func_type))
21246 save_reg_mask |= 1;
21247 }
21248
21249 /* Save registers so the exception handler can modify them. */
21250 if (crtl->calls_eh_return)
21251 {
21252 unsigned int i;
21253
21254 for (i = 0; ; i++)
21255 {
21256 reg = EH_RETURN_DATA_REGNO (i);
21257 if (reg == INVALID_REGNUM)
21258 break;
21259 save_reg_mask |= 1 << reg;
21260 }
21261 }
21262
21263 return save_reg_mask;
21264 }
21265
21266 /* Return true if r3 is live at the start of the function. */
21267
21268 static bool
21269 arm_r3_live_at_start_p (void)
21270 {
21271 /* Just look at cfg info, which is still close enough to correct at this
21272 point. This gives false positives for broken functions that might use
21273 uninitialized data that happens to be allocated in r3, but who cares? */
21274 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
21275 }
21276
21277 /* Compute the number of bytes used to store the static chain register on the
21278 stack, above the stack frame. We need to know this accurately to get the
21279 alignment of the rest of the stack frame correct. */
21280
21281 static int
21282 arm_compute_static_chain_stack_bytes (void)
21283 {
21284 /* Once the value is updated from the init value of -1, do not
21285 re-compute. */
21286 if (cfun->machine->static_chain_stack_bytes != -1)
21287 return cfun->machine->static_chain_stack_bytes;
21288
21289 /* See the defining assertion in arm_expand_prologue. */
21290 if (IS_NESTED (arm_current_func_type ())
21291 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21292 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21293 || flag_stack_clash_protection)
21294 && !df_regs_ever_live_p (LR_REGNUM)))
21295 && arm_r3_live_at_start_p ()
21296 && crtl->args.pretend_args_size == 0)
21297 return 4;
21298
21299 return 0;
21300 }
21301
21302 /* Compute a bit mask of which core registers need to be
21303 saved on the stack for the current function.
21304 This is used by arm_compute_frame_layout, which may add extra registers. */
21305
21306 static unsigned long
21307 arm_compute_save_core_reg_mask (void)
21308 {
21309 unsigned int save_reg_mask = 0;
21310 unsigned long func_type = arm_current_func_type ();
21311 unsigned int reg;
21312
21313 if (IS_NAKED (func_type))
21314 /* This should never really happen. */
21315 return 0;
21316
21317 /* If we are creating a stack frame, then we must save the frame pointer,
21318 IP (which will hold the old stack pointer), LR and the PC. */
21319 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21320 save_reg_mask |=
21321 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
21322 | (1 << IP_REGNUM)
21323 | (1 << LR_REGNUM)
21324 | (1 << PC_REGNUM);
21325
21326 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
21327
21328 if (arm_current_function_pac_enabled_p ())
21329 save_reg_mask |= 1 << IP_REGNUM;
21330
21331 /* Decide if we need to save the link register.
21332 Interrupt routines have their own banked link register,
21333 so they never need to save it.
21334 Otherwise if we do not use the link register we do not need to save
21335 it. If we are pushing other registers onto the stack however, we
21336 can save an instruction in the epilogue by pushing the link register
21337 now and then popping it back into the PC. This incurs extra memory
21338 accesses though, so we only do it when optimizing for size, and only
21339 if we know that we will not need a fancy return sequence. */
21340 if (df_regs_ever_live_p (LR_REGNUM)
21341 || (save_reg_mask
21342 && optimize_size
21343 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
21344 && !crtl->tail_call_emit
21345 && !crtl->calls_eh_return))
21346 save_reg_mask |= 1 << LR_REGNUM;
21347
21348 if (cfun->machine->lr_save_eliminated)
21349 save_reg_mask &= ~ (1 << LR_REGNUM);
21350
21351 if (TARGET_REALLY_IWMMXT
21352 && ((bit_count (save_reg_mask)
21353 + ARM_NUM_INTS (crtl->args.pretend_args_size +
21354 arm_compute_static_chain_stack_bytes())
21355 ) % 2) != 0)
21356 {
21357 /* The total number of registers that are going to be pushed
21358 onto the stack is odd. We need to ensure that the stack
21359 is 64-bit aligned before we start to save iWMMXt registers,
21360 and also before we start to create locals. (A local variable
21361 might be a double or long long which we will load/store using
21362 an iWMMXt instruction). Therefore we need to push another
21363 ARM register, so that the stack will be 64-bit aligned. We
21364 try to avoid using the arg registers (r0 - r3) as they might be
21365 used to pass values in a tail call. */
21366 for (reg = 4; reg <= 12; reg++)
21367 if ((save_reg_mask & (1 << reg)) == 0)
21368 break;
21369
21370 if (reg <= 12)
21371 save_reg_mask |= (1 << reg);
21372 else
21373 {
21374 cfun->machine->sibcall_blocked = 1;
21375 save_reg_mask |= (1 << 3);
21376 }
21377 }
21378
21379 /* We may need to push an additional register for use initializing the
21380 PIC base register. */
21381 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21382 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21383 {
21384 reg = thumb_find_work_register (1 << 4);
21385 if (!call_used_or_fixed_reg_p (reg))
21386 save_reg_mask |= (1 << reg);
21387 }
21388
21389 return save_reg_mask;
21390 }
21391
21392 /* Compute a bit mask of which core registers need to be
21393 saved on the stack for the current function. */
21394 static unsigned long
21395 thumb1_compute_save_core_reg_mask (void)
21396 {
21397 unsigned long mask;
21398 unsigned reg;
21399
21400 mask = 0;
21401 for (reg = 0; reg < 12; reg ++)
21402 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21403 mask |= 1 << reg;
21404
21405 /* Handle the frame pointer as a special case. */
21406 if (frame_pointer_needed)
21407 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21408
21409 if (flag_pic
21410 && !TARGET_SINGLE_PIC_BASE
21411 && arm_pic_register != INVALID_REGNUM
21412 && crtl->uses_pic_offset_table)
21413 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21414
21415 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21416 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21417 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21418
21419 /* LR will also be pushed if any lo regs are pushed. */
21420 if (mask & 0xff || thumb_force_lr_save ())
21421 mask |= (1 << LR_REGNUM);
21422
21423 bool call_clobbered_scratch
21424 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21425 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21426
21427 /* Make sure we have a low work register if we need one. We will
21428 need one if we are going to push a high register, but we are not
21429 currently intending to push a low register. However if both the
21430 prologue and epilogue have a spare call-clobbered low register,
21431 then we won't need to find an additional work register. It does
21432 not need to be the same register in the prologue and
21433 epilogue. */
21434 if ((mask & 0xff) == 0
21435 && !call_clobbered_scratch
21436 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21437 {
21438 /* Use thumb_find_work_register to choose which register
21439 we will use. If the register is live then we will
21440 have to push it. Use LAST_LO_REGNUM as our fallback
21441 choice for the register to select. */
21442 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21443 /* Make sure the register returned by thumb_find_work_register is
21444 not part of the return value. */
21445 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21446 reg = LAST_LO_REGNUM;
21447
21448 if (callee_saved_reg_p (reg))
21449 mask |= 1 << reg;
21450 }
21451
21452 /* The 504 below is 8 bytes less than 512 because there are two possible
21453 alignment words. We can't tell here if they will be present or not so we
21454 have to play it safe and assume that they are. */
21455 if ((CALLER_INTERWORKING_SLOT_SIZE +
21456 ROUND_UP_WORD (get_frame_size ()) +
21457 crtl->outgoing_args_size) >= 504)
21458 {
21459 /* This is the same as the code in thumb1_expand_prologue() which
21460 determines which register to use for stack decrement. */
21461 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21462 if (mask & (1 << reg))
21463 break;
21464
21465 if (reg > LAST_LO_REGNUM)
21466 {
21467 /* Make sure we have a register available for stack decrement. */
21468 mask |= 1 << LAST_LO_REGNUM;
21469 }
21470 }
21471
21472 return mask;
21473 }
21474
21475 /* Return the number of bytes required to save VFP registers. */
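/* Worked example (illustrative only): if the callee-saved registers
   d8-d15 all need saving, the loop below counts 8 D-registers and
   returns 8 * 8 == 64 bytes; on pre-ARMv6 cores the ARM10 VFPr1
   workaround can pad a run of exactly two D-registers to three.  */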
21476 static int
21477 arm_get_vfp_saved_size (void)
21478 {
21479 unsigned int regno;
21480 int count;
21481 int saved;
21482
21483 saved = 0;
21484 /* Space for saved VFP registers. */
21485 if (TARGET_VFP_BASE)
21486 {
21487 count = 0;
21488 for (regno = FIRST_VFP_REGNUM;
21489 regno < LAST_VFP_REGNUM;
21490 regno += 2)
21491 {
21492 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21493 {
21494 if (count > 0)
21495 {
21496 /* Workaround ARM10 VFPr1 bug. */
21497 if (count == 2 && !arm_arch6)
21498 count++;
21499 saved += count * 8;
21500 }
21501 count = 0;
21502 }
21503 else
21504 count++;
21505 }
21506 if (count > 0)
21507 {
21508 if (count == 2 && !arm_arch6)
21509 count++;
21510 saved += count * 8;
21511 }
21512 }
21513 return saved;
21514 }
21515
21516
21517 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21518 everything bar the final return instruction. If SIMPLE_RETURN is true,
21519 then do not output the epilogue, because it has already been emitted in RTL.
21520
21521 Note: do not forget to update length attribute of corresponding insn pattern
21522 when changing assembly output (eg. length attribute of
21523 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21524 register clearing sequences). */
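/* Illustrative examples of what this function can emit (the exact
   sequence depends on the saved-register mask and the function type):
       pop     {r4, r5, pc}      @ simple case: saved LR popped into PC
       bx      lr                @ v4T/v5T and interworking returns
       subs    pc, lr, #4        @ ISR/FIQ return  */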
21525 const char *
21526 output_return_instruction (rtx operand, bool really_return, bool reverse,
21527 bool simple_return)
21528 {
21529 char conditional[10];
21530 char instr[100];
21531 unsigned reg;
21532 unsigned long live_regs_mask;
21533 unsigned long func_type;
21534 arm_stack_offsets *offsets;
21535
21536 func_type = arm_current_func_type ();
21537
21538 if (IS_NAKED (func_type))
21539 return "";
21540
21541 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21542 {
21543 /* If this function was declared non-returning, and we have
21544 found a tail call, then we have to trust that the called
21545 function won't return. */
21546 if (really_return)
21547 {
21548 rtx ops[2];
21549
21550 /* Otherwise, trap an attempted return by aborting. */
21551 ops[0] = operand;
21552 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21553 : "abort");
21554 assemble_external_libcall (ops[1]);
21555 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21556 }
21557
21558 return "";
21559 }
21560
21561 gcc_assert (!cfun->calls_alloca || really_return);
21562
21563 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21564
21565 cfun->machine->return_used_this_function = 1;
21566
21567 offsets = arm_get_frame_offsets ();
21568 live_regs_mask = offsets->saved_regs_mask;
21569
21570 if (!simple_return && live_regs_mask)
21571 {
21572 const char * return_reg;
21573
21574 /* If we do not have any special requirements for function exit
21575 (e.g. interworking) then we can load the return address
21576 directly into the PC. Otherwise we must load it into LR. */
21577 if (really_return
21578 && !IS_CMSE_ENTRY (func_type)
21579 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21580 return_reg = reg_names[PC_REGNUM];
21581 else
21582 return_reg = reg_names[LR_REGNUM];
21583
21584 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21585 {
21586 /* There are three possible reasons for the IP register
21587 being saved: 1) a stack frame was created, in which case
21588 IP contains the old stack pointer, or 2) an ISR routine
21589 corrupted it, or 3) it was saved to align the stack on
21590 iWMMXt. In case 1, restore IP into SP, otherwise just
21591 restore IP. */
21592 if (frame_pointer_needed)
21593 {
21594 live_regs_mask &= ~ (1 << IP_REGNUM);
21595 live_regs_mask |= (1 << SP_REGNUM);
21596 }
21597 else
21598 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21599 }
21600
21601 /* On some ARM architectures it is faster to use LDR rather than
21602 LDM to load a single register. On other architectures, the
21603 cost is the same. In 26 bit mode, or for exception handlers,
21604 we have to use LDM to load the PC so that the CPSR is also
21605 restored. */
21606 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21607 if (live_regs_mask == (1U << reg))
21608 break;
21609
21610 if (reg <= LAST_ARM_REGNUM
21611 && (reg != LR_REGNUM
21612 || ! really_return
21613 || ! IS_INTERRUPT (func_type)))
21614 {
21615 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21616 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21617 }
21618 else
21619 {
21620 char *p;
21621 int first = 1;
21622
21623 /* Generate the load multiple instruction to restore the
21624 registers. Note we can get here, even if
21625 frame_pointer_needed is true, but only if sp already
21626 points to the base of the saved core registers. */
21627 if (live_regs_mask & (1 << SP_REGNUM))
21628 {
21629 unsigned HOST_WIDE_INT stack_adjust;
21630
21631 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21632 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21633
21634 if (stack_adjust && arm_arch5t && TARGET_ARM)
21635 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21636 else
21637 {
21638 /* If we can't use ldmib (SA110 bug),
21639 then try to pop r3 instead. */
21640 if (stack_adjust)
21641 live_regs_mask |= 1 << 3;
21642
21643 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21644 }
21645 }
21646 /* For interrupt returns we have to use an LDM rather than
21647 a POP so that we can use the exception return variant. */
21648 else if (IS_INTERRUPT (func_type))
21649 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21650 else
21651 sprintf (instr, "pop%s\t{", conditional);
21652
21653 p = instr + strlen (instr);
21654
21655 for (reg = 0; reg <= SP_REGNUM; reg++)
21656 if (live_regs_mask & (1 << reg))
21657 {
21658 int l = strlen (reg_names[reg]);
21659
21660 if (first)
21661 first = 0;
21662 else
21663 {
21664 memcpy (p, ", ", 2);
21665 p += 2;
21666 }
21667
21668 memcpy (p, "%|", 2);
21669 memcpy (p + 2, reg_names[reg], l);
21670 p += l + 2;
21671 }
21672
21673 if (live_regs_mask & (1 << LR_REGNUM))
21674 {
21675 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21676 /* If returning from an interrupt, restore the CPSR. */
21677 if (IS_INTERRUPT (func_type))
21678 strcat (p, "^");
21679 }
21680 else
21681 strcpy (p, "}");
21682 }
21683
21684 output_asm_insn (instr, & operand);
21685
21686 /* See if we need to generate an extra instruction to
21687 perform the actual function return. */
21688 if (really_return
21689 && func_type != ARM_FT_INTERWORKED
21690 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21691 {
21692 /* The return has already been handled
21693 by loading the LR into the PC. */
21694 return "";
21695 }
21696 }
21697
21698 if (really_return)
21699 {
21700 switch ((int) ARM_FUNC_TYPE (func_type))
21701 {
21702 case ARM_FT_ISR:
21703 case ARM_FT_FIQ:
21704 /* ??? This is wrong for unified assembly syntax. */
21705 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21706 break;
21707
21708 case ARM_FT_INTERWORKED:
21709 gcc_assert (arm_arch5t || arm_arch4t);
21710 sprintf (instr, "bx%s\t%%|lr", conditional);
21711 break;
21712
21713 case ARM_FT_EXCEPTION:
21714 /* ??? This is wrong for unified assembly syntax. */
21715 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21716 break;
21717
21718 default:
21719 if (IS_CMSE_ENTRY (func_type))
21720 {
21721 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21722 emitted by cmse_nonsecure_entry_clear_before_return () and the
21723 VSTR/VLDR instructions in the prologue and epilogue. */
21724 if (!TARGET_HAVE_FPCXT_CMSE)
21725 {
21726 /* Check if we have to clear the 'GE bits' which is only used if
21727 parallel add and subtraction instructions are available. */
21728 if (TARGET_INT_SIMD)
21729 snprintf (instr, sizeof (instr),
21730 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21731 else
21732 snprintf (instr, sizeof (instr),
21733 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21734
21735 output_asm_insn (instr, & operand);
21736 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21737 care of it. */
21738 if (TARGET_HARD_FLOAT)
21739 {
21740 /* Clear the cumulative exception-status bits (0-4,7) and
21741 the condition code bits (28-31) of the FPSCR. We need
21742 to remember to clear the first scratch register used
21743 (IP) and save and restore the second (r4).
21744
21745 Important note: the length of the
21746 thumb2_cmse_entry_return insn pattern must account for
21747 the size of the below instructions. */
21748 output_asm_insn ("push\t{%|r4}", & operand);
21749 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21750 output_asm_insn ("movw\t%|r4, #65376", & operand);
21751 output_asm_insn ("movt\t%|r4, #4095", & operand);
21752 output_asm_insn ("and\t%|ip, %|r4", & operand);
21753 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21754 output_asm_insn ("pop\t{%|r4}", & operand);
21755 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21756 }
21757 }
21758 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21759 }
21760 /* Use bx if it's available. */
21761 else if (arm_arch5t || arm_arch4t)
21762 sprintf (instr, "bx%s\t%%|lr", conditional);
21763 else
21764 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21765 break;
21766 }
21767
21768 output_asm_insn (instr, & operand);
21769 }
21770
21771 return "";
21772 }
21773
21774 /* Output in FILE asm statements needed to declare the NAME of the function
21775 defined by its DECL node. */
21776
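/* Sketch of the output for a function "foo" carrying the
   cmse_nonsecure_entry attribute (illustrative, some directives
   omitted):
       .global __acle_se_foo
       .type   __acle_se_foo, %function
       .type   foo, %function
   foo:
   __acle_se_foo:  */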
21777 void
21778 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21779 {
21780 size_t cmse_name_len;
21781 char *cmse_name = 0;
21782 char cmse_prefix[] = "__acle_se_";
21783
21784 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21785 extra function label for each function with the 'cmse_nonsecure_entry'
21786 attribute. This extra function label should be prepended with
21787 '__acle_se_', telling the linker that it needs to create secure gateway
21788 veneers for this function. */
21789 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21790 DECL_ATTRIBUTES (decl)))
21791 {
21792 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21793 cmse_name = XALLOCAVEC (char, cmse_name_len);
21794 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21795 targetm.asm_out.globalize_label (file, cmse_name);
21796
21797 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21798 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21799 }
21800
21801 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21802 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21803 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21804 ASM_OUTPUT_LABEL (file, name);
21805
21806 if (cmse_name)
21807 ASM_OUTPUT_LABEL (file, cmse_name);
21808
21809 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21810 }
21811
21812 /* Write the function name into the code section, directly preceding
21813 the function prologue.
21814
21815 Code will be output similar to this:
21816 t0
21817 .ascii "arm_poke_function_name", 0
21818 .align
21819 t1
21820 .word 0xff000000 + (t1 - t0)
21821 arm_poke_function_name
21822 mov ip, sp
21823 stmfd sp!, {fp, ip, lr, pc}
21824 sub fp, ip, #4
21825
21826 When performing a stack backtrace, code can inspect the value
21827 of 'pc' stored at 'fp' + 0. If the trace function then looks
21828 at location pc - 12 and the top 8 bits are set, then we know
21829 that there is a function name embedded immediately preceding this
21830 location whose length is (pc[-3] & ~0xff000000).
21831
21832 We assume that pc is declared as a pointer to an unsigned long.
21833
21834 It is of no benefit to output the function name if we are assembling
21835 a leaf function. These function types will not contain a stack
21836 backtrace structure, therefore it is not possible to determine the
21837 function name. */
21838 void
21839 arm_poke_function_name (FILE *stream, const char *name)
21840 {
21841 unsigned long alignlength;
21842 unsigned long length;
21843 rtx x;
21844
21845 length = strlen (name) + 1;
21846 alignlength = ROUND_UP_WORD (length);
21847
21848 ASM_OUTPUT_ASCII (stream, name, length);
21849 ASM_OUTPUT_ALIGN (stream, 2);
21850 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21851 assemble_aligned_integer (UNITS_PER_WORD, x);
21852 }
21853
21854 /* Place some comments into the assembler stream
21855 describing the current function. */
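/* For a typical function the comments emitted here look roughly like
   this (illustrative values):
       @ args = 0, pretend = 0, frame = 8
       @ frame_needed = 1, uses_anonymous_args = 0  */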
21856 static void
21857 arm_output_function_prologue (FILE *f)
21858 {
21859 unsigned long func_type;
21860
21861 /* Sanity check. */
21862 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21863
21864 func_type = arm_current_func_type ();
21865
21866 switch ((int) ARM_FUNC_TYPE (func_type))
21867 {
21868 default:
21869 case ARM_FT_NORMAL:
21870 break;
21871 case ARM_FT_INTERWORKED:
21872 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21873 break;
21874 case ARM_FT_ISR:
21875 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21876 break;
21877 case ARM_FT_FIQ:
21878 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21879 break;
21880 case ARM_FT_EXCEPTION:
21881 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21882 break;
21883 }
21884
21885 if (IS_NAKED (func_type))
21886 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21887
21888 if (IS_VOLATILE (func_type))
21889 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21890
21891 if (IS_NESTED (func_type))
21892 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21893 if (IS_STACKALIGN (func_type))
21894 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21895 if (IS_CMSE_ENTRY (func_type))
21896 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21897
21898 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21899 (HOST_WIDE_INT) crtl->args.size,
21900 crtl->args.pretend_args_size,
21901 (HOST_WIDE_INT) get_frame_size ());
21902
21903 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21904 frame_pointer_needed,
21905 cfun->machine->uses_anonymous_args);
21906
21907 if (cfun->machine->lr_save_eliminated)
21908 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21909
21910 if (crtl->calls_eh_return)
21911 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
21912
21913 }
21914
21915 static void
21916 arm_output_function_epilogue (FILE *)
21917 {
21918 arm_stack_offsets *offsets;
21919
21920 if (TARGET_THUMB1)
21921 {
21922 int regno;
21923
21924 /* Emit any call-via-reg trampolines that are needed for v4t support
21925 of call_reg and call_value_reg type insns. */
21926 for (regno = 0; regno < LR_REGNUM; regno++)
21927 {
21928 rtx label = cfun->machine->call_via[regno];
21929
21930 if (label != NULL)
21931 {
21932 switch_to_section (function_section (current_function_decl));
21933 targetm.asm_out.internal_label (asm_out_file, "L",
21934 CODE_LABEL_NUMBER (label));
21935 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21936 }
21937 }
21938
21939 /* ??? Probably not safe to set this here, since it assumes that a
21940 function will be emitted as assembly immediately after we generate
21941 RTL for it. This does not happen for inline functions. */
21942 cfun->machine->return_used_this_function = 0;
21943 }
21944 else /* TARGET_32BIT */
21945 {
21946 /* We need to take into account any stack-frame rounding. */
21947 offsets = arm_get_frame_offsets ();
21948
21949 gcc_assert (!use_return_insn (FALSE, NULL)
21950 || (cfun->machine->return_used_this_function != 0)
21951 || offsets->saved_regs == offsets->outgoing_args
21952 || frame_pointer_needed);
21953 }
21954 }
21955
21956 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21957 STR and STRD. If an even number of registers is being pushed, an STRD
21958 pattern is created for each register pair. If an odd number of
21959 registers is pushed, emit an initial STR followed by
21960 as many STRD instructions as are needed. This works best when the
21961 stack is initially 64-bit aligned (the normal case), since it
21962 ensures that each STRD is also 64-bit aligned. */
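/* For example (an illustrative sketch, not taken from real output),
   pushing {r4, r5, r6, r7} might produce:
       strd    r4, r5, [sp, #-16]!    @ allocates all 16 bytes
       strd    r6, r7, [sp, #8]
   while pushing {r4, r5, r6} starts with a single STR:
       str     r4, [sp, #-12]!
       strd    r5, r6, [sp, #4]  */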
21963 static void
21964 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21965 {
21966 int num_regs = 0;
21967 int i;
21968 int regno;
21969 rtx par = NULL_RTX;
21970 rtx dwarf = NULL_RTX;
21971 rtx tmp;
21972 bool first = true;
21973
21974 num_regs = bit_count (saved_regs_mask);
21975
21976 /* Must be at least one register to save, and can't save SP or PC. */
21977 gcc_assert (num_regs > 0 && num_regs <= 14);
21978 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21979 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21980
21981 /* Create sequence for DWARF info. All the frame-related data for
21982 debugging is held in this wrapper. */
21983 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21984
21985 /* Describe the stack adjustment. */
21986 tmp = gen_rtx_SET (stack_pointer_rtx,
21987 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21988 RTX_FRAME_RELATED_P (tmp) = 1;
21989 XVECEXP (dwarf, 0, 0) = tmp;
21990
21991 /* Find the first register. */
21992 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21993 ;
21994
21995 i = 0;
21996
21997 /* If there's an odd number of registers to push, start off by
21998 pushing a single register. This ensures that subsequent strd
21999 operations are dword aligned (assuming that SP was originally
22000 64-bit aligned). */
22001 if ((num_regs & 1) != 0)
22002 {
22003 rtx reg, mem, insn;
22004
22005 reg = gen_rtx_REG (SImode, regno);
22006 if (num_regs == 1)
22007 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
22008 stack_pointer_rtx));
22009 else
22010 mem = gen_frame_mem (Pmode,
22011 gen_rtx_PRE_MODIFY
22012 (Pmode, stack_pointer_rtx,
22013 plus_constant (Pmode, stack_pointer_rtx,
22014 -4 * num_regs)));
22015
22016 tmp = gen_rtx_SET (mem, reg);
22017 RTX_FRAME_RELATED_P (tmp) = 1;
22018 insn = emit_insn (tmp);
22019 RTX_FRAME_RELATED_P (insn) = 1;
22020 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22021 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
22022 RTX_FRAME_RELATED_P (tmp) = 1;
22023 i++;
22024 regno++;
22025 XVECEXP (dwarf, 0, i) = tmp;
22026 first = false;
22027 }
22028
22029 while (i < num_regs)
22030 if (saved_regs_mask & (1 << regno))
22031 {
22032 rtx reg1, reg2, mem1, mem2;
22033 rtx tmp0, tmp1, tmp2;
22034 int regno2;
22035
22036 /* Find the register to pair with this one. */
22037 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
22038 regno2++)
22039 ;
22040
22041 reg1 = gen_rtx_REG (SImode, regno);
22042 reg2 = gen_rtx_REG (SImode, regno2);
22043
22044 if (first)
22045 {
22046 rtx insn;
22047
22048 first = false;
22049 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22050 stack_pointer_rtx,
22051 -4 * num_regs));
22052 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22053 stack_pointer_rtx,
22054 -4 * (num_regs - 1)));
22055 tmp0 = gen_rtx_SET (stack_pointer_rtx,
22056 plus_constant (Pmode, stack_pointer_rtx,
22057 -4 * (num_regs)));
22058 tmp1 = gen_rtx_SET (mem1, reg1);
22059 tmp2 = gen_rtx_SET (mem2, reg2);
22060 RTX_FRAME_RELATED_P (tmp0) = 1;
22061 RTX_FRAME_RELATED_P (tmp1) = 1;
22062 RTX_FRAME_RELATED_P (tmp2) = 1;
22063 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
22064 XVECEXP (par, 0, 0) = tmp0;
22065 XVECEXP (par, 0, 1) = tmp1;
22066 XVECEXP (par, 0, 2) = tmp2;
22067 insn = emit_insn (par);
22068 RTX_FRAME_RELATED_P (insn) = 1;
22069 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22070 }
22071 else
22072 {
22073 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
22074 stack_pointer_rtx,
22075 4 * i));
22076 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
22077 stack_pointer_rtx,
22078 4 * (i + 1)));
22079 tmp1 = gen_rtx_SET (mem1, reg1);
22080 tmp2 = gen_rtx_SET (mem2, reg2);
22081 RTX_FRAME_RELATED_P (tmp1) = 1;
22082 RTX_FRAME_RELATED_P (tmp2) = 1;
22083 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22084 XVECEXP (par, 0, 0) = tmp1;
22085 XVECEXP (par, 0, 1) = tmp2;
22086 emit_insn (par);
22087 }
22088
22089 /* Create unwind information. This is an approximation. */
22090 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
22091 plus_constant (Pmode,
22092 stack_pointer_rtx,
22093 4 * i)),
22094 reg1);
22095 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
22096 plus_constant (Pmode,
22097 stack_pointer_rtx,
22098 4 * (i + 1))),
22099 reg2);
22100
22101 RTX_FRAME_RELATED_P (tmp1) = 1;
22102 RTX_FRAME_RELATED_P (tmp2) = 1;
22103 XVECEXP (dwarf, 0, i + 1) = tmp1;
22104 XVECEXP (dwarf, 0, i + 2) = tmp2;
22105 i += 2;
22106 regno = regno2 + 1;
22107 }
22108 else
22109 regno++;
22110
22111 return;
22112 }
22113
22114 /* STRD in ARM mode requires consecutive registers. This function emits STRD
22115 whenever possible, otherwise it emits single-word stores. The first store
22116 also allocates stack space for all saved registers, using writeback with
22117 post-addressing mode. All other stores use offset addressing. If no STRD
22118 can be emitted, this function emits a sequence of single-word stores,
22119 and not an STM as before, because single-word stores provide more
22120 scheduling freedom and can be turned into an STM by peephole optimizations. */
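/* Rough example (illustrative only): for saved_regs_mask covering
   {r4, r5, r7, lr} only r4/r5 form an even/odd pair, so the emitted
   sequence might be:
       strd    r4, r5, [sp, #-16]!
       str     r7, [sp, #8]
       str     lr, [sp, #12]  */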
22121 static void
22122 arm_emit_strd_push (unsigned long saved_regs_mask)
22123 {
22124 int num_regs = 0;
22125 int i, j, dwarf_index = 0;
22126 int offset = 0;
22127 rtx dwarf = NULL_RTX;
22128 rtx insn = NULL_RTX;
22129 rtx tmp, mem;
22130
22131 /* TODO: More efficient code could be emitted by changing the
22132 layout, e.g., first push all pairs that can use STRD to keep the
22133 stack aligned, and then push all other registers. */
22134 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22135 if (saved_regs_mask & (1 << i))
22136 num_regs++;
22137
22138 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22139 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
22140 gcc_assert (num_regs > 0);
22141
22142 /* Create sequence for DWARF info. */
22143 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
22144
22145 /* For dwarf info, we generate explicit stack update. */
22146 tmp = gen_rtx_SET (stack_pointer_rtx,
22147 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22148 RTX_FRAME_RELATED_P (tmp) = 1;
22149 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22150
22151 /* Save registers. */
22152 offset = - 4 * num_regs;
22153 j = 0;
22154 while (j <= LAST_ARM_REGNUM)
22155 if (saved_regs_mask & (1 << j))
22156 {
22157 if ((j % 2 == 0)
22158 && (saved_regs_mask & (1 << (j + 1))))
22159 {
22160 /* Current register and previous register form register pair for
22161 which STRD can be generated. */
22162 if (offset < 0)
22163 {
22164 /* Allocate stack space for all saved registers. */
22165 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22166 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22167 mem = gen_frame_mem (DImode, tmp);
22168 offset = 0;
22169 }
22170 else if (offset > 0)
22171 mem = gen_frame_mem (DImode,
22172 plus_constant (Pmode,
22173 stack_pointer_rtx,
22174 offset));
22175 else
22176 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22177
22178 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
22179 RTX_FRAME_RELATED_P (tmp) = 1;
22180 tmp = emit_insn (tmp);
22181
22182 /* Record the first store insn. */
22183 if (dwarf_index == 1)
22184 insn = tmp;
22185
22186 /* Generate dwarf info. */
22187 mem = gen_frame_mem (SImode,
22188 plus_constant (Pmode,
22189 stack_pointer_rtx,
22190 offset));
22191 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22192 RTX_FRAME_RELATED_P (tmp) = 1;
22193 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22194
22195 mem = gen_frame_mem (SImode,
22196 plus_constant (Pmode,
22197 stack_pointer_rtx,
22198 offset + 4));
22199 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
22200 RTX_FRAME_RELATED_P (tmp) = 1;
22201 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22202
22203 offset += 8;
22204 j += 2;
22205 }
22206 else
22207 {
22208 /* Emit a single word store. */
22209 if (offset < 0)
22210 {
22211 /* Allocate stack space for all saved registers. */
22212 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
22213 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
22214 mem = gen_frame_mem (SImode, tmp);
22215 offset = 0;
22216 }
22217 else if (offset > 0)
22218 mem = gen_frame_mem (SImode,
22219 plus_constant (Pmode,
22220 stack_pointer_rtx,
22221 offset));
22222 else
22223 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22224
22225 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22226 RTX_FRAME_RELATED_P (tmp) = 1;
22227 tmp = emit_insn (tmp);
22228
22229 /* Record the first store insn. */
22230 if (dwarf_index == 1)
22231 insn = tmp;
22232
22233 /* Generate dwarf info. */
22234 mem = gen_frame_mem (SImode,
22235 plus_constant (Pmode,
22236 stack_pointer_rtx,
22237 offset));
22238 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
22239 RTX_FRAME_RELATED_P (tmp) = 1;
22240 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
22241
22242 offset += 4;
22243 j += 1;
22244 }
22245 }
22246 else
22247 j++;
22248
22249 /* Attach dwarf info to the first insn we generate. */
22250 gcc_assert (insn != NULL_RTX);
22251 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22252 RTX_FRAME_RELATED_P (insn) = 1;
22253 }
22254
22255 /* Generate and emit an insn that we will recognize as a push_multi.
22256 Unfortunately, since this insn does not reflect very well the actual
22257 semantics of the operation, we need to annotate the insn for the benefit
22258 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
22259 MASK for registers that should be annotated for DWARF2 frame unwind
22260 information. */
22261 static rtx
22262 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
22263 {
22264 int num_regs = 0;
22265 int num_dwarf_regs = 0;
22266 int i, j;
22267 rtx par;
22268 rtx dwarf;
22269 int dwarf_par_index;
22270 rtx tmp, reg;
22271
22272 /* We don't record the PC in the dwarf frame information. */
22273 dwarf_regs_mask &= ~(1 << PC_REGNUM);
22274
22275 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22276 {
22277 if (mask & (1 << i))
22278 num_regs++;
22279 if (dwarf_regs_mask & (1 << i))
22280 num_dwarf_regs++;
22281 }
22282
22283 gcc_assert (num_regs && num_regs <= 16);
22284 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
22285
22286 /* For the body of the insn we are going to generate an UNSPEC in
22287 parallel with several USEs. This allows the insn to be recognized
22288 by the push_multi pattern in the arm.md file.
22289
22290 The body of the insn looks something like this:
22291
22292 (parallel [
22293 (set (mem:BLK (pre_modify:SI (reg:SI sp)
22294 (const_int:SI <num>)))
22295 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
22296 (use (reg:SI XX))
22297 (use (reg:SI YY))
22298 ...
22299 ])
22300
22301 For the frame note however, we try to be more explicit and actually
22302 show each register being stored into the stack frame, plus a (single)
22303 decrement of the stack pointer. We do it this way in order to be
22304 friendly to the stack unwinding code, which only wants to see a single
22305 stack decrement per instruction. The RTL we generate for the note looks
22306 something like this:
22307
22308 (sequence [
22309 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
22310 (set (mem:SI (reg:SI sp)) (reg:SI r4))
22311 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
22312 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
22313 ...
22314 ])
22315
22316 FIXME:: In an ideal world the PRE_MODIFY would not exist and
22317 instead we'd have a parallel expression detailing all
22318 the stores to the various memory addresses so that debug
22319 information is more up-to-date. Remember however while writing
22320 this to take care of the constraints with the push instruction.
22321
22322 Note also that this has to be taken care of for the VFP registers.
22323
22324 For more see PR43399. */
22325
22326 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
22327 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
22328 dwarf_par_index = 1;
22329
22330 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22331 {
22332 if (mask & (1 << i))
22333 {
22334 /* NOTE: The DWARF code emitter handles reg-reg copies correctly; in the
22335 following example the reg-reg copy of SP to the IP register is handled
22336 through the .cfi_def_cfa_register directive and the .cfi_offset
22337 directive for the IP register is skipped by the DWARF code emitter.
22338 Example:
22339 mov ip, sp
22340 .cfi_def_cfa_register 12
22341 push {fp, ip, lr, pc}
22342 .cfi_offset 11, -16
22343 .cfi_offset 13, -12
22344 .cfi_offset 14, -8
22345
22346 Whereas the Arm-specific .save directive handling differs from that
22347 of the DWARF code emitter and does not consider reg-reg copies when
22348 updating the register list. When PACBTI is enabled we manually
22349 update the .save directive register list to use "ra_auth_code"
22350 (pseudo register 143) instead of the IP register, as shown in the
22351 following pseudo code.
22352 Example:
22353 pacbti ip, lr, sp
22354 .cfi_register 143, 12
22355 push {r3, r7, ip, lr}
22356 .save {r3, r7, ra_auth_code, lr}
22357 */
22358 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22359 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22360 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22361
22362 XVECEXP (par, 0, 0)
22363 = gen_rtx_SET (gen_frame_mem
22364 (BLKmode,
22365 gen_rtx_PRE_MODIFY (Pmode,
22366 stack_pointer_rtx,
22367 plus_constant
22368 (Pmode, stack_pointer_rtx,
22369 -4 * num_regs))
22370 ),
22371 gen_rtx_UNSPEC (BLKmode,
22372 gen_rtvec (1, reg),
22373 UNSPEC_PUSH_MULT));
22374
22375 if (dwarf_regs_mask & (1 << i))
22376 {
22377 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
22378 dwarf_reg);
22379 RTX_FRAME_RELATED_P (tmp) = 1;
22380 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22381 }
22382
22383 break;
22384 }
22385 }
22386
22387 for (j = 1, i++; j < num_regs; i++)
22388 {
22389 if (mask & (1 << i))
22390 {
22391 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22392 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22393 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22394
22395 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
22396
22397 if (dwarf_regs_mask & (1 << i))
22398 {
22399 tmp
22400 = gen_rtx_SET (gen_frame_mem
22401 (SImode,
22402 plus_constant (Pmode, stack_pointer_rtx,
22403 4 * j)),
22404 dwarf_reg);
22405 RTX_FRAME_RELATED_P (tmp) = 1;
22406 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22407 }
22408
22409 j++;
22410 }
22411 }
22412
22413 par = emit_insn (par);
22414
22415 tmp = gen_rtx_SET (stack_pointer_rtx,
22416 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22417 RTX_FRAME_RELATED_P (tmp) = 1;
22418 XVECEXP (dwarf, 0, 0) = tmp;
22419
22420 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22421
22422 return par;
22423 }
22424
22425 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22426 SIZE is the offset to be adjusted.
22427 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22428 static void
22429 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22430 {
22431 rtx dwarf;
22432
22433 RTX_FRAME_RELATED_P (insn) = 1;
22434 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22435 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22436 }
22437
22438 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22439 SAVED_REGS_MASK shows which registers need to be restored.
22440
22441 Unfortunately, since this insn does not reflect very well the actual
22442 semantics of the operation, we need to annotate the insn for the benefit
22443 of DWARF2 frame unwind information. */
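/* As an illustration (not literal compiler output): restoring
   {r4, r5, r6} here builds one pop_multi pattern that assembles to
   something like
       pop     {r4, r5, r6}
   annotated with a REG_CFA_RESTORE note per register and a
   REG_CFA_ADJUST_CFA note for the 12-byte stack adjustment.  */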
22444 static void
22445 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22446 {
22447 int num_regs = 0;
22448 int i, j;
22449 rtx par;
22450 rtx dwarf = NULL_RTX;
22451 rtx tmp, reg;
22452 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22453 int offset_adj;
22454 int emit_update;
22455
22456 offset_adj = return_in_pc ? 1 : 0;
22457 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22458 if (saved_regs_mask & (1 << i))
22459 num_regs++;
22460
22461 gcc_assert (num_regs && num_regs <= 16);
22462
22463 /* If SP is in the reglist, then we don't emit the SP update insn. */
22464 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22465
22466 /* The parallel needs to hold num_regs SETs
22467 and one SET for the stack update. */
22468 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22469
22470 if (return_in_pc)
22471 XVECEXP (par, 0, 0) = ret_rtx;
22472
22473 if (emit_update)
22474 {
22475 /* Increment the stack pointer, based on there being
22476 num_regs 4-byte registers to restore. */
22477 tmp = gen_rtx_SET (stack_pointer_rtx,
22478 plus_constant (Pmode,
22479 stack_pointer_rtx,
22480 4 * num_regs));
22481 RTX_FRAME_RELATED_P (tmp) = 1;
22482 XVECEXP (par, 0, offset_adj) = tmp;
22483 }
22484
22485 /* Now restore every reg, which may include PC. */
22486 for (j = 0, i = 0; j < num_regs; i++)
22487 if (saved_regs_mask & (1 << i))
22488 {
22489 rtx dwarf_reg = reg = gen_rtx_REG (SImode, i);
22490 if (arm_current_function_pac_enabled_p () && i == IP_REGNUM)
22491 dwarf_reg = gen_rtx_REG (SImode, RA_AUTH_CODE);
22492 if ((num_regs == 1) && emit_update && !return_in_pc)
22493 {
22494 /* Emit single load with writeback. */
22495 tmp = gen_frame_mem (SImode,
22496 gen_rtx_POST_INC (Pmode,
22497 stack_pointer_rtx));
22498 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22499 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg,
22500 dwarf);
22501 return;
22502 }
22503
22504 tmp = gen_rtx_SET (reg,
22505 gen_frame_mem
22506 (SImode,
22507 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22508 RTX_FRAME_RELATED_P (tmp) = 1;
22509 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22510
22511 /* We need to maintain a sequence for DWARF info too. As dwarf info
22512 should not have PC, skip PC. */
22513 if (i != PC_REGNUM)
22514 dwarf = alloc_reg_note (REG_CFA_RESTORE, dwarf_reg, dwarf);
22515
22516 j++;
22517 }
22518
22519 if (return_in_pc)
22520 par = emit_jump_insn (par);
22521 else
22522 par = emit_insn (par);
22523
22524 REG_NOTES (par) = dwarf;
22525 if (!return_in_pc)
22526 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22527 stack_pointer_rtx, stack_pointer_rtx);
22528 }
22529
22530 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22531 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22532
22533 Unfortunately, since this insn does not reflect very well the actual
22534 semantics of the operation, we need to annotate the insn for the benefit
22535 of DWARF2 frame unwind information. */
22536 static void
22537 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22538 {
22539 int i, j;
22540 rtx par;
22541 rtx dwarf = NULL_RTX;
22542 rtx tmp, reg;
22543
22544 gcc_assert (num_regs && num_regs <= 32);
22545
22546 /* Workaround ARM10 VFPr1 bug. */
22547 if (num_regs == 2 && !arm_arch6)
22548 {
22549 if (first_reg == 15)
22550 first_reg--;
22551
22552 num_regs++;
22553 }
22554
22555 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22556 there could be up to 32 D-registers to restore.
22557 If there are more than 16 D-registers, make two recursive calls,
22558 each of which emits one pop_multi instruction. */
22559 if (num_regs > 16)
22560 {
22561 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22562 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22563 return;
22564 }
22565
22566 /* The parallel needs to hold num_regs SETs
22567 and one SET for the stack update. */
22568 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22569
22570 /* Increment the stack pointer, based on there being
22571 num_regs 8-byte registers to restore. */
22572 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22573 RTX_FRAME_RELATED_P (tmp) = 1;
22574 XVECEXP (par, 0, 0) = tmp;
22575
22576 /* Now show every reg that will be restored, using a SET for each. */
22577 for (j = 0, i = first_reg; j < num_regs; i += 2)
22578 {
22579 reg = gen_rtx_REG (DFmode, i);
22580
22581 tmp = gen_rtx_SET (reg,
22582 gen_frame_mem
22583 (DFmode,
22584 plus_constant (Pmode, base_reg, 8 * j)));
22585 RTX_FRAME_RELATED_P (tmp) = 1;
22586 XVECEXP (par, 0, j + 1) = tmp;
22587
22588 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22589
22590 j++;
22591 }
22592
22593 par = emit_insn (par);
22594 REG_NOTES (par) = dwarf;
22595
22596 /* Make sure the CFA is not left based on IP_REGNUM, to allow unwinding from FP. */
22597 if (REGNO (base_reg) == IP_REGNUM)
22598 {
22599 RTX_FRAME_RELATED_P (par) = 1;
22600 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22601 }
22602 else
22603 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22604 base_reg, base_reg);
22605 }
22606
22607 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
22608 even number of registers is being popped, LDRD patterns are created for
22609 all register pairs. If an odd number of registers is popped, the last register
22610 is loaded using an LDR pattern. */
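/* Sketch of the emitted sequence for {r4, r5, r6} (illustrative):
       ldrd    r4, r5, [sp]
       add     sp, sp, #8
       ldr     r6, [sp], #4
   Unlike ARM-mode LDRD, the two target registers here need not be
   consecutive.  */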
22611 static void
22612 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22613 {
22614 int num_regs = 0;
22615 int i, j;
22616 rtx par = NULL_RTX;
22617 rtx dwarf = NULL_RTX;
22618 rtx tmp, reg, tmp1;
22619 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22620
22621 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22622 if (saved_regs_mask & (1 << i))
22623 num_regs++;
22624
22625 gcc_assert (num_regs && num_regs <= 16);
22626
22627 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
22628 to be popped. So, if num_regs is even, now it will become odd,
22629 and we can generate pop with PC. If num_regs is odd, it will be
22630 even now, and ldr with return can be generated for PC. */
22631 if (return_in_pc)
22632 num_regs--;
22633
22634 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22635
22636 /* Var j iterates over all the registers to gather all the registers in
22637 saved_regs_mask. Var i gives the index of saved registers in the stack frame.
22638 A PARALLEL RTX of register-pair is created here, so that pattern for
22639 LDRD can be matched. As PC is always last register to be popped, and
22640 we have already decremented num_regs if PC, we don't have to worry
22641 about PC in this loop. */
22642 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22643 if (saved_regs_mask & (1 << j))
22644 {
22645 /* Create RTX for memory load. */
22646 reg = gen_rtx_REG (SImode, j);
22647 tmp = gen_rtx_SET (reg,
22648 gen_frame_mem (SImode,
22649 plus_constant (Pmode,
22650 stack_pointer_rtx, 4 * i)));
22651 RTX_FRAME_RELATED_P (tmp) = 1;
22652
22653 if (i % 2 == 0)
22654 {
22655 /* When saved-register index (i) is even, the RTX to be emitted is
22656 yet to be created. Hence create it first. The LDRD pattern we
22657 are generating is :
22658 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22659 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22660 where target registers need not be consecutive. */
22661 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22662 dwarf = NULL_RTX;
22663 }
22664
22665 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22666 added as 0th element and if i is odd, reg_i is added as 1st element
22667 of LDRD pattern shown above. */
22668 XVECEXP (par, 0, (i % 2)) = tmp;
22669 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22670
22671 if ((i % 2) == 1)
22672 {
22673 /* When saved-register index (i) is odd, RTXs for both the registers
22674 to be loaded are generated in above given LDRD pattern, and the
22675 pattern can be emitted now. */
22676 par = emit_insn (par);
22677 REG_NOTES (par) = dwarf;
22678 RTX_FRAME_RELATED_P (par) = 1;
22679 }
22680
22681 i++;
22682 }
22683
22684 /* If the number of registers pushed is odd AND return_in_pc is false, OR the
22685 number of registers is even AND return_in_pc is true, the last register is
22686 popped using LDR. It can be PC as well. Hence, adjust the stack first and
22687 then use LDR with post-increment. */
22688
22689 /* Increment the stack pointer, based on there being
22690 num_regs 4-byte registers to restore. */
22691 tmp = gen_rtx_SET (stack_pointer_rtx,
22692 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22693 RTX_FRAME_RELATED_P (tmp) = 1;
22694 tmp = emit_insn (tmp);
22695 if (!return_in_pc)
22696 {
22697 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22698 stack_pointer_rtx, stack_pointer_rtx);
22699 }
22700
22701 dwarf = NULL_RTX;
22702
22703 if (((num_regs % 2) == 1 && !return_in_pc)
22704 || ((num_regs % 2) == 0 && return_in_pc))
22705 {
22706 /* Scan for the single register to be popped. Skip until the saved
22707 register is found. */
22708 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22709
22710 /* Gen LDR with post increment here. */
22711 tmp1 = gen_rtx_MEM (SImode,
22712 gen_rtx_POST_INC (SImode,
22713 stack_pointer_rtx));
22714 set_mem_alias_set (tmp1, get_frame_alias_set ());
22715
22716 reg = gen_rtx_REG (SImode, j);
22717 tmp = gen_rtx_SET (reg, tmp1);
22718 RTX_FRAME_RELATED_P (tmp) = 1;
22719 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22720
22721 if (return_in_pc)
22722 {
22723 /* If return_in_pc, j must be PC_REGNUM. */
22724 gcc_assert (j == PC_REGNUM);
22725 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22726 XVECEXP (par, 0, 0) = ret_rtx;
22727 XVECEXP (par, 0, 1) = tmp;
22728 par = emit_jump_insn (par);
22729 }
22730 else
22731 {
22732 par = emit_insn (tmp);
22733 REG_NOTES (par) = dwarf;
22734 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22735 stack_pointer_rtx, stack_pointer_rtx);
22736 }
22737
22738 }
22739 else if ((num_regs % 2) == 1 && return_in_pc)
22740 {
22741 /* There are 2 registers to be popped. So, generate the pattern
22742 pop_multiple_with_stack_update_and_return to pop in PC. */
22743 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22744 }
22745
22746 return;
22747 }
22748
22749 /* LDRD in ARM mode needs consecutive registers as operands. This function
22750 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22751 offset addressing and then generates one separate stack update. This provides
22752 more scheduling freedom, compared to writeback on every load. However,
22753 if the function returns using load into PC directly
22754 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22755 before the last load. TODO: Add a peephole optimization to recognize
22756 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22757 peephole optimization to merge the load at stack-offset zero
22758 with the stack update instruction using load with writeback
22759 in post-index addressing mode. */
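/* Sketch (illustrative): popping {r4, r5, r7} in ARM mode might give
       ldrd    r4, r5, [sp]        @ r4/r5 are a consecutive pair
       ldr     r7, [sp, #8]        @ r7 has no partner, so use LDR
       add     sp, sp, #12         @ single stack update at the end  */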
22760 static void
22761 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22762 {
22763 int j = 0;
22764 int offset = 0;
22765 rtx par = NULL_RTX;
22766 rtx dwarf = NULL_RTX;
22767 rtx tmp, mem;
22768
22769 /* Restore saved registers. */
22770 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22771 j = 0;
22772 while (j <= LAST_ARM_REGNUM)
22773 if (saved_regs_mask & (1 << j))
22774 {
22775 if ((j % 2) == 0
22776 && (saved_regs_mask & (1 << (j + 1)))
22777 && (j + 1) != PC_REGNUM)
22778 {
22779 /* Current register and next register form register pair for which
22780 LDRD can be generated. PC is always the last register popped, and
22781 we handle it separately. */
22782 if (offset > 0)
22783 mem = gen_frame_mem (DImode,
22784 plus_constant (Pmode,
22785 stack_pointer_rtx,
22786 offset));
22787 else
22788 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22789
22790 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22791 tmp = emit_insn (tmp);
22792 RTX_FRAME_RELATED_P (tmp) = 1;
22793
22794 /* Generate dwarf info. */
22795
22796 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22797 gen_rtx_REG (SImode, j),
22798 NULL_RTX);
22799 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22800 gen_rtx_REG (SImode, j + 1),
22801 dwarf);
22802
22803 REG_NOTES (tmp) = dwarf;
22804
22805 offset += 8;
22806 j += 2;
22807 }
22808 else if (j != PC_REGNUM)
22809 {
22810 /* Emit a single word load. */
22811 if (offset > 0)
22812 mem = gen_frame_mem (SImode,
22813 plus_constant (Pmode,
22814 stack_pointer_rtx,
22815 offset));
22816 else
22817 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22818
22819 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22820 tmp = emit_insn (tmp);
22821 RTX_FRAME_RELATED_P (tmp) = 1;
22822
22823 /* Generate dwarf info. */
22824 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22825 gen_rtx_REG (SImode, j),
22826 NULL_RTX);
22827
22828 offset += 4;
22829 j += 1;
22830 }
22831 else /* j == PC_REGNUM */
22832 j++;
22833 }
22834 else
22835 j++;
22836
22837 /* Update the stack. */
22838 if (offset > 0)
22839 {
22840 tmp = gen_rtx_SET (stack_pointer_rtx,
22841 plus_constant (Pmode,
22842 stack_pointer_rtx,
22843 offset));
22844 tmp = emit_insn (tmp);
22845 arm_add_cfa_adjust_cfa_note (tmp, offset,
22846 stack_pointer_rtx, stack_pointer_rtx);
22847 offset = 0;
22848 }
22849
22850 if (saved_regs_mask & (1 << PC_REGNUM))
22851 {
22852 /* Only PC is to be popped. */
22853 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22854 XVECEXP (par, 0, 0) = ret_rtx;
22855 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22856 gen_frame_mem (SImode,
22857 gen_rtx_POST_INC (SImode,
22858 stack_pointer_rtx)));
22859 RTX_FRAME_RELATED_P (tmp) = 1;
22860 XVECEXP (par, 0, 1) = tmp;
22861 par = emit_jump_insn (par);
22862
22863 /* Generate dwarf info. */
22864 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22865 gen_rtx_REG (SImode, PC_REGNUM),
22866 NULL_RTX);
22867 REG_NOTES (par) = dwarf;
22868 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22869 stack_pointer_rtx, stack_pointer_rtx);
22870 }
22871 }
22872
22873 /* Calculate the size of the return value that is passed in registers. */
22874 static unsigned
22875 arm_size_return_regs (void)
22876 {
22877 machine_mode mode;
22878
22879 if (crtl->return_rtx != 0)
22880 mode = GET_MODE (crtl->return_rtx);
22881 else
22882 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22883
22884 return GET_MODE_SIZE (mode);
22885 }
22886
22887 /* Return true if the current function needs to save/restore LR. */
22888 static bool
22889 thumb_force_lr_save (void)
22890 {
22891 return !cfun->machine->lr_save_eliminated
22892 && (!crtl->is_leaf
22893 || thumb_far_jump_used_p ()
22894 || df_regs_ever_live_p (LR_REGNUM));
22895 }
22896
22897 /* We do not know if r3 will be available because
22898 we have an indirect tail call happening in this
22899 particular case. */
22900 static bool
22901 is_indirect_tailcall_p (rtx call)
22902 {
22903 rtx pat = PATTERN (call);
22904
22905 /* Indirect tail call. */
22906 pat = XVECEXP (pat, 0, 0);
22907 if (GET_CODE (pat) == SET)
22908 pat = SET_SRC (pat);
22909
22910 pat = XEXP (XEXP (pat, 0), 0);
22911 return REG_P (pat);
22912 }
22913
22914 /* Return true if r3 is used by any of the tail call insns in the
22915 current function. */
22916 static bool
22917 any_sibcall_could_use_r3 (void)
22918 {
22919 edge_iterator ei;
22920 edge e;
22921
22922 if (!crtl->tail_call_emit)
22923 return false;
22924 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22925 if (e->flags & EDGE_SIBCALL)
22926 {
22927 rtx_insn *call = BB_END (e->src);
22928 if (!CALL_P (call))
22929 call = prev_nonnote_nondebug_insn (call);
22930 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22931 if (find_regno_fusage (call, USE, 3)
22932 || is_indirect_tailcall_p (call))
22933 return true;
22934 }
22935 return false;
22936 }
22937
22938
22939 /* Compute the distance from register FROM to register TO.
22940 These can be the arg pointer (26), the soft frame pointer (25),
22941 the stack pointer (13) or the hard frame pointer (11).
22942 In thumb mode r7 is used as the soft frame pointer, if needed.
22943 Typical stack layout looks like this:
22944
22945 old stack pointer -> | |
22946 ----
22947 | | \
22948 | | saved arguments for
22949 | | vararg functions
22950 | | /
22951 --
22952 hard FP & arg pointer -> | | \
22953 | | stack
22954 | | frame
22955 | | /
22956 --
22957 | | \
22958 | | call saved
22959 | | registers
22960 soft frame pointer -> | | /
22961 --
22962 | | \
22963 | | local
22964 | | variables
22965 locals base pointer -> | | /
22966 --
22967 | | \
22968 | | outgoing
22969 | | arguments
22970 current stack pointer -> | | /
22971 --
22972
22973 For a given function some or all of these stack components
22974 may not be needed, giving rise to the possibility of
22975 eliminating some of the registers.
22976
22977 The values returned by this function must reflect the behavior
22978 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22979
22980 The sign of the number returned reflects the direction of stack
22981 growth, so the values are positive for all eliminations except
22982 from the soft frame pointer to the hard frame pointer.
22983
22984 SFP may point just inside the local variables block to ensure correct
22985 alignment. */
22986
22987
22988 /* Return cached stack offsets. */
22989
22990 static arm_stack_offsets *
22991 arm_get_frame_offsets (void)
22992 {
22993 struct arm_stack_offsets *offsets;
22994
22995 offsets = &cfun->machine->stack_offsets;
22996
22997 return offsets;
22998 }
22999
23000
23001 /* Calculate stack offsets. These are used to calculate register elimination
23002 offsets and in prologue/epilogue code. Also calculates which registers
23003 should be saved. */
23004
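/* Worked example (illustrative, assuming no static chain, no
   interworking slot and no frame pointer): an ARM function saving
   {r4, lr} with 8 bytes of locals and no outgoing arguments gets
      saved_args = 0, saved_regs = 8, soft_frame = 8,
      locals_base = 16, outgoing_args = 16.  */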
23005 static void
23006 arm_compute_frame_layout (void)
23007 {
23008 struct arm_stack_offsets *offsets;
23009 unsigned long func_type;
23010 int saved;
23011 int core_saved;
23012 HOST_WIDE_INT frame_size;
23013 int i;
23014
23015 offsets = &cfun->machine->stack_offsets;
23016
23017 /* Initially this is the size of the local variables. It will be translated
23018 into an offset once we have determined the size of the preceding data. */
23019 frame_size = ROUND_UP_WORD (get_frame_size ());
23020
23021 /* Space for variadic functions. */
23022 offsets->saved_args = crtl->args.pretend_args_size;
23023
23024 /* In Thumb mode this is incorrect, but never used. */
23025 offsets->frame
23026 = (offsets->saved_args
23027 + arm_compute_static_chain_stack_bytes ()
23028 + (frame_pointer_needed ? 4 : 0));
23029
23030 if (TARGET_32BIT)
23031 {
23032 unsigned int regno;
23033
23034 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
23035 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23036 saved = core_saved;
23037
23038 /* We know that SP will be doubleword aligned on entry, and we must
23039 preserve that condition at any subroutine call. We also require the
23040 soft frame pointer to be doubleword aligned. */
23041
23042 if (TARGET_REALLY_IWMMXT)
23043 {
23044 /* Check for the call-saved iWMMXt registers. */
23045 for (regno = FIRST_IWMMXT_REGNUM;
23046 regno <= LAST_IWMMXT_REGNUM;
23047 regno++)
23048 if (reg_needs_saving_p (regno))
23049 saved += 8;
23050 }
23051
23052 func_type = arm_current_func_type ();
23053 /* Space for saved VFP registers. */
23054 if (! IS_VOLATILE (func_type)
23055 && TARGET_VFP_BASE)
23056 saved += arm_get_vfp_saved_size ();
23057
23058 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
23059 nonsecure entry functions with VSTR/VLDR. */
23060 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23061 saved += 4;
23062 }
23063 else /* TARGET_THUMB1 */
23064 {
23065 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
23066 core_saved = bit_count (offsets->saved_regs_mask) * 4;
23067 saved = core_saved;
23068 if (TARGET_BACKTRACE)
23069 saved += 16;
23070 }
23071
23072 /* Saved registers include the stack frame. */
23073 offsets->saved_regs
23074 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
23075 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
23076
23077 /* A leaf function does not need any stack alignment if it has nothing
23078 on the stack. */
23079 if (crtl->is_leaf && frame_size == 0
23080 /* However if it calls alloca(), we have a dynamically allocated
23081 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
23082 && ! cfun->calls_alloca)
23083 {
23084 offsets->outgoing_args = offsets->soft_frame;
23085 offsets->locals_base = offsets->soft_frame;
23086 return;
23087 }
23088
23089 /* Ensure SFP has the correct alignment. */
23090 if (ARM_DOUBLEWORD_ALIGN
23091 && (offsets->soft_frame & 7))
23092 {
23093 offsets->soft_frame += 4;
23094 /* Try to align the stack by pushing an extra reg. Don't bother doing this
23095 when there is a stack frame as the alignment will be rolled into
23096 the normal stack adjustment. */
23097 if (frame_size + crtl->outgoing_args_size == 0)
23098 {
23099 int reg = -1;
23100
23101 /* Register r3 is caller-saved. Normally it does not need to be
23102 saved on entry by the prologue. However if we choose to save
23103 it for padding then we may confuse the compiler into thinking
23104 a prologue sequence is required when in fact it is not. This
23105 will occur when shrink-wrapping if r3 is used as a scratch
23106 register and there are no other callee-saved writes.
23107
23108 This situation can be avoided when other callee-saved registers
23109 are available and r3 is not mandatory if we choose a callee-saved
23110 register for padding. */
23111 bool prefer_callee_reg_p = false;
23112
23113 /* If it is safe to use r3, then do so. This sometimes
23114 generates better code on Thumb-2 by avoiding the need to
23115 use 32-bit push/pop instructions. */
23116 if (! any_sibcall_could_use_r3 ()
23117 && arm_size_return_regs () <= 12
23118 && (offsets->saved_regs_mask & (1 << 3)) == 0
23119 && (TARGET_THUMB2
23120 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
23121 {
23122 reg = 3;
23123 if (!TARGET_THUMB2)
23124 prefer_callee_reg_p = true;
23125 }
23126 if (reg == -1
23127 || prefer_callee_reg_p)
23128 {
23129 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
23130 {
23131 /* Avoid fixed registers; they may be changed at
23132 arbitrary times so it's unsafe to restore them
23133 during the epilogue. */
23134 if (!fixed_regs[i]
23135 && (offsets->saved_regs_mask & (1 << i)) == 0)
23136 {
23137 reg = i;
23138 break;
23139 }
23140 }
23141 }
23142
23143 if (reg != -1)
23144 {
23145 offsets->saved_regs += 4;
23146 offsets->saved_regs_mask |= (1 << reg);
23147 }
23148 }
23149 }
23150
23151 offsets->locals_base = offsets->soft_frame + frame_size;
23152 offsets->outgoing_args = (offsets->locals_base
23153 + crtl->outgoing_args_size);
23154
23155 if (ARM_DOUBLEWORD_ALIGN)
23156 {
23157 /* Ensure SP remains doubleword aligned. */
23158 if (offsets->outgoing_args & 7)
23159 offsets->outgoing_args += 4;
23160 gcc_assert (!(offsets->outgoing_args & 7));
23161 }
23162 }
23163
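/* Illustrative sketch (not part of the original sources): in the common case
   the offsets computed above increase in the direction of stack growth, and
   each named offset is the distance from the incoming arguments down to the
   low-address end of its region, roughly:

     high addresses   pretend (variadic) args          <- saved_args
                      static chain slot (if any)
                      saved core / coprocessor regs    <- saved_regs
                      caller interworking slot         <- soft_frame
                      local variables                  <- locals_base
     low addresses    outgoing arguments               <- outgoing_args

   The exact layout depends on the target options handled above.  */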
23164
23165 /* Calculate the relative offsets for the different stack pointers. Positive
23166 offsets are in the direction of stack growth. */
23167
23168 HOST_WIDE_INT
23169 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
23170 {
23171 arm_stack_offsets *offsets;
23172
23173 offsets = arm_get_frame_offsets ();
23174
23175 /* OK, now we have enough information to compute the distances.
23176 There must be an entry in these switch tables for each pair
23177 of registers in ELIMINABLE_REGS, even if some of the entries
23178 seem to be redundant or useless. */
23179 switch (from)
23180 {
23181 case ARG_POINTER_REGNUM:
23182 switch (to)
23183 {
23184 case THUMB_HARD_FRAME_POINTER_REGNUM:
23185 return 0;
23186
23187 case FRAME_POINTER_REGNUM:
23188 /* This is the reverse of the soft frame pointer
23189 to hard frame pointer elimination below. */
23190 return offsets->soft_frame - offsets->saved_args;
23191
23192 case ARM_HARD_FRAME_POINTER_REGNUM:
23193 /* This is only non-zero in the case where the static chain register
23194 is stored above the frame. */
23195 return offsets->frame - offsets->saved_args - 4;
23196
23197 case STACK_POINTER_REGNUM:
23198 /* If nothing has been pushed on the stack at all
23199 then this will return -4. This *is* correct! */
23200 return offsets->outgoing_args - (offsets->saved_args + 4);
23201
23202 default:
23203 gcc_unreachable ();
23204 }
23205 gcc_unreachable ();
23206
23207 case FRAME_POINTER_REGNUM:
23208 switch (to)
23209 {
23210 case THUMB_HARD_FRAME_POINTER_REGNUM:
23211 return 0;
23212
23213 case ARM_HARD_FRAME_POINTER_REGNUM:
23214 /* The hard frame pointer points to the top entry in the
23215 stack frame.  The soft frame pointer points to the bottom entry
23216 in the stack frame.  If there is no stack frame at all,
23217 then they are identical. */
23218
23219 return offsets->frame - offsets->soft_frame;
23220
23221 case STACK_POINTER_REGNUM:
23222 return offsets->outgoing_args - offsets->soft_frame;
23223
23224 default:
23225 gcc_unreachable ();
23226 }
23227 gcc_unreachable ();
23228
23229 default:
23230 /* You cannot eliminate from the stack pointer.
23231 In theory you could eliminate from the hard frame
23232 pointer to the stack pointer, but this will never
23233 happen, since if a stack frame is not needed the
23234 hard frame pointer will never be used. */
23235 gcc_unreachable ();
23236 }
23237 }
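
/* A worked example (illustrative): with no pretend arguments, 8 bytes of
   saved core registers, no interworking slot, 16 bytes of locals and no
   outgoing arguments, outgoing_args is 24, so the ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM elimination above yields 24 - (0 + 4) = 20.  */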
23238
23239 /* Given FROM and TO register numbers, say whether this elimination is
23240 allowed. Frame pointer elimination is automatically handled.
23241
23242 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
23243 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
23244 pointer, we must eliminate FRAME_POINTER_REGNUM into
23245 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
23246 ARG_POINTER_REGNUM. */
23247
23248 bool
23249 arm_can_eliminate (const int from, const int to)
23250 {
23251 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
23252 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
23253 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
23254 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
23255 true);
23256 }
23257
23258 /* Emit RTL to save coprocessor registers on function entry. Returns the
23259 number of bytes pushed. */
23260
23261 static int
23262 arm_save_coproc_regs(void)
23263 {
23264 int saved_size = 0;
23265 unsigned reg;
23266 unsigned start_reg;
23267 rtx insn;
23268
23269 if (TARGET_REALLY_IWMMXT)
23270 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
23271 if (reg_needs_saving_p (reg))
23272 {
23273 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23274 insn = gen_rtx_MEM (V2SImode, insn);
23275 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
23276 RTX_FRAME_RELATED_P (insn) = 1;
23277 saved_size += 8;
23278 }
23279
23280 if (TARGET_VFP_BASE)
23281 {
23282 start_reg = FIRST_VFP_REGNUM;
23283
23284 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
23285 {
23286 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
23287 {
23288 if (start_reg != reg)
23289 saved_size += vfp_emit_fstmd (start_reg,
23290 (reg - start_reg) / 2);
23291 start_reg = reg + 2;
23292 }
23293 }
23294 if (start_reg != reg)
23295 saved_size += vfp_emit_fstmd (start_reg,
23296 (reg - start_reg) / 2);
23297 }
23298 return saved_size;
23299 }
23300
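/* As an illustration (not from the original sources): saving the VFP
   callee-saved registers d8-d15 through vfp_emit_fstmd above corresponds to
   something like
	vstmdb	sp!, {d8-d15}		@ 64 bytes, counted in saved_size
   while each live iWMMXt register is stored with its own pre-decrement of
   the stack pointer.  */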
23301
23302 /* Set the Thumb frame pointer from the stack pointer. */
23303
23304 static void
23305 thumb_set_frame_pointer (arm_stack_offsets *offsets)
23306 {
23307 HOST_WIDE_INT amount;
23308 rtx insn, dwarf;
23309
23310 amount = offsets->outgoing_args - offsets->locals_base;
23311 if (amount < 1024)
23312 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23313 stack_pointer_rtx, GEN_INT (amount)));
23314 else
23315 {
23316 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
23317 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
23318 expects the first two operands to be the same. */
23319 if (TARGET_THUMB2)
23320 {
23321 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23322 stack_pointer_rtx,
23323 hard_frame_pointer_rtx));
23324 }
23325 else
23326 {
23327 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23328 hard_frame_pointer_rtx,
23329 stack_pointer_rtx));
23330 }
23331 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
23332 plus_constant (Pmode, stack_pointer_rtx, amount));
23333 RTX_FRAME_RELATED_P (dwarf) = 1;
23334 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23335 }
23336
23337 RTX_FRAME_RELATED_P (insn) = 1;
23338 }
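
/* For illustration only: when the adjustment is small this amounts to a
   single
	add	r7, sp, #amount
   while for amount >= 1024 the constant is materialised first, e.g.
	mov	r7, #amount
	add	r7, sp, r7		@ Thumb-2 operand order
   with a REG_FRAME_RELATED_EXPR note describing FP = SP + amount (r7 stands
   in for the Thumb hard frame pointer).  */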
23339
23340 struct scratch_reg {
23341 rtx reg;
23342 bool saved;
23343 };
23344
23345 /* Return a short-lived scratch register for use as a 2nd scratch register on
23346 function entry after the registers are saved in the prologue. This register
23347 must be released by means of release_scratch_register_on_entry. IP is not
23348 considered since it is always used as the 1st scratch register if available.
23349
23350 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
23351 mask of live registers. */
23352
23353 static void
23354 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
23355 unsigned long live_regs)
23356 {
23357 int regno = -1;
23358
23359 sr->saved = false;
23360
23361 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
23362 regno = LR_REGNUM;
23363 else
23364 {
23365 unsigned int i;
23366
23367 for (i = 4; i < 11; i++)
23368 if (regno1 != i && (live_regs & (1 << i)) != 0)
23369 {
23370 regno = i;
23371 break;
23372 }
23373
23374 if (regno < 0)
23375 {
23376 /* If IP is used as the 1st scratch register for a nested function,
23377 then either r3 wasn't available or it is being used to preserve IP. */
23378 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
23379 regno1 = 3;
23380 regno = (regno1 == 3 ? 2 : 3);
23381 sr->saved
23382 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
23383 regno);
23384 }
23385 }
23386
23387 sr->reg = gen_rtx_REG (SImode, regno);
23388 if (sr->saved)
23389 {
23390 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23391 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
23392 rtx x = gen_rtx_SET (stack_pointer_rtx,
23393 plus_constant (Pmode, stack_pointer_rtx, -4));
23394 RTX_FRAME_RELATED_P (insn) = 1;
23395 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23396 }
23397 }
23398
23399 /* Release a scratch register obtained from the preceding function. */
23400
23401 static void
23402 release_scratch_register_on_entry (struct scratch_reg *sr)
23403 {
23404 if (sr->saved)
23405 {
23406 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23407 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23408 rtx x = gen_rtx_SET (stack_pointer_rtx,
23409 plus_constant (Pmode, stack_pointer_rtx, 4));
23410 RTX_FRAME_RELATED_P (insn) = 1;
23411 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23412 }
23413 }
23414
23415 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23416
23417 #if PROBE_INTERVAL > 4096
23418 #error Cannot use indexed addressing mode for stack probing
23419 #endif
23420
23421 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23422 inclusive. These are offsets from the current stack pointer. REGNO1
23423 is the index number of the 1st scratch register and LIVE_REGS is the
23424 mask of live registers. */
23425
23426 static void
23427 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23428 unsigned int regno1, unsigned long live_regs)
23429 {
23430 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23431
23432 /* See if we have a constant small number of probes to generate. If so,
23433 that's the easy case. */
23434 if (size <= PROBE_INTERVAL)
23435 {
23436 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23437 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23438 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23439 }
23440
23441 /* The run-time loop is made up of 10 insns in the generic case while the
23442 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
23443 else if (size <= 5 * PROBE_INTERVAL)
23444 {
23445 HOST_WIDE_INT i, rem;
23446
23447 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23448 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23449 emit_stack_probe (reg1);
23450
23451 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23452 it exceeds SIZE. If only two probes are needed, this will not
23453 generate any code. Then probe at FIRST + SIZE. */
23454 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23455 {
23456 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23457 emit_stack_probe (reg1);
23458 }
23459
23460 rem = size - (i - PROBE_INTERVAL);
23461 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23462 {
23463 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23464 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23465 }
23466 else
23467 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23468 }
23469
23470 /* Otherwise, do the same as above, but in a loop. Note that we must be
23471 extra careful with variables wrapping around because we might be at
23472 the very top (or the very bottom) of the address space and we have
23473 to be able to handle this case properly; in particular, we use an
23474 equality test for the loop condition. */
23475 else
23476 {
23477 HOST_WIDE_INT rounded_size;
23478 struct scratch_reg sr;
23479
23480 get_scratch_register_on_entry (&sr, regno1, live_regs);
23481
23482 emit_move_insn (reg1, GEN_INT (first));
23483
23484
23485 /* Step 1: round SIZE to the previous multiple of the interval. */
23486
23487 rounded_size = size & -PROBE_INTERVAL;
23488 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23489
23490
23491 /* Step 2: compute initial and final value of the loop counter. */
23492
23493 /* TEST_ADDR = SP + FIRST. */
23494 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23495
23496 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23497 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23498
23499
23500 /* Step 3: the loop
23501
23502 do
23503 {
23504 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23505 probe at TEST_ADDR
23506 }
23507 while (TEST_ADDR != LAST_ADDR)
23508
23509 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23510 until it is equal to ROUNDED_SIZE. */
23511
23512 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23513
23514
23515 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23516 that SIZE is equal to ROUNDED_SIZE. */
23517
23518 if (size != rounded_size)
23519 {
23520 HOST_WIDE_INT rem = size - rounded_size;
23521
23522 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23523 {
23524 emit_set_insn (sr.reg,
23525 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23526 emit_stack_probe (plus_constant (Pmode, sr.reg,
23527 PROBE_INTERVAL - rem));
23528 }
23529 else
23530 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23531 }
23532
23533 release_scratch_register_on_entry (&sr);
23534 }
23535
23536 /* Make sure nothing is scheduled before we are done. */
23537 emit_insn (gen_blockage ());
23538 }
23539
23540 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23541 absolute addresses. */
23542
23543 const char *
23544 output_probe_stack_range (rtx reg1, rtx reg2)
23545 {
23546 static int labelno = 0;
23547 char loop_lab[32];
23548 rtx xops[2];
23549
23550 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23551
23552 /* Loop. */
23553 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23554
23555 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23556 xops[0] = reg1;
23557 xops[1] = GEN_INT (PROBE_INTERVAL);
23558 output_asm_insn ("sub\t%0, %0, %1", xops);
23559
23560 /* Probe at TEST_ADDR. */
23561 output_asm_insn ("str\tr0, [%0, #0]", xops);
23562
23563 /* Test if TEST_ADDR == LAST_ADDR. */
23564 xops[1] = reg2;
23565 output_asm_insn ("cmp\t%0, %1", xops);
23566
23567 /* Branch. */
23568 fputs ("\tbne\t", asm_out_file);
23569 assemble_name_raw (asm_out_file, loop_lab);
23570 fputc ('\n', asm_out_file);
23571
23572 return "";
23573 }
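
/* Illustrative only: with the default 4K probe interval the loop emitted
   above is roughly

	.LPSRL0:
		sub	r4, r4, #4096	@ advance TEST_ADDR one interval down
		str	r0, [r4, #0]	@ probe at TEST_ADDR
		cmp	r4, r5		@ reached LAST_ADDR?
		bne	.LPSRL0

   where r4 and r5 stand in for whatever registers REG1 and REG2 happen
   to be.  */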
23574
23575 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23576 function. */
23577 void
23578 arm_expand_prologue (void)
23579 {
23580 rtx amount;
23581 rtx insn;
23582 rtx ip_rtx;
23583 unsigned long live_regs_mask;
23584 unsigned long func_type;
23585 int fp_offset = 0;
23586 int saved_pretend_args = 0;
23587 int saved_regs = 0;
23588 unsigned HOST_WIDE_INT args_to_push;
23589 HOST_WIDE_INT size;
23590 arm_stack_offsets *offsets;
23591 bool clobber_ip;
23592
23593 func_type = arm_current_func_type ();
23594
23595 /* Naked functions don't have prologues. */
23596 if (IS_NAKED (func_type))
23597 {
23598 if (flag_stack_usage_info)
23599 current_function_static_stack_size = 0;
23600 return;
23601 }
23602
23603 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
23604 args_to_push = crtl->args.pretend_args_size;
23605
23606 /* Compute which register we will have to save onto the stack. */
23607 offsets = arm_get_frame_offsets ();
23608 live_regs_mask = offsets->saved_regs_mask;
23609
23610 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23611
23612 if (IS_STACKALIGN (func_type))
23613 {
23614 rtx r0, r1;
23615
23616 /* Handle a word-aligned stack pointer. We generate the following:
23617
23618 mov r0, sp
23619 bic r1, r0, #7
23620 mov sp, r1
23621 <save and restore r0 in normal prologue/epilogue>
23622 mov sp, r0
23623 bx lr
23624
23625 The unwinder doesn't need to know about the stack realignment.
23626 Just tell it we saved SP in r0. */
23627 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23628
23629 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23630 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23631
23632 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23633 RTX_FRAME_RELATED_P (insn) = 1;
23634 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23635
23636 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23637
23638 /* ??? The CFA changes here, which may cause GDB to conclude that it
23639 has entered a different function. That said, the unwind info is
23640 correct, individually, before and after this instruction because
23641 we've described the save of SP, which will override the default
23642 handling of SP as restoring from the CFA. */
23643 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23644 }
23645
23646 /* Let's compute the static_chain_stack_bytes required and store it. Right
23647 now the value must be -1 as stored by arm_init_machine_status (). */
23648 cfun->machine->static_chain_stack_bytes
23649 = arm_compute_static_chain_stack_bytes ();
23650
23651 /* The static chain register is the same as the IP register. If it is
23652 clobbered when creating the frame, we need to save and restore it. */
23653 clobber_ip = (IS_NESTED (func_type)
23654 && (((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23655 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23656 || flag_stack_clash_protection)
23657 && !df_regs_ever_live_p (LR_REGNUM)
23658 && arm_r3_live_at_start_p ()))
23659 || arm_current_function_pac_enabled_p ()));
23660
23661 /* Find somewhere to store IP whilst the frame is being created.
23662 We try the following places in order:
23663
23664 1. The last argument register r3 if it is available.
23665 2. A slot on the stack above the frame if there are no
23666 arguments to push onto the stack.
23667 3. Register r3 again, after pushing the argument registers
23668 onto the stack, if this is a varargs function.
23669 4. The last slot on the stack created for the arguments to
23670 push, if this isn't a varargs function.
23671
23672 Note - we only need to tell the dwarf2 backend about the SP
23673 adjustment in the second variant; the static chain register
23674 doesn't need to be unwound, as it doesn't contain a value
23675 inherited from the caller. */
23676 if (clobber_ip)
23677 {
23678 if (!arm_r3_live_at_start_p ())
23679 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23680 else if (args_to_push == 0)
23681 {
23682 rtx addr, dwarf;
23683
23684 saved_regs += 4;
23685
23686 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23687 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23688 fp_offset = 4;
23689
23690 /* Just tell the dwarf backend that we adjusted SP. */
23691 dwarf = gen_rtx_SET (stack_pointer_rtx,
23692 plus_constant (Pmode, stack_pointer_rtx,
23693 -fp_offset));
23694 RTX_FRAME_RELATED_P (insn) = 1;
23695 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23696 if (arm_current_function_pac_enabled_p ())
23697 cfun->machine->pacspval_needed = 1;
23698 }
23699 else
23700 {
23701 /* Store the args on the stack. */
23702 if (cfun->machine->uses_anonymous_args)
23703 {
23704 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23705 (0xf0 >> (args_to_push / 4)) & 0xf);
23706 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23707 saved_pretend_args = 1;
23708 }
23709 else
23710 {
23711 rtx addr, dwarf;
23712
23713 if (args_to_push == 4)
23714 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23715 else
23716 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23717 plus_constant (Pmode,
23718 stack_pointer_rtx,
23719 -args_to_push));
23720
23721 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23722
23723 /* Just tell the dwarf backend that we adjusted SP. */
23724 dwarf = gen_rtx_SET (stack_pointer_rtx,
23725 plus_constant (Pmode, stack_pointer_rtx,
23726 -args_to_push));
23727 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23728 }
23729
23730 RTX_FRAME_RELATED_P (insn) = 1;
23731 fp_offset = args_to_push;
23732 args_to_push = 0;
23733 if (arm_current_function_pac_enabled_p ())
23734 cfun->machine->pacspval_needed = 1;
23735 }
23736 }
23737
23738 if (arm_current_function_pac_enabled_p ())
23739 {
23740 /* If IP was clobbered we only emit a PAC instruction as the BTI
23741 one will be added before the push of the clobbered IP (if
23742 necessary) by the bti pass. */
23743 if (aarch_bti_enabled () && !clobber_ip)
23744 insn = emit_insn (gen_pacbti_nop ());
23745 else
23746 insn = emit_insn (gen_pac_nop ());
23747
23748 rtx dwarf = gen_rtx_SET (ip_rtx, gen_rtx_REG (SImode, RA_AUTH_CODE));
23749 RTX_FRAME_RELATED_P (insn) = 1;
23750 add_reg_note (insn, REG_CFA_REGISTER, dwarf);
23751 }
23752
23753 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23754 {
23755 if (IS_INTERRUPT (func_type))
23756 {
23757 /* Interrupt functions must not corrupt any registers.
23758 Creating a frame pointer however, corrupts the IP
23759 register, so we must push it first. */
23760 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23761
23762 /* Do not set RTX_FRAME_RELATED_P on this insn.
23763 The dwarf stack unwinding code only wants to see one
23764 stack decrement per function, and this is not it. If
23765 this instruction is labeled as being part of the frame
23766 creation sequence then dwarf2out_frame_debug_expr will
23767 die when it encounters the assignment of IP to FP
23768 later on, since the use of SP here establishes SP as
23769 the CFA register and not IP.
23770
23771 Anyway this instruction is not really part of the stack
23772 frame creation although it is part of the prologue. */
23773 }
23774
23775 insn = emit_set_insn (ip_rtx,
23776 plus_constant (Pmode, stack_pointer_rtx,
23777 fp_offset));
23778 RTX_FRAME_RELATED_P (insn) = 1;
23779 }
23780
23781 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23782 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23783 {
23784 saved_regs += 4;
23785 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23786 GEN_INT (FPCXTNS_ENUM)));
23787 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23788 plus_constant (Pmode, stack_pointer_rtx, -4));
23789 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23790 RTX_FRAME_RELATED_P (insn) = 1;
23791 }
23792
23793 if (args_to_push)
23794 {
23795 /* Push the argument registers, or reserve space for them. */
23796 if (cfun->machine->uses_anonymous_args)
23797 insn = emit_multi_reg_push
23798 ((0xf0 >> (args_to_push / 4)) & 0xf,
23799 (0xf0 >> (args_to_push / 4)) & 0xf);
23800 else
23801 insn = emit_insn
23802 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23803 GEN_INT (- args_to_push)));
23804 RTX_FRAME_RELATED_P (insn) = 1;
23805 }
23806
23807 /* If this is an interrupt service routine, and the link register
23808 is going to be pushed, and we're not generating the extra push
23809 of IP (needed when a frame pointer is needed and the frame layout
23810 is APCS), then subtracting four from LR now means that the function
23811 return can be done with a single instruction.
23812 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23813 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23814 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23815 && TARGET_ARM)
23816 {
23817 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23818
23819 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23820 }
23821
23822 if (live_regs_mask)
23823 {
23824 unsigned long dwarf_regs_mask = live_regs_mask;
23825
23826 saved_regs += bit_count (live_regs_mask) * 4;
23827 if (optimize_size && !frame_pointer_needed
23828 && saved_regs == offsets->saved_regs - offsets->saved_args)
23829 {
23830 /* If no coprocessor registers are being pushed and we don't have
23831 to worry about a frame pointer then push extra registers to
23832 create the stack frame. This is done in a way that does not
23833 alter the frame layout, so is independent of the epilogue. */
23834 int n;
23835 int frame;
23836 n = 0;
23837 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23838 n++;
23839 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23840 if (frame && n * 4 >= frame)
23841 {
23842 n = frame / 4;
23843 live_regs_mask |= (1 << n) - 1;
23844 saved_regs += frame;
23845 }
23846 }
23847
23848 if (TARGET_LDRD
23849 && current_tune->prefer_ldrd_strd
23850 && !optimize_function_for_size_p (cfun))
23851 {
23852 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23853 if (TARGET_THUMB2)
23854 thumb2_emit_strd_push (live_regs_mask);
23855 else if (TARGET_ARM
23856 && !TARGET_APCS_FRAME
23857 && !IS_INTERRUPT (func_type))
23858 arm_emit_strd_push (live_regs_mask);
23859 else
23860 {
23861 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23862 RTX_FRAME_RELATED_P (insn) = 1;
23863 }
23864 }
23865 else
23866 {
23867 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23868 RTX_FRAME_RELATED_P (insn) = 1;
23869 }
23870 }
23871
23872 if (! IS_VOLATILE (func_type))
23873 saved_regs += arm_save_coproc_regs ();
23874
23875 if (frame_pointer_needed && TARGET_ARM)
23876 {
23877 /* Create the new frame pointer. */
23878 if (TARGET_APCS_FRAME)
23879 {
23880 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23881 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23882 RTX_FRAME_RELATED_P (insn) = 1;
23883 }
23884 else
23885 {
23886 insn = GEN_INT (saved_regs - (4 + fp_offset));
23887 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23888 stack_pointer_rtx, insn));
23889 RTX_FRAME_RELATED_P (insn) = 1;
23890 }
23891 }
23892
23893 size = offsets->outgoing_args - offsets->saved_args;
23894 if (flag_stack_usage_info)
23895 current_function_static_stack_size = size;
23896
23897 /* If this isn't an interrupt service routine and we have a frame, then do
23898 stack checking. We use IP as the first scratch register, except for the
23899 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23900 if (!IS_INTERRUPT (func_type)
23901 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23902 || flag_stack_clash_protection))
23903 {
23904 unsigned int regno;
23905
23906 if (!IS_NESTED (func_type) || clobber_ip)
23907 regno = IP_REGNUM;
23908 else if (df_regs_ever_live_p (LR_REGNUM))
23909 regno = LR_REGNUM;
23910 else
23911 regno = 3;
23912
23913 if (crtl->is_leaf && !cfun->calls_alloca)
23914 {
23915 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23916 arm_emit_probe_stack_range (get_stack_check_protect (),
23917 size - get_stack_check_protect (),
23918 regno, live_regs_mask);
23919 }
23920 else if (size > 0)
23921 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23922 regno, live_regs_mask);
23923 }
23924
23925 /* Recover the static chain register. */
23926 if (clobber_ip)
23927 {
23928 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23929 insn = gen_rtx_REG (SImode, 3);
23930 else
23931 {
23932 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23933 insn = gen_frame_mem (SImode, insn);
23934 }
23935 emit_set_insn (ip_rtx, insn);
23936 emit_insn (gen_force_register_use (ip_rtx));
23937 }
23938
23939 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23940 {
23941 /* This add can produce multiple insns for a large constant, so we
23942 need to get tricky. */
23943 rtx_insn *last = get_last_insn ();
23944
23945 amount = GEN_INT (offsets->saved_args + saved_regs
23946 - offsets->outgoing_args);
23947
23948 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23949 amount));
23950 do
23951 {
23952 last = last ? NEXT_INSN (last) : get_insns ();
23953 RTX_FRAME_RELATED_P (last) = 1;
23954 }
23955 while (last != insn);
23956
23957 /* If the frame pointer is needed, emit a special barrier that
23958 will prevent the scheduler from moving stores to the frame
23959 before the stack adjustment. */
23960 if (frame_pointer_needed)
23961 emit_insn (gen_stack_tie (stack_pointer_rtx,
23962 hard_frame_pointer_rtx));
23963 }
23964
23965
23966 if (frame_pointer_needed && TARGET_THUMB2)
23967 thumb_set_frame_pointer (offsets);
23968
23969 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23970 {
23971 unsigned long mask;
23972
23973 mask = live_regs_mask;
23974 mask &= THUMB2_WORK_REGS;
23975 if (!IS_NESTED (func_type))
23976 mask |= (1 << IP_REGNUM);
23977 arm_load_pic_register (mask, NULL_RTX);
23978 }
23979
23980 /* If we are profiling, make sure no instructions are scheduled before
23981 the call to mcount. Similarly if the user has requested no
23982 scheduling in the prolog. Similarly if we want non-call exceptions
23983 using the EABI unwinder, to prevent faulting instructions from being
23984 swapped with a stack adjustment. */
23985 if (crtl->profile || !TARGET_SCHED_PROLOG
23986 || (arm_except_unwind_info (&global_options) == UI_TARGET
23987 && cfun->can_throw_non_call_exceptions))
23988 emit_insn (gen_blockage ());
23989
23990 /* If the link register is being kept alive, with the return address in it,
23991 then make sure that it does not get reused by the ce2 pass. */
23992 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23993 cfun->machine->lr_save_eliminated = 1;
23994 }
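
/* For illustration, a simple ARM-mode function compiled with an APCS frame
   gets the classic prologue

	mov	ip, sp
	push	{fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<locals + outgoing args>

   but the exact sequence varies widely with the cases handled above
   (Thumb-2, CMSE entry, PAC/BTI, stack probing, and so on).  */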
23995 \f
23996 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23997 static void
23998 arm_print_condition (FILE *stream)
23999 {
24000 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
24001 {
24002 /* Branch conversion is not implemented for Thumb-2. */
24003 if (TARGET_THUMB)
24004 {
24005 output_operand_lossage ("predicated Thumb instruction");
24006 return;
24007 }
24008 if (current_insn_predicate != NULL)
24009 {
24010 output_operand_lossage
24011 ("predicated instruction in conditional sequence");
24012 return;
24013 }
24014
24015 fputs (arm_condition_codes[arm_current_cc], stream);
24016 }
24017 else if (current_insn_predicate)
24018 {
24019 enum arm_cond_code code;
24020
24021 if (TARGET_THUMB1)
24022 {
24023 output_operand_lossage ("predicated Thumb instruction");
24024 return;
24025 }
24026
24027 code = get_arm_condition_code (current_insn_predicate);
24028 fputs (arm_condition_codes[code], stream);
24029 }
24030 }
24031
24032
24033 /* Globally reserved letters: acln
24034 Punctuation letters currently used: @_|?().!#
24035 Lower case letters currently used: bcdefhimpqtvwxyz
24036 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTUV
24037 Letters previously used, but now deprecated/obsolete: sWXYZ.
24038
24039 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
24040
24041 If CODE is 'd', then the X is a condition operand and the instruction
24042 should only be executed if the condition is true.
24043 If CODE is 'D', then the X is a condition operand and the instruction
24044 should only be executed if the condition is false: however, if the mode
24045 of the comparison is CCFPEmode, then always execute the instruction -- we
24046 do this because in these circumstances !GE does not necessarily imply LT;
24047 in these cases the instruction pattern will take care to make sure that
24048 an instruction containing %d will follow, thereby undoing the effects of
24049 doing this instruction unconditionally.
24050 If CODE is 'N' then X is a floating point operand that must be negated
24051 before output.
24052 If CODE is 'B' then output a bitwise inverted value of X (a const int).
24053 If X is a REG and CODE is `M', output a ldm/stm style multi-reg.
24054 If CODE is 'V', then the operand must be a CONST_INT representing
24055 the bits to preserve in the modified register (Rd) of a BFI or BFC
24056 instruction: print out both the width and lsb (shift) fields. */
24057 static void
24058 arm_print_operand (FILE *stream, rtx x, int code)
24059 {
24060 switch (code)
24061 {
24062 case '@':
24063 fputs (ASM_COMMENT_START, stream);
24064 return;
24065
24066 case '_':
24067 fputs (user_label_prefix, stream);
24068 return;
24069
24070 case '|':
24071 fputs (REGISTER_PREFIX, stream);
24072 return;
24073
24074 case '?':
24075 arm_print_condition (stream);
24076 return;
24077
24078 case '.':
24079 /* The current condition code for a condition code setting instruction.
24080 Preceded by 's' in unified syntax, otherwise followed by 's'. */
24081 fputc('s', stream);
24082 arm_print_condition (stream);
24083 return;
24084
24085 case '!':
24086 /* If the instruction is conditionally executed then print
24087 the current condition code, otherwise print 's'. */
24088 gcc_assert (TARGET_THUMB2);
24089 if (current_insn_predicate)
24090 arm_print_condition (stream);
24091 else
24092 fputc('s', stream);
24093 break;
24094
24095 /* %# is a "break" sequence. It doesn't output anything, but is used to
24096 separate e.g. operand numbers from following text, if that text consists
24097 of further digits which we don't want to be part of the operand
24098 number. */
24099 case '#':
24100 return;
24101
24102 case 'N':
24103 {
24104 REAL_VALUE_TYPE r;
24105 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
24106 fprintf (stream, "%s", fp_const_from_val (&r));
24107 }
24108 return;
24109
24110 /* An integer or symbol address without a preceding # sign. */
24111 case 'c':
24112 switch (GET_CODE (x))
24113 {
24114 case CONST_INT:
24115 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
24116 break;
24117
24118 case SYMBOL_REF:
24119 output_addr_const (stream, x);
24120 break;
24121
24122 case CONST:
24123 if (GET_CODE (XEXP (x, 0)) == PLUS
24124 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
24125 {
24126 output_addr_const (stream, x);
24127 break;
24128 }
24129 /* Fall through. */
24130
24131 default:
24132 output_operand_lossage ("Unsupported operand for code '%c'", code);
24133 }
24134 return;
24135
24136 /* An integer that we want to print in HEX. */
24137 case 'x':
24138 switch (GET_CODE (x))
24139 {
24140 case CONST_INT:
24141 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
24142 break;
24143
24144 default:
24145 output_operand_lossage ("Unsupported operand for code '%c'", code);
24146 }
24147 return;
24148
24149 case 'B':
24150 if (CONST_INT_P (x))
24151 {
24152 HOST_WIDE_INT val;
24153 val = ARM_SIGN_EXTEND (~INTVAL (x));
24154 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
24155 }
24156 else
24157 {
24158 putc ('~', stream);
24159 output_addr_const (stream, x);
24160 }
24161 return;
24162
24163 case 'b':
24164 /* Print the log2 of a CONST_INT. */
24165 {
24166 HOST_WIDE_INT val;
24167
24168 if (!CONST_INT_P (x)
24169 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
24170 output_operand_lossage ("Unsupported operand for code '%c'", code);
24171 else
24172 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24173 }
24174 return;
24175
24176 case 'L':
24177 /* The low 16 bits of an immediate constant. */
24178 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
24179 return;
24180
24181 case 'i':
24182 fprintf (stream, "%s", arithmetic_instr (x, 1));
24183 return;
24184
24185 case 'I':
24186 fprintf (stream, "%s", arithmetic_instr (x, 0));
24187 return;
24188
24189 case 'S':
24190 {
24191 HOST_WIDE_INT val;
24192 const char *shift;
24193
24194 shift = shift_op (x, &val);
24195
24196 if (shift)
24197 {
24198 fprintf (stream, ", %s ", shift);
24199 if (val == -1)
24200 arm_print_operand (stream, XEXP (x, 1), 0);
24201 else
24202 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
24203 }
24204 }
24205 return;
24206
24207 /* An explanation of the 'Q', 'R' and 'H' register operands:
24208
24209 In a pair of registers containing a DI or DF value the 'Q'
24210 operand returns the register number of the register containing
24211 the least significant part of the value. The 'R' operand returns
24212 the register number of the register containing the most
24213 significant part of the value.
24214
24215 The 'H' operand returns the higher of the two register numbers.
24216 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
24217 same as the 'Q' operand, since the most significant part of the
24218 value is held in the lower number register. The reverse is true
24219 on systems where WORDS_BIG_ENDIAN is false.
24220
24221 The purpose of these operands is to distinguish between cases
24222 where the endian-ness of the values is important (for example
24223 when they are added together), and cases where the endian-ness
24224 is irrelevant, but the order of register operations is important.
24225 For example when loading a value from memory into a register
24226 pair, the endian-ness does not matter. Provided that the value
24227 from the lower memory address is put into the lower numbered
24228 register, and the value from the higher address is put into the
24229 higher numbered register, the load will work regardless of whether
24230 the value being loaded is big-wordian or little-wordian. The
24231 order of the two register loads can matter however, if the address
24232 of the memory location is actually held in one of the registers
24233 being overwritten by the load.
24234
24235 The 'Q' and 'R' constraints are also available for 64-bit
24236 constants. */
24237 case 'Q':
24238 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24239 {
24240 rtx part = gen_lowpart (SImode, x);
24241 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24242 return;
24243 }
24244
24245 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24246 {
24247 output_operand_lossage ("invalid operand for code '%c'", code);
24248 return;
24249 }
24250
24251 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
24252 return;
24253
24254 case 'R':
24255 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
24256 {
24257 machine_mode mode = GET_MODE (x);
24258 rtx part;
24259
24260 if (mode == VOIDmode)
24261 mode = DImode;
24262 part = gen_highpart_mode (SImode, mode, x);
24263 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
24264 return;
24265 }
24266
24267 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24268 {
24269 output_operand_lossage ("invalid operand for code '%c'", code);
24270 return;
24271 }
24272
24273 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
24274 return;
24275
24276 case 'H':
24277 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24278 {
24279 output_operand_lossage ("invalid operand for code '%c'", code);
24280 return;
24281 }
24282
24283 asm_fprintf (stream, "%r", REGNO (x) + 1);
24284 return;
24285
24286 case 'J':
24287 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24288 {
24289 output_operand_lossage ("invalid operand for code '%c'", code);
24290 return;
24291 }
24292
24293 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
24294 return;
24295
24296 case 'K':
24297 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
24298 {
24299 output_operand_lossage ("invalid operand for code '%c'", code);
24300 return;
24301 }
24302
24303 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
24304 return;
24305
24306 case 'm':
24307 asm_fprintf (stream, "%r",
24308 REG_P (XEXP (x, 0))
24309 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
24310 return;
24311
24312 case 'M':
24313 asm_fprintf (stream, "{%r-%r}",
24314 REGNO (x),
24315 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
24316 return;
24317
24318 /* Like 'M', but writing doubleword vector registers, for use by Neon
24319 insns. */
24320 case 'h':
24321 {
24322 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
24323 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
24324 if (numregs == 1)
24325 asm_fprintf (stream, "{d%d}", regno);
24326 else
24327 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
24328 }
24329 return;
24330
24331 case 'd':
24332 /* CONST_TRUE_RTX means always -- that's the default. */
24333 if (x == const_true_rtx)
24334 return;
24335
24336 if (!COMPARISON_P (x))
24337 {
24338 output_operand_lossage ("invalid operand for code '%c'", code);
24339 return;
24340 }
24341
24342 fputs (arm_condition_codes[get_arm_condition_code (x)],
24343 stream);
24344 return;
24345
24346 case 'D':
24347 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
24348 want to do that. */
24349 if (x == const_true_rtx)
24350 {
24351 output_operand_lossage ("instruction never executed");
24352 return;
24353 }
24354 if (!COMPARISON_P (x))
24355 {
24356 output_operand_lossage ("invalid operand for code '%c'", code);
24357 return;
24358 }
24359
24360 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
24361 (get_arm_condition_code (x))],
24362 stream);
24363 return;
24364
24365 case 'V':
24366 {
24367 /* Output the LSB (shift) and width for a bitmask instruction
24368 based on a literal mask. The LSB is printed first,
24369 followed by the width.
24370
24371 Eg. For 0b1...1110001, the result is #1, #3. */
24372 if (!CONST_INT_P (x))
24373 {
24374 output_operand_lossage ("invalid operand for code '%c'", code);
24375 return;
24376 }
24377
24378 unsigned HOST_WIDE_INT val
24379 = ~UINTVAL (x) & HOST_WIDE_INT_UC (0xffffffff);
24380 int lsb = exact_log2 (val & -val);
24381 asm_fprintf (stream, "#%d, #%d", lsb,
24382 (exact_log2 (val + (val & -val)) - lsb));
24383 }
24384 return;
24385
24386 case 's':
24387 case 'W':
24388 case 'X':
24389 case 'Y':
24390 case 'Z':
24391 /* Former Maverick support, removed after GCC-4.7. */
24392 output_operand_lossage ("obsolete Maverick format code '%c'", code);
24393 return;
24394
24395 case 'U':
24396 if (!REG_P (x)
24397 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
24398 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
24399 /* Bad value for wCG register number. */
24400 {
24401 output_operand_lossage ("invalid operand for code '%c'", code);
24402 return;
24403 }
24404
24405 else
24406 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
24407 return;
24408
24409 /* Print an iWMMXt control register name. */
24410 case 'w':
24411 if (!CONST_INT_P (x)
24412 || INTVAL (x) < 0
24413 || INTVAL (x) >= 16)
24414 /* Bad value for wC register number. */
24415 {
24416 output_operand_lossage ("invalid operand for code '%c'", code);
24417 return;
24418 }
24419
24420 else
24421 {
24422 static const char * wc_reg_names [16] =
24423 {
24424 "wCID", "wCon", "wCSSF", "wCASF",
24425 "wC4", "wC5", "wC6", "wC7",
24426 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
24427 "wC12", "wC13", "wC14", "wC15"
24428 };
24429
24430 fputs (wc_reg_names [INTVAL (x)], stream);
24431 }
24432 return;
24433
24434 /* Print the high single-precision register of a VFP double-precision
24435 register. */
24436 case 'p':
24437 {
24438 machine_mode mode = GET_MODE (x);
24439 int regno;
24440
24441 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
24442 {
24443 output_operand_lossage ("invalid operand for code '%c'", code);
24444 return;
24445 }
24446
24447 regno = REGNO (x);
24448 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24449 {
24450 output_operand_lossage ("invalid operand for code '%c'", code);
24451 return;
24452 }
24453
24454 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24455 }
24456 return;
24457
24458 /* Print a VFP/Neon double precision or quad precision register name. */
24459 case 'P':
24460 case 'q':
24461 {
24462 machine_mode mode = GET_MODE (x);
24463 int is_quad = (code == 'q');
24464 int regno;
24465
24466 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24467 {
24468 output_operand_lossage ("invalid operand for code '%c'", code);
24469 return;
24470 }
24471
24472 if (!REG_P (x)
24473 || !IS_VFP_REGNUM (REGNO (x)))
24474 {
24475 output_operand_lossage ("invalid operand for code '%c'", code);
24476 return;
24477 }
24478
24479 regno = REGNO (x);
24480 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24481 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24482 {
24483 output_operand_lossage ("invalid operand for code '%c'", code);
24484 return;
24485 }
24486
24487 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24488 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24489 }
24490 return;
24491
24492 /* These two codes print the low/high doubleword register of a Neon quad
24493 register, respectively. For pair-structure types, can also print
24494 low/high quadword registers. */
24495 case 'e':
24496 case 'f':
24497 {
24498 machine_mode mode = GET_MODE (x);
24499 int regno;
24500
24501 if ((GET_MODE_SIZE (mode) != 16
24502 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24503 {
24504 output_operand_lossage ("invalid operand for code '%c'", code);
24505 return;
24506 }
24507
24508 regno = REGNO (x);
24509 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24510 {
24511 output_operand_lossage ("invalid operand for code '%c'", code);
24512 return;
24513 }
24514
24515 if (GET_MODE_SIZE (mode) == 16)
24516 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24517 + (code == 'f' ? 1 : 0));
24518 else
24519 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24520 + (code == 'f' ? 1 : 0));
24521 }
24522 return;
24523
24524 /* Print a VFPv3 floating-point constant, represented as an integer
24525 index. */
24526 case 'G':
24527 {
24528 int index = vfp3_const_double_index (x);
24529 gcc_assert (index != -1);
24530 fprintf (stream, "%d", index);
24531 }
24532 return;
24533
24534 /* Print bits representing opcode features for Neon.
24535
24536 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24537 and polynomials as unsigned.
24538
24539 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24540
24541 Bit 2 is 1 for rounding functions, 0 otherwise. */
24542
24543 /* Identify the type as 's', 'u', 'p' or 'f'. */
24544 case 'T':
24545 {
24546 HOST_WIDE_INT bits = INTVAL (x);
24547 fputc ("uspf"[bits & 3], stream);
24548 }
24549 return;
24550
24551 /* Likewise, but signed and unsigned integers are both 'i'. */
24552 case 'F':
24553 {
24554 HOST_WIDE_INT bits = INTVAL (x);
24555 fputc ("iipf"[bits & 3], stream);
24556 }
24557 return;
24558
24559 /* As for 'T', but emit 'u' instead of 'p'. */
24560 case 't':
24561 {
24562 HOST_WIDE_INT bits = INTVAL (x);
24563 fputc ("usuf"[bits & 3], stream);
24564 }
24565 return;
24566
24567 /* Bit 2: rounding (vs none). */
24568 case 'O':
24569 {
24570 HOST_WIDE_INT bits = INTVAL (x);
24571 fputs ((bits & 4) != 0 ? "r" : "", stream);
24572 }
24573 return;
24574
24575 /* Memory operand for vld1/vst1 instruction. */
24576 case 'A':
24577 {
24578 rtx addr;
24579 bool postinc = FALSE;
24580 rtx postinc_reg = NULL;
24581 unsigned align, memsize, align_bits;
24582
24583 gcc_assert (MEM_P (x));
24584 addr = XEXP (x, 0);
24585 if (GET_CODE (addr) == POST_INC)
24586 {
24587 postinc = 1;
24588 addr = XEXP (addr, 0);
24589 }
24590 if (GET_CODE (addr) == POST_MODIFY)
24591 {
24592 postinc_reg = XEXP( XEXP (addr, 1), 1);
24593 addr = XEXP (addr, 0);
24594 }
24595 asm_fprintf (stream, "[%r", REGNO (addr));
24596
24597 /* We know the alignment of this access, so we can emit a hint in the
24598 instruction (for some alignments) as an aid to the memory subsystem
24599 of the target. */
24600 align = MEM_ALIGN (x) >> 3;
24601 memsize = MEM_SIZE (x);
24602
24603 /* Only certain alignment specifiers are supported by the hardware. */
24604 if (memsize == 32 && (align % 32) == 0)
24605 align_bits = 256;
24606 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24607 align_bits = 128;
24608 else if (memsize >= 8 && (align % 8) == 0)
24609 align_bits = 64;
24610 else
24611 align_bits = 0;
24612
24613 if (align_bits != 0)
24614 asm_fprintf (stream, ":%d", align_bits);
24615
24616 asm_fprintf (stream, "]");
24617
24618 if (postinc)
24619 fputs("!", stream);
24620 if (postinc_reg)
24621 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24622 }
24623 return;
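
      /* Illustrative outputs of the 'A' code above: a 16-byte access with
	 16-byte alignment prints as "[r0:128]", with post-increment as
	 "[r0:128]!", and as plain "[r0]" when no alignment hint applies
	 (r0 stands in for whatever the address register is).  */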
24624
24625 /* Print the memory operand with an "Ux" or "Uj" constraint.  Depending on
24626 the rtx_code, the output looks like one of the following:
24627 1. [Rn], #+/-<imm>
24628 2. [Rn, #+/-<imm>]!
24629 3. [Rn, #+/-<imm>]
24630 4. [Rn]. */
24631 case 'E':
24632 {
24633 rtx addr;
24634 rtx postinc_reg = NULL;
24635 unsigned inc_val = 0;
24636 enum rtx_code code;
24637
24638 gcc_assert (MEM_P (x));
24639 addr = XEXP (x, 0);
24640 code = GET_CODE (addr);
24641 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24642 || code == PRE_DEC)
24643 {
24644 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24645 inc_val = GET_MODE_SIZE (GET_MODE (x));
24646 if (code == POST_INC || code == POST_DEC)
24647 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24648 ? "": "-", inc_val);
24649 else
24650 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24651 ? "": "-", inc_val);
24652 }
24653 else if (code == POST_MODIFY || code == PRE_MODIFY)
24654 {
24655 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24656 postinc_reg = XEXP (XEXP (addr, 1), 1);
24657 if (postinc_reg && CONST_INT_P (postinc_reg))
24658 {
24659 if (code == POST_MODIFY)
24660 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24661 else
24662 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24663 }
24664 }
24665 else if (code == PLUS)
24666 {
24667 rtx base = XEXP (addr, 0);
24668 rtx index = XEXP (addr, 1);
24669
24670 gcc_assert (REG_P (base) && CONST_INT_P (index));
24671
24672 HOST_WIDE_INT offset = INTVAL (index);
24673 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24674 }
24675 else
24676 {
24677 gcc_assert (REG_P (addr));
24678 asm_fprintf (stream, "[%r]",REGNO (addr));
24679 }
24680 }
24681 return;
24682
24683 case 'C':
24684 {
24685 rtx addr;
24686
24687 gcc_assert (MEM_P (x));
24688 addr = XEXP (x, 0);
24689 gcc_assert (REG_P (addr));
24690 asm_fprintf (stream, "[%r]", REGNO (addr));
24691 }
24692 return;
24693
24694 /* Translate an S register number into a D register number and element index. */
24695 case 'y':
24696 {
24697 machine_mode mode = GET_MODE (x);
24698 int regno;
24699
24700 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24701 {
24702 output_operand_lossage ("invalid operand for code '%c'", code);
24703 return;
24704 }
24705
24706 regno = REGNO (x);
24707 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24708 {
24709 output_operand_lossage ("invalid operand for code '%c'", code);
24710 return;
24711 }
24712
24713 regno = regno - FIRST_VFP_REGNUM;
24714 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24715 }
24716 return;
24717
24718 case 'v':
24719 gcc_assert (CONST_DOUBLE_P (x));
24720 int result;
24721 result = vfp3_const_double_for_fract_bits (x);
24722 if (result == 0)
24723 result = vfp3_const_double_for_bits (x);
24724 fprintf (stream, "#%d", result);
24725 return;
24726
24727 /* Register specifier for vld1.16/vst1.16. Translate the S register
24728 number into a D register number and element index. */
24729 case 'z':
24730 {
24731 machine_mode mode = GET_MODE (x);
24732 int regno;
24733
24734 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24735 {
24736 output_operand_lossage ("invalid operand for code '%c'", code);
24737 return;
24738 }
24739
24740 regno = REGNO (x);
24741 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24742 {
24743 output_operand_lossage ("invalid operand for code '%c'", code);
24744 return;
24745 }
24746
24747 regno = regno - FIRST_VFP_REGNUM;
24748 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24749 }
24750 return;
24751
24752 default:
24753 if (x == 0)
24754 {
24755 output_operand_lossage ("missing operand");
24756 return;
24757 }
24758
24759 switch (GET_CODE (x))
24760 {
24761 case REG:
24762 asm_fprintf (stream, "%r", REGNO (x));
24763 break;
24764
24765 case MEM:
24766 output_address (GET_MODE (x), XEXP (x, 0));
24767 break;
24768
24769 case CONST_DOUBLE:
24770 {
24771 char fpstr[20];
24772 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24773 sizeof (fpstr), 0, 1);
24774 fprintf (stream, "#%s", fpstr);
24775 }
24776 break;
24777
24778 default:
24779 gcc_assert (GET_CODE (x) != NEG);
24780 fputc ('#', stream);
24781 if (GET_CODE (x) == HIGH)
24782 {
24783 fputs (":lower16:", stream);
24784 x = XEXP (x, 0);
24785 }
24786
24787 output_addr_const (stream, x);
24788 break;
24789 }
24790 }
24791 }
24792 \f
24793 /* Target hook for printing a memory address. */
24794 static void
24795 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24796 {
24797 if (TARGET_32BIT)
24798 {
24799 int is_minus = GET_CODE (x) == MINUS;
24800
24801 if (REG_P (x))
24802 asm_fprintf (stream, "[%r]", REGNO (x));
24803 else if (GET_CODE (x) == PLUS || is_minus)
24804 {
24805 rtx base = XEXP (x, 0);
24806 rtx index = XEXP (x, 1);
24807 HOST_WIDE_INT offset = 0;
24808 if (!REG_P (base)
24809 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24810 {
24811 /* Ensure that BASE is a register. */
24812 /* (one of them must be). */
24813 /* Also ensure that SP is not used as an index register. */
24814 std::swap (base, index);
24815 }
24816 switch (GET_CODE (index))
24817 {
24818 case CONST_INT:
24819 offset = INTVAL (index);
24820 if (is_minus)
24821 offset = -offset;
24822 asm_fprintf (stream, "[%r, #%wd]",
24823 REGNO (base), offset);
24824 break;
24825
24826 case REG:
24827 asm_fprintf (stream, "[%r, %s%r]",
24828 REGNO (base), is_minus ? "-" : "",
24829 REGNO (index));
24830 break;
24831
24832 case MULT:
24833 case ASHIFTRT:
24834 case LSHIFTRT:
24835 case ASHIFT:
24836 case ROTATERT:
24837 {
24838 asm_fprintf (stream, "[%r, %s%r",
24839 REGNO (base), is_minus ? "-" : "",
24840 REGNO (XEXP (index, 0)));
24841 arm_print_operand (stream, index, 'S');
24842 fputs ("]", stream);
24843 break;
24844 }
24845
24846 default:
24847 gcc_unreachable ();
24848 }
24849 }
24850 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24851 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24852 {
24853 gcc_assert (REG_P (XEXP (x, 0)));
24854
24855 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24856 asm_fprintf (stream, "[%r, #%s%d]!",
24857 REGNO (XEXP (x, 0)),
24858 GET_CODE (x) == PRE_DEC ? "-" : "",
24859 GET_MODE_SIZE (mode));
24860 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24861 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24862 else
24863 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24864 GET_CODE (x) == POST_DEC ? "-" : "",
24865 GET_MODE_SIZE (mode));
24866 }
24867 else if (GET_CODE (x) == PRE_MODIFY)
24868 {
24869 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24870 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24871 asm_fprintf (stream, "#%wd]!",
24872 INTVAL (XEXP (XEXP (x, 1), 1)));
24873 else
24874 asm_fprintf (stream, "%r]!",
24875 REGNO (XEXP (XEXP (x, 1), 1)));
24876 }
24877 else if (GET_CODE (x) == POST_MODIFY)
24878 {
24879 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24880 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24881 asm_fprintf (stream, "#%wd",
24882 INTVAL (XEXP (XEXP (x, 1), 1)));
24883 else
24884 asm_fprintf (stream, "%r",
24885 REGNO (XEXP (XEXP (x, 1), 1)));
24886 }
24887 else output_addr_const (stream, x);
24888 }
24889 else
24890 {
24891 if (REG_P (x))
24892 asm_fprintf (stream, "[%r]", REGNO (x));
24893 else if (GET_CODE (x) == POST_INC)
24894 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24895 else if (GET_CODE (x) == PLUS)
24896 {
24897 gcc_assert (REG_P (XEXP (x, 0)));
24898 if (CONST_INT_P (XEXP (x, 1)))
24899 asm_fprintf (stream, "[%r, #%wd]",
24900 REGNO (XEXP (x, 0)),
24901 INTVAL (XEXP (x, 1)));
24902 else
24903 asm_fprintf (stream, "[%r, %r]",
24904 REGNO (XEXP (x, 0)),
24905 REGNO (XEXP (x, 1)));
24906 }
24907 else
24908 output_addr_const (stream, x);
24909 }
24910 }
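
/* A few illustrative outputs of the 32-bit path above (register numbers
   chosen arbitrarily):

	(reg r0)			-> [r0]
	(plus (reg r1) (const_int 16))	-> [r1, #16]
	(plus (reg r1) (mult/shift))	-> [r1, r2, lsl #2]
	(post_inc (reg r3))		-> [r3], #<mode size>
	(pre_modify ...)		-> [r3, #<imm>]!  */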
24911 \f
24912 /* Target hook for indicating whether a punctuation character for
24913 TARGET_PRINT_OPERAND is valid. */
24914 static bool
24915 arm_print_operand_punct_valid_p (unsigned char code)
24916 {
24917 return (code == '@' || code == '|' || code == '.'
24918 || code == '(' || code == ')' || code == '#'
24919 || (TARGET_32BIT && (code == '?'))
24920 || (TARGET_THUMB2 && (code == '!'))
24921 || (TARGET_THUMB && (code == '_')));
24922 }
24923 \f
24924 /* Target hook for assembling integer objects. The ARM version needs to
24925 handle word-sized values specially. */
24926 static bool
24927 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24928 {
24929 machine_mode mode;
24930
24931 if (size == UNITS_PER_WORD && aligned_p)
24932 {
24933 fputs ("\t.word\t", asm_out_file);
24934 output_addr_const (asm_out_file, x);
24935
24936 /* Mark symbols as position independent. We only do this in the
24937 .text segment, not in the .data segment. */
24938 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24939 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24940 {
24941 /* See legitimize_pic_address for an explanation of the
24942 TARGET_VXWORKS_RTP check. */
24943 /* References to weak symbols cannot be resolved locally:
24944 they may be overridden by a non-weak definition at link
24945 time. */
24946 if (!arm_pic_data_is_text_relative
24947 || (SYMBOL_REF_P (x)
24948 && (!SYMBOL_REF_LOCAL_P (x)
24949 || (SYMBOL_REF_DECL (x)
24950 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24951 || (SYMBOL_REF_FUNCTION_P (x)
24952 && !arm_fdpic_local_funcdesc_p (x)))))
24953 {
24954 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24955 fputs ("(GOTFUNCDESC)", asm_out_file);
24956 else
24957 fputs ("(GOT)", asm_out_file);
24958 }
24959 else
24960 {
24961 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24962 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24963 else
24964 {
24965 bool is_readonly;
24966
24967 if (!TARGET_FDPIC
24968 || arm_is_segment_info_known (x, &is_readonly))
24969 fputs ("(GOTOFF)", asm_out_file);
24970 else
24971 fputs ("(GOT)", asm_out_file);
24972 }
24973 }
24974 }
24975
24976 /* For FDPIC we also have to mark symbol for .data section. */
24977 if (TARGET_FDPIC
24978 && !making_const_table
24979 && SYMBOL_REF_P (x)
24980 && SYMBOL_REF_FUNCTION_P (x))
24981 fputs ("(FUNCDESC)", asm_out_file);
24982
24983 fputc ('\n', asm_out_file);
24984 return true;
24985 }
24986
24987 mode = GET_MODE (x);
24988
24989 if (arm_vector_mode_supported_p (mode))
24990 {
24991 int i, units;
24992
24993 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24994
24995 units = CONST_VECTOR_NUNITS (x);
24996 size = GET_MODE_UNIT_SIZE (mode);
24997
24998 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24999 for (i = 0; i < units; i++)
25000 {
25001 rtx elt = CONST_VECTOR_ELT (x, i);
25002 assemble_integer
25003 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
25004 }
25005 else
25006 for (i = 0; i < units; i++)
25007 {
25008 rtx elt = CONST_VECTOR_ELT (x, i);
25009 assemble_real
25010 (*CONST_DOUBLE_REAL_VALUE (elt),
25011 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
25012 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
25013 }
25014
25015 return true;
25016 }
25017
25018 return default_assemble_integer (x, size, aligned_p);
25019 }
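/* Added illustration (hypothetical symbol names) of the directives emitted
   above:

     .word	some_symbol		plain word
     .word	extern_symbol(GOT)	PIC reference through the GOT
     .word	local_symbol(GOTOFF)	PIC reference relative to the GOT base
     .word	func_symbol(FUNCDESC)	FDPIC function descriptor for .data  */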
25020
25021 static void
25022 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
25023 {
25024 section *s;
25025
25026 if (!TARGET_AAPCS_BASED)
25027 {
25028 (is_ctor ?
25029 default_named_section_asm_out_constructor
25030 : default_named_section_asm_out_destructor) (symbol, priority);
25031 return;
25032 }
25033
25034 /* Put these in the .init_array section, using a special relocation. */
25035 if (priority != DEFAULT_INIT_PRIORITY)
25036 {
25037 char buf[18];
25038 sprintf (buf, "%s.%.5u",
25039 is_ctor ? ".init_array" : ".fini_array",
25040 priority);
25041 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
25042 }
25043 else if (is_ctor)
25044 s = ctors_section;
25045 else
25046 s = dtors_section;
25047
25048 switch_to_section (s);
25049 assemble_align (POINTER_SIZE);
25050 fputs ("\t.word\t", asm_out_file);
25051 output_addr_const (asm_out_file, symbol);
25052 fputs ("(target1)\n", asm_out_file);
25053 }
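/* Added example (hypothetical priority): a constructor registered with
   priority 101 is placed in a section named ".init_array.00101" (the "%.5u"
   format above zero-pads the priority) and its entry is emitted as
   ".word symbol(target1)" so the linker can apply the TARGET1 relocation.  */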
25054
25055 /* Add a function to the list of static constructors. */
25056
25057 static void
25058 arm_elf_asm_constructor (rtx symbol, int priority)
25059 {
25060 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
25061 }
25062
25063 /* Add a function to the list of static destructors. */
25064
25065 static void
25066 arm_elf_asm_destructor (rtx symbol, int priority)
25067 {
25068 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
25069 }
25070 \f
25071 /* A finite state machine takes care of noticing whether or not instructions
25072 can be conditionally executed, and thus decreases execution time and code
25073 size by deleting branch instructions. The fsm is controlled by
25074 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
25075
25076 /* The states of the fsm controlling condition codes are:
25077 0: normal, do nothing special
25078 1: make ASM_OUTPUT_OPCODE not output this instruction
25079 2: make ASM_OUTPUT_OPCODE not output this instruction
25080 3: make instructions conditional
25081 4: make instructions conditional
25082
25083 State transitions (state->state by whom under condition):
25084 0 -> 1 final_prescan_insn if the `target' is a label
25085 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
25086 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
25087 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
25088 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
25089 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
25090 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
25091 (the target insn is arm_target_insn).
25092
25093 If the jump clobbers the conditions then we use states 2 and 4.
25094
25095 A similar thing can be done with conditional return insns.
25096
25097 XXX In case the `target' is an unconditional branch, this conditionalising
25098 of the instructions always reduces code size, but not always execution
25099 time. But then, I want to reduce the code size to somewhere near what
25100 /bin/cc produces. */
25101
25102 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
25103 instructions. When a COND_EXEC instruction is seen the subsequent
25104 instructions are scanned so that multiple conditional instructions can be
25105 combined into a single IT block. arm_condexec_count and arm_condexec_mask
25106 specify the length and true/false mask for the IT block. These will be
25107 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
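/* Added illustration (hypothetical code): the ccfsm turns a short forward
   branch such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
     .L1:

   into a conditionally executed instruction with no branch:

	cmp	r0, #0
	addne	r1, r1, #1  */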
25108
25109 /* Returns the index of the ARM condition code string in
25110 `arm_condition_codes', or ARM_NV if the comparison is invalid.
25111 COMPARISON should be an rtx like `(eq (...) (...))'. */
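/* Added example (assumed RTL): the mode records how the flags were set; with
   CC_SWPmode the operands were swapped when the comparison was generated, so
   (gt (reg:CC_SWP CC_REGNUM) (const_int 0)) is output using the "lt"
   condition, as the E_CC_SWPmode case below shows.  */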
25112
25113 enum arm_cond_code
25114 maybe_get_arm_condition_code (rtx comparison)
25115 {
25116 machine_mode mode = GET_MODE (XEXP (comparison, 0));
25117 enum arm_cond_code code;
25118 enum rtx_code comp_code = GET_CODE (comparison);
25119
25120 if (GET_MODE_CLASS (mode) != MODE_CC)
25121 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
25122 XEXP (comparison, 1));
25123
25124 switch (mode)
25125 {
25126 case E_CC_DNEmode: code = ARM_NE; goto dominance;
25127 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
25128 case E_CC_DGEmode: code = ARM_GE; goto dominance;
25129 case E_CC_DGTmode: code = ARM_GT; goto dominance;
25130 case E_CC_DLEmode: code = ARM_LE; goto dominance;
25131 case E_CC_DLTmode: code = ARM_LT; goto dominance;
25132 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
25133 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
25134 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
25135 case E_CC_DLTUmode: code = ARM_CC;
25136
25137 dominance:
25138 if (comp_code == EQ)
25139 return ARM_INVERSE_CONDITION_CODE (code);
25140 if (comp_code == NE)
25141 return code;
25142 return ARM_NV;
25143
25144 case E_CC_NZmode:
25145 switch (comp_code)
25146 {
25147 case NE: return ARM_NE;
25148 case EQ: return ARM_EQ;
25149 case GE: return ARM_PL;
25150 case LT: return ARM_MI;
25151 default: return ARM_NV;
25152 }
25153
25154 case E_CC_Zmode:
25155 switch (comp_code)
25156 {
25157 case NE: return ARM_NE;
25158 case EQ: return ARM_EQ;
25159 default: return ARM_NV;
25160 }
25161
25162 case E_CC_Nmode:
25163 switch (comp_code)
25164 {
25165 case NE: return ARM_MI;
25166 case EQ: return ARM_PL;
25167 default: return ARM_NV;
25168 }
25169
25170 case E_CCFPEmode:
25171 case E_CCFPmode:
25172 /* We can handle all cases except UNEQ and LTGT. */
25173 switch (comp_code)
25174 {
25175 case GE: return ARM_GE;
25176 case GT: return ARM_GT;
25177 case LE: return ARM_LS;
25178 case LT: return ARM_MI;
25179 case NE: return ARM_NE;
25180 case EQ: return ARM_EQ;
25181 case ORDERED: return ARM_VC;
25182 case UNORDERED: return ARM_VS;
25183 case UNLT: return ARM_LT;
25184 case UNLE: return ARM_LE;
25185 case UNGT: return ARM_HI;
25186 case UNGE: return ARM_PL;
25187 /* UNEQ and LTGT do not have a representation. */
25188 case UNEQ: /* Fall through. */
25189 case LTGT: /* Fall through. */
25190 default: return ARM_NV;
25191 }
25192
25193 case E_CC_SWPmode:
25194 switch (comp_code)
25195 {
25196 case NE: return ARM_NE;
25197 case EQ: return ARM_EQ;
25198 case GE: return ARM_LE;
25199 case GT: return ARM_LT;
25200 case LE: return ARM_GE;
25201 case LT: return ARM_GT;
25202 case GEU: return ARM_LS;
25203 case GTU: return ARM_CC;
25204 case LEU: return ARM_CS;
25205 case LTU: return ARM_HI;
25206 default: return ARM_NV;
25207 }
25208
25209 case E_CC_Cmode:
25210 switch (comp_code)
25211 {
25212 case LTU: return ARM_CS;
25213 case GEU: return ARM_CC;
25214 default: return ARM_NV;
25215 }
25216
25217 case E_CC_NVmode:
25218 switch (comp_code)
25219 {
25220 case GE: return ARM_GE;
25221 case LT: return ARM_LT;
25222 default: return ARM_NV;
25223 }
25224
25225 case E_CC_Bmode:
25226 switch (comp_code)
25227 {
25228 case GEU: return ARM_CS;
25229 case LTU: return ARM_CC;
25230 default: return ARM_NV;
25231 }
25232
25233 case E_CC_Vmode:
25234 switch (comp_code)
25235 {
25236 case NE: return ARM_VS;
25237 case EQ: return ARM_VC;
25238 default: return ARM_NV;
25239 }
25240
25241 case E_CC_ADCmode:
25242 switch (comp_code)
25243 {
25244 case GEU: return ARM_CS;
25245 case LTU: return ARM_CC;
25246 default: return ARM_NV;
25247 }
25248
25249 case E_CCmode:
25250 case E_CC_RSBmode:
25251 switch (comp_code)
25252 {
25253 case NE: return ARM_NE;
25254 case EQ: return ARM_EQ;
25255 case GE: return ARM_GE;
25256 case GT: return ARM_GT;
25257 case LE: return ARM_LE;
25258 case LT: return ARM_LT;
25259 case GEU: return ARM_CS;
25260 case GTU: return ARM_HI;
25261 case LEU: return ARM_LS;
25262 case LTU: return ARM_CC;
25263 default: return ARM_NV;
25264 }
25265
25266 default: gcc_unreachable ();
25267 }
25268 }
25269
25270 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
25271 static enum arm_cond_code
25272 get_arm_condition_code (rtx comparison)
25273 {
25274 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
25275 gcc_assert (code != ARM_NV);
25276 return code;
25277 }
25278
25279 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
25280 code registers when not targeting Thumb1. The VFP condition register
25281 only exists when generating hard-float code. */
25282 static bool
25283 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
25284 {
25285 if (!TARGET_32BIT)
25286 return false;
25287
25288 *p1 = CC_REGNUM;
25289 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
25290 return true;
25291 }
25292
25293 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
25294 instructions. */
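/* Added illustration (hypothetical insns): three COND_EXEC insns with
   conditions EQ, EQ, NE can be merged into a single block and emitted as

	itte	eq
	moveq	r0, #1
	addeq	r1, r1, #1
	movne	r0, #0  */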
25295 void
25296 thumb2_final_prescan_insn (rtx_insn *insn)
25297 {
25298 rtx_insn *first_insn = insn;
25299 rtx body = PATTERN (insn);
25300 rtx predicate;
25301 enum arm_cond_code code;
25302 int n;
25303 int mask;
25304 int max;
25305
25306 /* max_insns_skipped in the tune was already taken into account in the
25307 cost model of the ifcvt pass when generating COND_EXEC insns. At this
25308 stage just emit the IT blocks as best we can. It does not make sense to
25309 split the IT blocks. */
25310 max = MAX_INSN_PER_IT_BLOCK;
25311
25312 /* Remove the previous insn from the count of insns to be output. */
25313 if (arm_condexec_count)
25314 arm_condexec_count--;
25315
25316 /* Nothing to do if we are already inside a conditional block. */
25317 if (arm_condexec_count)
25318 return;
25319
25320 if (GET_CODE (body) != COND_EXEC)
25321 return;
25322
25323 /* Conditional jumps are implemented directly. */
25324 if (JUMP_P (insn))
25325 return;
25326
25327 predicate = COND_EXEC_TEST (body);
25328 arm_current_cc = get_arm_condition_code (predicate);
25329
25330 n = get_attr_ce_count (insn);
25331 arm_condexec_count = 1;
25332 arm_condexec_mask = (1 << n) - 1;
25333 arm_condexec_masklen = n;
25334 /* See if subsequent instructions can be combined into the same block. */
25335 for (;;)
25336 {
25337 insn = next_nonnote_insn (insn);
25338
25339 /* Jumping into the middle of an IT block is illegal, so a label or
25340 barrier terminates the block. */
25341 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
25342 break;
25343
25344 body = PATTERN (insn);
25345 /* USE and CLOBBER aren't really insns, so just skip them. */
25346 if (GET_CODE (body) == USE
25347 || GET_CODE (body) == CLOBBER)
25348 continue;
25349
25350 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
25351 if (GET_CODE (body) != COND_EXEC)
25352 break;
25353 /* Maximum number of conditionally executed instructions in a block. */
25354 n = get_attr_ce_count (insn);
25355 if (arm_condexec_masklen + n > max)
25356 break;
25357
25358 predicate = COND_EXEC_TEST (body);
25359 code = get_arm_condition_code (predicate);
25360 mask = (1 << n) - 1;
25361 if (arm_current_cc == code)
25362 arm_condexec_mask |= (mask << arm_condexec_masklen);
25363 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
25364 break;
25365
25366 arm_condexec_count++;
25367 arm_condexec_masklen += n;
25368
25369 /* A jump must be the last instruction in a conditional block. */
25370 if (JUMP_P (insn))
25371 break;
25372 }
25373 /* Restore recog_data (getting the attributes of other insns can
25374 destroy this array, but final.cc assumes that it remains intact
25375 across this call). */
25376 extract_constrain_insn_cached (first_insn);
25377 }
25378
25379 void
25380 arm_final_prescan_insn (rtx_insn *insn)
25381 {
25382 /* BODY will hold the body of INSN. */
25383 rtx body = PATTERN (insn);
25384
25385 /* This will be 1 if trying to repeat the trick, and things need to be
25386 reversed if it appears to fail. */
25387 int reverse = 0;
25388
25389 /* If we start with a return insn, we only succeed if we find another one. */
25390 int seeking_return = 0;
25391 enum rtx_code return_code = UNKNOWN;
25392
25393 /* START_INSN will hold the insn from where we start looking. This is the
25394 first insn after the following code_label if REVERSE is true. */
25395 rtx_insn *start_insn = insn;
25396
25397 /* If in state 4, check if the target branch is reached, in order to
25398 change back to state 0. */
25399 if (arm_ccfsm_state == 4)
25400 {
25401 if (insn == arm_target_insn)
25402 {
25403 arm_target_insn = NULL;
25404 arm_ccfsm_state = 0;
25405 }
25406 return;
25407 }
25408
25409 /* If in state 3, it is possible to repeat the trick, if this insn is an
25410 unconditional branch to a label, and immediately following this branch
25411 is the previous target label which is only used once, and the label this
25412 branch jumps to is not too far off. */
25413 if (arm_ccfsm_state == 3)
25414 {
25415 if (simplejump_p (insn))
25416 {
25417 start_insn = next_nonnote_insn (start_insn);
25418 if (BARRIER_P (start_insn))
25419 {
25420 /* XXX Isn't this always a barrier? */
25421 start_insn = next_nonnote_insn (start_insn);
25422 }
25423 if (LABEL_P (start_insn)
25424 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25425 && LABEL_NUSES (start_insn) == 1)
25426 reverse = TRUE;
25427 else
25428 return;
25429 }
25430 else if (ANY_RETURN_P (body))
25431 {
25432 start_insn = next_nonnote_insn (start_insn);
25433 if (BARRIER_P (start_insn))
25434 start_insn = next_nonnote_insn (start_insn);
25435 if (LABEL_P (start_insn)
25436 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
25437 && LABEL_NUSES (start_insn) == 1)
25438 {
25439 reverse = TRUE;
25440 seeking_return = 1;
25441 return_code = GET_CODE (body);
25442 }
25443 else
25444 return;
25445 }
25446 else
25447 return;
25448 }
25449
25450 gcc_assert (!arm_ccfsm_state || reverse);
25451 if (!JUMP_P (insn))
25452 return;
25453
25454 /* This jump might be paralleled with a clobber of the condition codes;
25455 the jump should always come first. */
25456 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25457 body = XVECEXP (body, 0, 0);
25458
25459 if (reverse
25460 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25461 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25462 {
25463 int insns_skipped;
25464 int fail = FALSE, succeed = FALSE;
25465 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25466 int then_not_else = TRUE;
25467 rtx_insn *this_insn = start_insn;
25468 rtx label = 0;
25469
25470 /* Register the insn jumped to. */
25471 if (reverse)
25472 {
25473 if (!seeking_return)
25474 label = XEXP (SET_SRC (body), 0);
25475 }
25476 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25477 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25478 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25479 {
25480 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25481 then_not_else = FALSE;
25482 }
25483 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25484 {
25485 seeking_return = 1;
25486 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25487 }
25488 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25489 {
25490 seeking_return = 1;
25491 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25492 then_not_else = FALSE;
25493 }
25494 else
25495 gcc_unreachable ();
25496
25497 /* See how many insns this branch skips, and what kind of insns. If all
25498 insns are okay, and the label or unconditional branch to the same
25499 label is not too far away, succeed. */
25500 for (insns_skipped = 0;
25501 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25502 {
25503 rtx scanbody;
25504
25505 this_insn = next_nonnote_insn (this_insn);
25506 if (!this_insn)
25507 break;
25508
25509 switch (GET_CODE (this_insn))
25510 {
25511 case CODE_LABEL:
25512 /* Succeed if it is the target label, otherwise fail since
25513 control falls in from somewhere else. */
25514 if (this_insn == label)
25515 {
25516 arm_ccfsm_state = 1;
25517 succeed = TRUE;
25518 }
25519 else
25520 fail = TRUE;
25521 break;
25522
25523 case BARRIER:
25524 /* Succeed if the following insn is the target label.
25525 Otherwise fail.
25526 If return insns are used then the last insn in a function
25527 will be a barrier. */
25528 this_insn = next_nonnote_insn (this_insn);
25529 if (this_insn && this_insn == label)
25530 {
25531 arm_ccfsm_state = 1;
25532 succeed = TRUE;
25533 }
25534 else
25535 fail = TRUE;
25536 break;
25537
25538 case CALL_INSN:
25539 /* The AAPCS says that conditional calls should not be
25540 used since they make interworking inefficient (the
25541 linker can't transform BL<cond> into BLX). That's
25542 only a problem if the machine has BLX. */
25543 if (arm_arch5t)
25544 {
25545 fail = TRUE;
25546 break;
25547 }
25548
25549 /* Succeed if the following insn is the target label, or
25550 if the following two insns are a barrier and the
25551 target label. */
25552 this_insn = next_nonnote_insn (this_insn);
25553 if (this_insn && BARRIER_P (this_insn))
25554 this_insn = next_nonnote_insn (this_insn);
25555
25556 if (this_insn && this_insn == label
25557 && insns_skipped < max_insns_skipped)
25558 {
25559 arm_ccfsm_state = 1;
25560 succeed = TRUE;
25561 }
25562 else
25563 fail = TRUE;
25564 break;
25565
25566 case JUMP_INSN:
25567 /* If this is an unconditional branch to the same label, succeed.
25568 If it is to another label, do nothing. If it is conditional,
25569 fail. */
25570 /* XXX Probably, the tests for SET and the PC are
25571 unnecessary. */
25572
25573 scanbody = PATTERN (this_insn);
25574 if (GET_CODE (scanbody) == SET
25575 && GET_CODE (SET_DEST (scanbody)) == PC)
25576 {
25577 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25578 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25579 {
25580 arm_ccfsm_state = 2;
25581 succeed = TRUE;
25582 }
25583 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25584 fail = TRUE;
25585 }
25586 /* Fail if a conditional return is undesirable (e.g. on a
25587 StrongARM), but still allow this if optimizing for size. */
25588 else if (GET_CODE (scanbody) == return_code
25589 && !use_return_insn (TRUE, NULL)
25590 && !optimize_size)
25591 fail = TRUE;
25592 else if (GET_CODE (scanbody) == return_code)
25593 {
25594 arm_ccfsm_state = 2;
25595 succeed = TRUE;
25596 }
25597 else if (GET_CODE (scanbody) == PARALLEL)
25598 {
25599 switch (get_attr_conds (this_insn))
25600 {
25601 case CONDS_NOCOND:
25602 break;
25603 default:
25604 fail = TRUE;
25605 break;
25606 }
25607 }
25608 else
25609 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25610
25611 break;
25612
25613 case INSN:
25614 /* Instructions using or affecting the condition codes make it
25615 fail. */
25616 scanbody = PATTERN (this_insn);
25617 if (!(GET_CODE (scanbody) == SET
25618 || GET_CODE (scanbody) == PARALLEL)
25619 || get_attr_conds (this_insn) != CONDS_NOCOND)
25620 fail = TRUE;
25621 break;
25622
25623 default:
25624 break;
25625 }
25626 }
25627 if (succeed)
25628 {
25629 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25630 arm_target_label = CODE_LABEL_NUMBER (label);
25631 else
25632 {
25633 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25634
25635 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25636 {
25637 this_insn = next_nonnote_insn (this_insn);
25638 gcc_assert (!this_insn
25639 || (!BARRIER_P (this_insn)
25640 && !LABEL_P (this_insn)));
25641 }
25642 if (!this_insn)
25643 {
25644 /* Oh, dear! We ran off the end... give up. */
25645 extract_constrain_insn_cached (insn);
25646 arm_ccfsm_state = 0;
25647 arm_target_insn = NULL;
25648 return;
25649 }
25650 arm_target_insn = this_insn;
25651 }
25652
25653 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25654 what it was. */
25655 if (!reverse)
25656 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25657
25658 if (reverse || then_not_else)
25659 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25660 }
25661
25662 /* Restore recog_data (getting the attributes of other insns can
25663 destroy this array, but final.cc assumes that it remains intact
25664 across this call). */
25665 extract_constrain_insn_cached (insn);
25666 }
25667 }
25668
25669 /* Output IT instructions. */
25670 void
25671 thumb2_asm_output_opcode (FILE * stream)
25672 {
25673 char buff[5];
25674 int n;
25675
25676 if (arm_condexec_mask)
25677 {
25678 for (n = 0; n < arm_condexec_masklen; n++)
25679 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25680 buff[n] = 0;
25681 asm_fprintf (stream, "i%s\t%s\n\t", buff,
25682 arm_condition_codes[arm_current_cc]);
25683 arm_condexec_mask = 0;
25684 }
25685 }
25686
25687 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25688 UNITS_PER_WORD bytes wide. */
25689 static unsigned int
25690 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25691 {
25692 if (IS_VPR_REGNUM (regno))
25693 return CEIL (GET_MODE_SIZE (mode), 2);
25694
25695 if (TARGET_32BIT
25696 && regno > PC_REGNUM
25697 && regno != FRAME_POINTER_REGNUM
25698 && regno != ARG_POINTER_REGNUM
25699 && !IS_VFP_REGNUM (regno))
25700 return 1;
25701
25702 return ARM_NUM_REGS (mode);
25703 }
25704
25705 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25706 static bool
25707 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25708 {
25709 if (GET_MODE_CLASS (mode) == MODE_CC)
25710 return (regno == CC_REGNUM
25711 || (TARGET_VFP_BASE
25712 && regno == VFPCC_REGNUM));
25713
25714 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25715 return false;
25716
25717 if (IS_VPR_REGNUM (regno))
25718 return VALID_MVE_PRED_MODE (mode);
25719
25720 if (TARGET_THUMB1)
25721 /* For the Thumb we only allow values bigger than SImode in
25722 registers 0 - 6, so that there is always a second low
25723 register available to hold the upper part of the value.
25724 We probably ought to ensure that the register is the
25725 start of an even-numbered register pair. */
25726 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25727
25728 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25729 {
25730 if (mode == DFmode || mode == DImode)
25731 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25732
25733 if (mode == HFmode || mode == BFmode || mode == HImode
25734 || mode == SFmode || mode == SImode)
25735 return VFP_REGNO_OK_FOR_SINGLE (regno);
25736
25737 if (TARGET_NEON)
25738 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25739 || (VALID_NEON_QREG_MODE (mode)
25740 && NEON_REGNO_OK_FOR_QUAD (regno))
25741 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25742 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25743 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25744 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25745 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25746 if (TARGET_HAVE_MVE)
25747 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25748 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25749 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25750
25751 return false;
25752 }
25753
25754 if (TARGET_REALLY_IWMMXT)
25755 {
25756 if (IS_IWMMXT_GR_REGNUM (regno))
25757 return mode == SImode;
25758
25759 if (IS_IWMMXT_REGNUM (regno))
25760 return VALID_IWMMXT_REG_MODE (mode);
25761 }
25762
25763 /* We allow almost any value to be stored in the general registers.
25764 Restrict doubleword quantities to even register pairs in ARM state
25765 so that we can use ldrd. The same restriction applies for MVE
25766 in order to support Armv8.1-M Mainline instructions.
25767 Do not allow very large Neon structure opaque modes in general
25768 registers; they would use too many. */
25769 if (regno <= LAST_ARM_REGNUM)
25770 {
25771 if (ARM_NUM_REGS (mode) > 4)
25772 return false;
25773
25774 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25775 return true;
25776
25777 return !((TARGET_LDRD || TARGET_CDE)
25778 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25779 }
25780
25781 if (regno == FRAME_POINTER_REGNUM
25782 || regno == ARG_POINTER_REGNUM)
25783 /* We only allow integers in the fake hard registers. */
25784 return GET_MODE_CLASS (mode) == MODE_INT;
25785
25786 return false;
25787 }
25788
25789 /* Implement TARGET_MODES_TIEABLE_P. */
25790
25791 static bool
25792 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25793 {
25794 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25795 return true;
25796
25797 if (TARGET_HAVE_MVE
25798 && (VALID_MVE_PRED_MODE (mode1) && VALID_MVE_PRED_MODE (mode2)))
25799 return true;
25800
25801 /* We specifically want to allow elements of "structure" modes to
25802 be tieable to the structure. This more general condition allows
25803 other rarer situations too. */
25804 if ((TARGET_NEON
25805 && (VALID_NEON_DREG_MODE (mode1)
25806 || VALID_NEON_QREG_MODE (mode1)
25807 || VALID_NEON_STRUCT_MODE (mode1))
25808 && (VALID_NEON_DREG_MODE (mode2)
25809 || VALID_NEON_QREG_MODE (mode2)
25810 || VALID_NEON_STRUCT_MODE (mode2)))
25811 || (TARGET_HAVE_MVE
25812 && (VALID_MVE_MODE (mode1)
25813 || VALID_MVE_STRUCT_MODE (mode1))
25814 && (VALID_MVE_MODE (mode2)
25815 || VALID_MVE_STRUCT_MODE (mode2))))
25816 return true;
25817
25818 return false;
25819 }
25820
25821 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25822 not used in arm mode. */
25823
25824 enum reg_class
25825 arm_regno_class (int regno)
25826 {
25827 if (regno == PC_REGNUM)
25828 return NO_REGS;
25829
25830 if (IS_VPR_REGNUM (regno))
25831 return VPR_REG;
25832
25833 if (IS_PAC_REGNUM (regno))
25834 return PAC_REG;
25835
25836 if (TARGET_THUMB1)
25837 {
25838 if (regno == STACK_POINTER_REGNUM)
25839 return STACK_REG;
25840 if (regno == CC_REGNUM)
25841 return CC_REG;
25842 if (regno < 8)
25843 return LO_REGS;
25844 return HI_REGS;
25845 }
25846
25847 if (TARGET_THUMB2 && regno < 8)
25848 return LO_REGS;
25849
25850 if ( regno <= LAST_ARM_REGNUM
25851 || regno == FRAME_POINTER_REGNUM
25852 || regno == ARG_POINTER_REGNUM)
25853 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25854
25855 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25856 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25857
25858 if (IS_VFP_REGNUM (regno))
25859 {
25860 if (regno <= D7_VFP_REGNUM)
25861 return VFP_D0_D7_REGS;
25862 else if (regno <= LAST_LO_VFP_REGNUM)
25863 return VFP_LO_REGS;
25864 else
25865 return VFP_HI_REGS;
25866 }
25867
25868 if (IS_IWMMXT_REGNUM (regno))
25869 return IWMMXT_REGS;
25870
25871 if (IS_IWMMXT_GR_REGNUM (regno))
25872 return IWMMXT_GR_REGS;
25873
25874 return NO_REGS;
25875 }
25876
25877 /* Handle a special case when computing the offset
25878 of an argument from the frame pointer. */
25879 int
25880 arm_debugger_arg_offset (int value, rtx addr)
25881 {
25882 rtx_insn *insn;
25883
25884 /* We are only interested if dbxout_parms() failed to compute the offset. */
25885 if (value != 0)
25886 return 0;
25887
25888 /* We can only cope with the case where the address is held in a register. */
25889 if (!REG_P (addr))
25890 return 0;
25891
25892 /* If we are using the frame pointer to point at the argument, then
25893 an offset of 0 is correct. */
25894 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25895 return 0;
25896
25897 /* If we are using the stack pointer to point at the
25898 argument, then an offset of 0 is correct. */
25899 /* ??? Check this is consistent with thumb2 frame layout. */
25900 if ((TARGET_THUMB || !frame_pointer_needed)
25901 && REGNO (addr) == SP_REGNUM)
25902 return 0;
25903
25904 /* Oh dear. The argument is pointed to by a register rather
25905 than being held in a register, or being stored at a known
25906 offset from the frame pointer. Since GDB only understands
25907 those two kinds of argument we must translate the address
25908 held in the register into an offset from the frame pointer.
25909 We do this by searching through the insns for the function
25910 looking to see where this register gets its value. If the
25911 register is initialized from the frame pointer plus an offset
25912 then we are in luck and we can continue, otherwise we give up.
25913
25914 This code is exercised by producing debugging information
25915 for a function with arguments like this:
25916
25917 double func (double a, double b, int c, double d) {return d;}
25918
25919 Without this code the stab for parameter 'd' will be set to
25920 an offset of 0 from the frame pointer, rather than 8. */
25921
25922 /* The if() statement says:
25923
25924 If the insn is a normal instruction
25925 and if the insn is setting the value in a register
25926 and if the register being set is the register holding the address of the argument
25927 and if the address is computed by an addition
25928 that involves adding to a register
25929 which is the frame pointer
25930 a constant integer
25931
25932 then... */
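/* Added sketch (assumed register numbers): the loop below matches insns of
   the shape

     (set (reg Rn) (plus (reg HARD_FRAME_POINTER_REGNUM) (const_int 8)))

   and uses the constant as the argument's offset from the frame pointer.  */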
25933
25934 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25935 {
25936 if ( NONJUMP_INSN_P (insn)
25937 && GET_CODE (PATTERN (insn)) == SET
25938 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25939 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25940 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25941 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25942 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25943 )
25944 {
25945 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25946
25947 break;
25948 }
25949 }
25950
25951 if (value == 0)
25952 {
25953 debug_rtx (addr);
25954 warning (0, "unable to compute real location of stacked parameter");
25955 value = 8; /* XXX magic hack */
25956 }
25957
25958 return value;
25959 }
25960 \f
25961 /* Implement TARGET_PROMOTED_TYPE. */
25962
25963 static tree
25964 arm_promoted_type (const_tree t)
25965 {
25966 if (SCALAR_FLOAT_TYPE_P (t)
25967 && TYPE_PRECISION (t) == 16
25968 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25969 return float_type_node;
25970 return NULL_TREE;
25971 }
25972
25973 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25974 This simply adds HFmode as a supported mode; even though we don't
25975 implement arithmetic on this type directly, it's supported by
25976 optabs conversions, much the way the double-word arithmetic is
25977 special-cased in the default hook. */
25978
25979 static bool
25980 arm_scalar_mode_supported_p (scalar_mode mode)
25981 {
25982 if (mode == HFmode)
25983 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25984 else if (ALL_FIXED_POINT_MODE_P (mode))
25985 return true;
25986 else
25987 return default_scalar_mode_supported_p (mode);
25988 }
25989
25990 /* Set the value of FLT_EVAL_METHOD.
25991 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25992
25993 0: evaluate all operations and constants, whose semantic type has at
25994 most the range and precision of type float, to the range and
25995 precision of float; evaluate all other operations and constants to
25996 the range and precision of the semantic type;
25997
25998 N, where _FloatN is a supported interchange floating type
25999 evaluate all operations and constants, whose semantic type has at
26000 most the range and precision of _FloatN type, to the range and
26001 precision of the _FloatN type; evaluate all other operations and
26002 constants to the range and precision of the semantic type;
26003
26004 If we have the ARMv8.2-A extensions then we support _Float16 in native
26005 precision, so we should set this to 16. Otherwise, we support the type,
26006 but want to evaluate expressions in float precision, so set this to
26007 0. */
26008
26009 static enum flt_eval_method
26010 arm_excess_precision (enum excess_precision_type type)
26011 {
26012 switch (type)
26013 {
26014 case EXCESS_PRECISION_TYPE_FAST:
26015 case EXCESS_PRECISION_TYPE_STANDARD:
26016 /* We can calculate either in 16-bit range and precision or
26017 32-bit range and precision. Make that decision based on whether
26018 we have native support for the ARMv8.2-A 16-bit floating-point
26019 instructions or not. */
26020 return (TARGET_VFP_FP16INST
26021 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
26022 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
26023 case EXCESS_PRECISION_TYPE_IMPLICIT:
26024 case EXCESS_PRECISION_TYPE_FLOAT16:
26025 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
26026 default:
26027 gcc_unreachable ();
26028 }
26029 return FLT_EVAL_METHOD_UNPREDICTABLE;
26030 }
26031
26032
26033 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
26034 _Float16 if we are using anything other than ieee format for 16-bit
26035 floating point. Otherwise, punt to the default implementation. */
26036 static opt_scalar_float_mode
26037 arm_floatn_mode (int n, bool extended)
26038 {
26039 if (!extended && n == 16)
26040 {
26041 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
26042 return HFmode;
26043 return opt_scalar_float_mode ();
26044 }
26045
26046 return default_floatn_mode (n, extended);
26047 }
26048
26049
26050 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
26051 not to early-clobber SRC registers in the process.
26052
26053 We assume that the operands described by SRC and DEST represent a
26054 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
26055 number of components into which the copy has been decomposed. */
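/* Added example (hypothetical register allocation): when copying the pair
   {d0, d1} into the overlapping pair {d1, d2}, emitting d1 := d0 first would
   clobber the source of the second move; the reversed loop below emits
   d2 := d1 and then d1 := d0 instead.  */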
26056 void
26057 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
26058 {
26059 unsigned int i;
26060
26061 if (!reg_overlap_mentioned_p (operands[0], operands[1])
26062 || REGNO (operands[0]) < REGNO (operands[1]))
26063 {
26064 for (i = 0; i < count; i++)
26065 {
26066 operands[2 * i] = dest[i];
26067 operands[2 * i + 1] = src[i];
26068 }
26069 }
26070 else
26071 {
26072 for (i = 0; i < count; i++)
26073 {
26074 operands[2 * i] = dest[count - i - 1];
26075 operands[2 * i + 1] = src[count - i - 1];
26076 }
26077 }
26078 }
26079
26080 /* Split operands into moves from op[1] + op[2] into op[0]. */
26081
26082 void
26083 neon_split_vcombine (rtx operands[3])
26084 {
26085 unsigned int dest = REGNO (operands[0]);
26086 unsigned int src1 = REGNO (operands[1]);
26087 unsigned int src2 = REGNO (operands[2]);
26088 machine_mode halfmode = GET_MODE (operands[1]);
26089 unsigned int halfregs = REG_NREGS (operands[1]);
26090 rtx destlo, desthi;
26091
26092 if (src1 == dest && src2 == dest + halfregs)
26093 {
26094 /* No-op move. Can't split to nothing; emit something. */
26095 emit_note (NOTE_INSN_DELETED);
26096 return;
26097 }
26098
26099 /* Preserve register attributes for variable tracking. */
26100 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
26101 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
26102 GET_MODE_SIZE (halfmode));
26103
26104 /* Special case of reversed high/low parts. Use VSWP. */
26105 if (src2 == dest && src1 == dest + halfregs)
26106 {
26107 rtx x = gen_rtx_SET (destlo, operands[1]);
26108 rtx y = gen_rtx_SET (desthi, operands[2]);
26109 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
26110 return;
26111 }
26112
26113 if (!reg_overlap_mentioned_p (operands[2], destlo))
26114 {
26115 /* Try to avoid unnecessary moves if part of the result
26116 is in the right place already. */
26117 if (src1 != dest)
26118 emit_move_insn (destlo, operands[1]);
26119 if (src2 != dest + halfregs)
26120 emit_move_insn (desthi, operands[2]);
26121 }
26122 else
26123 {
26124 if (src2 != dest + halfregs)
26125 emit_move_insn (desthi, operands[2]);
26126 if (src1 != dest)
26127 emit_move_insn (destlo, operands[1]);
26128 }
26129 }
26130 \f
26131 /* Return the number (counting from 0) of
26132 the least significant set bit in MASK. */
26133
26134 inline static int
26135 number_of_first_bit_set (unsigned mask)
26136 {
26137 return ctz_hwi (mask);
26138 }
26139
26140 /* Like emit_multi_reg_push, but allowing for a different set of
26141 registers to be described as saved. MASK is the set of registers
26142 to be saved; REAL_REGS is the set of registers to be described as
26143 saved. If REAL_REGS is 0, only describe the stack adjustment. */
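/* Added illustration (hypothetical masks): with MASK covering {r4, r5, lr}
   the insn built below assembles to "push {r4, r5, lr}", while REAL_REGS only
   affects the unwind notes, so REAL_REGS == 0 records just the 12-byte stack
   adjustment.  */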
26144
26145 static rtx_insn *
26146 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
26147 {
26148 unsigned long regno;
26149 rtx par[10], tmp, reg;
26150 rtx_insn *insn;
26151 int i, j;
26152
26153 /* Build the parallel of the registers actually being stored. */
26154 for (i = 0; mask; ++i, mask &= mask - 1)
26155 {
26156 regno = ctz_hwi (mask);
26157 reg = gen_rtx_REG (SImode, regno);
26158
26159 if (i == 0)
26160 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
26161 else
26162 tmp = gen_rtx_USE (VOIDmode, reg);
26163
26164 par[i] = tmp;
26165 }
26166
26167 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26168 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
26169 tmp = gen_frame_mem (BLKmode, tmp);
26170 tmp = gen_rtx_SET (tmp, par[0]);
26171 par[0] = tmp;
26172
26173 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
26174 insn = emit_insn (tmp);
26175
26176 /* Always build the stack adjustment note for unwind info. */
26177 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
26178 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
26179 par[0] = tmp;
26180
26181 /* Build the parallel of the registers recorded as saved for unwind. */
26182 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
26183 {
26184 regno = ctz_hwi (real_regs);
26185 reg = gen_rtx_REG (SImode, regno);
26186
26187 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
26188 tmp = gen_frame_mem (SImode, tmp);
26189 tmp = gen_rtx_SET (tmp, reg);
26190 RTX_FRAME_RELATED_P (tmp) = 1;
26191 par[j + 1] = tmp;
26192 }
26193
26194 if (j == 0)
26195 tmp = par[0];
26196 else
26197 {
26198 RTX_FRAME_RELATED_P (par[0]) = 1;
26199 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
26200 }
26201
26202 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
26203
26204 return insn;
26205 }
26206
26207 /* Emit code to push or pop registers to or from the stack. F is the
26208 assembly file. MASK is the registers to pop. */
26209 static void
26210 thumb_pop (FILE *f, unsigned long mask)
26211 {
26212 int regno;
26213 int lo_mask = mask & 0xFF;
26214
26215 gcc_assert (mask);
26216
26217 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
26218 {
26219 /* Special case. Do not generate a POP PC statement here; do it in
26220 thumb_exit (). */
26221 thumb_exit (f, -1);
26222 return;
26223 }
26224
26225 fprintf (f, "\tpop\t{");
26226
26227 /* Look at the low registers first. */
26228 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
26229 {
26230 if (lo_mask & 1)
26231 {
26232 asm_fprintf (f, "%r", regno);
26233
26234 if ((lo_mask & ~1) != 0)
26235 fprintf (f, ", ");
26236 }
26237 }
26238
26239 if (mask & (1 << PC_REGNUM))
26240 {
26241 /* Catch popping the PC. */
26242 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
26243 || IS_CMSE_ENTRY (arm_current_func_type ()))
26244 {
26245 /* The PC is never popped directly; instead
26246 it is popped into r3 and then BX is used. */
26247 fprintf (f, "}\n");
26248
26249 thumb_exit (f, -1);
26250
26251 return;
26252 }
26253 else
26254 {
26255 if (mask & 0xFF)
26256 fprintf (f, ", ");
26257
26258 asm_fprintf (f, "%r", PC_REGNUM);
26259 }
26260 }
26261
26262 fprintf (f, "}\n");
26263 }
26264
26265 /* Generate code to return from a thumb function.
26266 If 'reg_containing_return_addr' is -1, then the return address is
26267 actually on the stack, at the stack pointer.
26268
26269 Note: do not forget to update length attribute of corresponding insn pattern
26270 when changing assembly output (eg. length attribute of epilogue_insns when
26271 updating Armv8-M Baseline Security Extensions register clearing
26272 sequences). */
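/* Added note (assumption): when interworking or a CMSE entry prevents popping
   the return address straight into the PC, the code below typically pops it
   into a low register and finishes with "bx" (or "bxns" for CMSE entries),
   e.g. "pop {r3}" followed by "bx r3".  */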
26273 static void
26274 thumb_exit (FILE *f, int reg_containing_return_addr)
26275 {
26276 unsigned regs_available_for_popping;
26277 unsigned regs_to_pop;
26278 int pops_needed;
26279 unsigned available;
26280 unsigned required;
26281 machine_mode mode;
26282 int size;
26283 int restore_a4 = FALSE;
26284
26285 /* Compute the registers we need to pop. */
26286 regs_to_pop = 0;
26287 pops_needed = 0;
26288
26289 if (reg_containing_return_addr == -1)
26290 {
26291 regs_to_pop |= 1 << LR_REGNUM;
26292 ++pops_needed;
26293 }
26294
26295 if (TARGET_BACKTRACE)
26296 {
26297 /* Restore the (ARM) frame pointer and stack pointer. */
26298 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26299 pops_needed += 2;
26300 }
26301
26302 /* If there is nothing to pop then just emit the BX instruction and
26303 return. */
26304 if (pops_needed == 0)
26305 {
26306 if (crtl->calls_eh_return)
26307 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26308
26309 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26310 {
26311 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
26312 emitted by cmse_nonsecure_entry_clear_before_return (). */
26313 if (!TARGET_HAVE_FPCXT_CMSE)
26314 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
26315 reg_containing_return_addr);
26316 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26317 }
26318 else
26319 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26320 return;
26321 }
26322 /* Otherwise if we are not supporting interworking and we have not created
26323 a backtrace structure and the function was not entered in ARM mode then
26324 just pop the return address straight into the PC. */
26325 else if (!TARGET_INTERWORK
26326 && !TARGET_BACKTRACE
26327 && !is_called_in_ARM_mode (current_function_decl)
26328 && !crtl->calls_eh_return
26329 && !IS_CMSE_ENTRY (arm_current_func_type ()))
26330 {
26331 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26332 return;
26333 }
26334
26335 /* Find out how many of the (return) argument registers we can corrupt. */
26336 regs_available_for_popping = 0;
26337
26338 /* If returning via __builtin_eh_return, the bottom three registers
26339 all contain information needed for the return. */
26340 if (crtl->calls_eh_return)
26341 size = 12;
26342 else
26343 {
26344 /* We can deduce the registers used from the function's
26345 return value. This is more reliable than examining
26346 df_regs_ever_live_p () because that will be set if the register is
26347 ever used in the function, not just if the register is used
26348 to hold a return value. */
26349
26350 if (crtl->return_rtx != 0)
26351 mode = GET_MODE (crtl->return_rtx);
26352 else
26353 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26354
26355 size = GET_MODE_SIZE (mode);
26356
26357 if (size == 0)
26358 {
26359 /* In a void function we can use any argument register.
26360 In a function that returns a structure on the stack
26361 we can use the second and third argument registers. */
26362 if (mode == VOIDmode)
26363 regs_available_for_popping =
26364 (1 << ARG_REGISTER (1))
26365 | (1 << ARG_REGISTER (2))
26366 | (1 << ARG_REGISTER (3));
26367 else
26368 regs_available_for_popping =
26369 (1 << ARG_REGISTER (2))
26370 | (1 << ARG_REGISTER (3));
26371 }
26372 else if (size <= 4)
26373 regs_available_for_popping =
26374 (1 << ARG_REGISTER (2))
26375 | (1 << ARG_REGISTER (3));
26376 else if (size <= 8)
26377 regs_available_for_popping =
26378 (1 << ARG_REGISTER (3));
26379 }
26380
26381 /* Match registers to be popped with registers into which we pop them. */
26382 for (available = regs_available_for_popping,
26383 required = regs_to_pop;
26384 required != 0 && available != 0;
26385 available &= ~(available & - available),
26386 required &= ~(required & - required))
26387 -- pops_needed;
26388
26389 /* If we have any popping registers left over, remove them. */
26390 if (available > 0)
26391 regs_available_for_popping &= ~available;
26392
26393 /* Otherwise if we need another popping register we can use
26394 the fourth argument register. */
26395 else if (pops_needed)
26396 {
26397 /* If we have not found any free argument registers and
26398 reg a4 contains the return address, we must move it. */
26399 if (regs_available_for_popping == 0
26400 && reg_containing_return_addr == LAST_ARG_REGNUM)
26401 {
26402 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26403 reg_containing_return_addr = LR_REGNUM;
26404 }
26405 else if (size > 12)
26406 {
26407 /* Register a4 is being used to hold part of the return value,
26408 but we have dire need of a free, low register. */
26409 restore_a4 = TRUE;
26410
26411 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26412 }
26413
26414 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26415 {
26416 /* The fourth argument register is available. */
26417 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26418
26419 --pops_needed;
26420 }
26421 }
26422
26423 /* Pop as many registers as we can. */
26424 thumb_pop (f, regs_available_for_popping);
26425
26426 /* Process the registers we popped. */
26427 if (reg_containing_return_addr == -1)
26428 {
26429 /* The return address was popped into the lowest numbered register. */
26430 regs_to_pop &= ~(1 << LR_REGNUM);
26431
26432 reg_containing_return_addr =
26433 number_of_first_bit_set (regs_available_for_popping);
26434
26435 /* Remove this register from the mask of available registers, so that
26436 the return address will not be corrupted by further pops. */
26437 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26438 }
26439
26440 /* If we popped other registers then handle them here. */
26441 if (regs_available_for_popping)
26442 {
26443 int frame_pointer;
26444
26445 /* Work out which register currently contains the frame pointer. */
26446 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26447
26448 /* Move it into the correct place. */
26449 asm_fprintf (f, "\tmov\t%r, %r\n",
26450 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26451
26452 /* (Temporarily) remove it from the mask of popped registers. */
26453 regs_available_for_popping &= ~(1 << frame_pointer);
26454 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26455
26456 if (regs_available_for_popping)
26457 {
26458 int stack_pointer;
26459
26460 /* We popped the stack pointer as well,
26461 find the register that contains it. */
26462 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26463
26464 /* Move it into the stack register. */
26465 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26466
26467 /* At this point we have popped all necessary registers, so
26468 do not worry about restoring regs_available_for_popping
26469 to its correct value:
26470
26471 assert (pops_needed == 0)
26472 assert (regs_available_for_popping == (1 << frame_pointer))
26473 assert (regs_to_pop == (1 << STACK_POINTER)) */
26474 }
26475 else
26476 {
26477 /* Since we have just moved the popped value into the frame
26478 pointer, the popping register is available for reuse, and
26479 we know that we still have the stack pointer left to pop. */
26480 regs_available_for_popping |= (1 << frame_pointer);
26481 }
26482 }
26483
26484 /* If we still have registers left on the stack, but we no longer have
26485 any registers into which we can pop them, then we must move the return
26486 address into the link register and make available the register that
26487 contained it. */
26488 if (regs_available_for_popping == 0 && pops_needed > 0)
26489 {
26490 regs_available_for_popping |= 1 << reg_containing_return_addr;
26491
26492 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26493 reg_containing_return_addr);
26494
26495 reg_containing_return_addr = LR_REGNUM;
26496 }
26497
26498 /* If we have registers left on the stack then pop some more.
26499 We know that at most we will want to pop FP and SP. */
26500 if (pops_needed > 0)
26501 {
26502 int popped_into;
26503 int move_to;
26504
26505 thumb_pop (f, regs_available_for_popping);
26506
26507 /* We have popped either FP or SP.
26508 Move whichever one it is into the correct register. */
26509 popped_into = number_of_first_bit_set (regs_available_for_popping);
26510 move_to = number_of_first_bit_set (regs_to_pop);
26511
26512 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26513 --pops_needed;
26514 }
26515
26516 /* If we still have not popped everything then we must have only
26517 had one register available to us and we are now popping the SP. */
26518 if (pops_needed > 0)
26519 {
26520 int popped_into;
26521
26522 thumb_pop (f, regs_available_for_popping);
26523
26524 popped_into = number_of_first_bit_set (regs_available_for_popping);
26525
26526 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26527 /*
26528 assert (regs_to_pop == (1 << STACK_POINTER))
26529 assert (pops_needed == 1)
26530 */
26531 }
26532
26533 /* If necessary restore the a4 register. */
26534 if (restore_a4)
26535 {
26536 if (reg_containing_return_addr != LR_REGNUM)
26537 {
26538 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26539 reg_containing_return_addr = LR_REGNUM;
26540 }
26541
26542 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26543 }
26544
26545 if (crtl->calls_eh_return)
26546 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26547
26548 /* Return to caller. */
26549 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26550 {
26551 /* This is for the cases where LR is not being used to contain the return
26552 address. It may therefore contain information that we might not want
26553 to leak, hence it must be cleared. The value in R0 will never be a
26554 secret at this point, so it is safe to use it; see the clearing code
26555 in cmse_nonsecure_entry_clear_before_return (). */
26556 if (reg_containing_return_addr != LR_REGNUM)
26557 asm_fprintf (f, "\tmov\tlr, r0\n");
26558
26559 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26560 by cmse_nonsecure_entry_clear_before_return (). */
26561 if (!TARGET_HAVE_FPCXT_CMSE)
26562 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26563 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26564 }
26565 else
26566 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26567 }
26568 \f
26569 /* Scan INSN just before assembler is output for it.
26570 For Thumb-1, we track the status of the condition codes; this
26571 information is used in the cbranchsi4_insn pattern. */
26572 void
26573 thumb1_final_prescan_insn (rtx_insn *insn)
26574 {
26575 if (flag_print_asm_name)
26576 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26577 INSN_ADDRESSES (INSN_UID (insn)));
26578 /* Don't overwrite the previous setter when we get to a cbranch. */
26579 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26580 {
26581 enum attr_conds conds;
26582
26583 if (cfun->machine->thumb1_cc_insn)
26584 {
26585 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26586 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26587 CC_STATUS_INIT;
26588 }
26589 conds = get_attr_conds (insn);
26590 if (conds == CONDS_SET)
26591 {
26592 rtx set = single_set (insn);
26593 cfun->machine->thumb1_cc_insn = insn;
26594 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26595 cfun->machine->thumb1_cc_op1 = const0_rtx;
26596 cfun->machine->thumb1_cc_mode = CC_NZmode;
26597 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26598 {
26599 rtx src1 = XEXP (SET_SRC (set), 1);
26600 if (src1 == const0_rtx)
26601 cfun->machine->thumb1_cc_mode = CCmode;
26602 }
26603 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26604 {
26605 /* Record the src register operand instead of dest because
26606 cprop_hardreg pass propagates src. */
26607 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26608 }
26609 }
26610 else if (conds != CONDS_NOCOND)
26611 cfun->machine->thumb1_cc_insn = NULL_RTX;
26612 }
26613
26614 /* Check if unexpected far jump is used. */
26615 if (cfun->machine->lr_save_eliminated
26616 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26617 internal_error("Unexpected thumb1 far jump");
26618 }
26619
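/* Added descriptive comment (inferred from the loop below): return nonzero
   if all the set bits of VAL fit within a single contiguous 8-bit field,
   i.e. VAL is an 8-bit constant shifted left by some amount; e.g. 0x00ff0000
   qualifies, 0x00ff00ff does not.  Such constants can typically be built
   with a move of the 8-bit value followed by a shift.  */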
26620 int
26621 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26622 {
26623 unsigned HOST_WIDE_INT mask = 0xff;
26624 int i;
26625
26626 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26627 if (val == 0) /* XXX */
26628 return 0;
26629
26630 for (i = 0; i < 25; i++)
26631 if ((val & (mask << i)) == val)
26632 return 1;
26633
26634 return 0;
26635 }
26636
26637 /* Returns nonzero if the current function contains,
26638 or might contain a far jump. */
26639 static int
26640 thumb_far_jump_used_p (void)
26641 {
26642 rtx_insn *insn;
26643 bool far_jump = false;
26644 unsigned int func_size = 0;
26645
26646 /* If we have already decided that far jumps may be used,
26647 do not bother checking again, and always return true even if
26648 it turns out that they are not being used. Once we have made
26649 the decision that far jumps are present (and that hence the link
26650 register will be pushed onto the stack) we cannot go back on it. */
26651 if (cfun->machine->far_jump_used)
26652 return 1;
26653
26654 /* If this function is not being called from the prologue/epilogue
26655 generation code then it must be being called from the
26656 INITIAL_ELIMINATION_OFFSET macro. */
26657 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26658 {
26659 /* In this case we know that we are being asked about the elimination
26660 of the arg pointer register. If that register is not being used,
26661 then there are no arguments on the stack, and we do not have to
26662 worry that a far jump might force the prologue to push the link
26663 register, changing the stack offsets. In this case we can just
26664 return false, since the presence of far jumps in the function will
26665 not affect stack offsets.
26666
26667 If the arg pointer is live (or if it was live, but has now been
26668 eliminated and so set to dead) then we do have to test to see if
26669 the function might contain a far jump. This test can lead to some
26670 false negatives, since before reload is completed, the length of
26671 branch instructions is not known, so gcc defaults to returning their
26672 longest length, which in turn sets the far jump attribute to true.
26673
26674 A false negative will not result in bad code being generated, but it
26675 will result in a needless push and pop of the link register. We
26676 hope that this does not occur too often.
26677
26678 If we need doubleword stack alignment this could affect the other
26679 elimination offsets so we can't risk getting it wrong. */
26680 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26681 cfun->machine->arg_pointer_live = 1;
26682 else if (!cfun->machine->arg_pointer_live)
26683 return 0;
26684 }
26685
26686 /* We should not change far_jump_used during or after reload, as there is
26687 no chance to change stack frame layout. */
26688 if (reload_in_progress || reload_completed)
26689 return 0;
26690
26691 /* Check to see if the function contains a branch
26692 insn with the far jump attribute set. */
26693 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26694 {
26695 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26696 {
26697 far_jump = true;
26698 }
26699 func_size += get_attr_length (insn);
26700 }
26701
26702 /* The far_jump attribute will always be true for thumb1 before the
26703 shorten_branch pass, so checking the far_jump attribute before
26704 shorten_branch isn't very useful.
26705
26706 The following heuristic tries to estimate more accurately whether a far
26707 jump will eventually be used. The heuristic is very conservative, as
26708 there is no chance to roll back a decision not to use far jumps.
26709
26710 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
26711 that each 2-byte insn is associated with a 4-byte constant pool entry.
26712 Using function size 2048/3 as the threshold is conservative enough. */
26713 if (far_jump)
26714 {
26715 if ((func_size * 3) >= 2048)
26716 {
26717 /* Record the fact that we have decided that
26718 the function does use far jumps. */
26719 cfun->machine->far_jump_used = 1;
26720 return 1;
26721 }
26722 }
26723
26724 return 0;
26725 }
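
/* Illustration only, not part of GCC: the arithmetic behind the size
   heuristic above, kept out of the build.  func_size sums the insn
   lengths seen so far; in the worst case every 2-byte Thumb-1 insn
   drags in a 4-byte literal-pool entry, so the final function can be up
   to three times that size.  Far jumps are therefore assumed as soon as
   3 * func_size reaches the roughly +/-2048-byte branch range quoted in
   the comment.  */
#if 0
#include <stdbool.h>

static bool
example_thumb1_assume_far_jumps (unsigned int func_size)
{
  /* Worst case: each 2-byte insn plus a 4-byte constant-pool entry.  */
  return func_size * 3 >= 2048;
}
#endif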
26726
26727 /* Return nonzero if FUNC must be entered in ARM mode. */
26728 static bool
26729 is_called_in_ARM_mode (tree func)
26730 {
26731 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26732
26733 /* Ignore the problem about functions whose address is taken. */
26734 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26735 return true;
26736
26737 #ifdef ARM_PE
26738 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26739 #else
26740 return false;
26741 #endif
26742 }
26743
26744 /* Given the stack offsets and register mask in OFFSETS, decide how
26745 many additional registers to push instead of subtracting a constant
26746 from SP. For epilogues the principle is the same except we use pop.
26747 FOR_PROLOGUE indicates which we're generating. */
26748 static int
26749 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26750 {
26751 HOST_WIDE_INT amount;
26752 unsigned long live_regs_mask = offsets->saved_regs_mask;
26753 /* Extract a mask of the ones we can give to the Thumb's push/pop
26754 instruction. */
26755 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26756 /* Then count how many other high registers will need to be pushed. */
26757 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26758 int n_free, reg_base, size;
26759
26760 if (!for_prologue && frame_pointer_needed)
26761 amount = offsets->locals_base - offsets->saved_regs;
26762 else
26763 amount = offsets->outgoing_args - offsets->saved_regs;
26764
26765 /* If the stack frame size is 512 exactly, we can save one load
26766 instruction, which should make this a win even when optimizing
26767 for speed. */
26768 if (!optimize_size && amount != 512)
26769 return 0;
26770
26771 /* Can't do this if there are high registers to push. */
26772 if (high_regs_pushed != 0)
26773 return 0;
26774
26775 /* Shouldn't do it in the prologue if no registers would normally
26776 be pushed at all. In the epilogue, also allow it if we'll have
26777 a pop insn for the PC. */
26778 if (l_mask == 0
26779 && (for_prologue
26780 || TARGET_BACKTRACE
26781 || (live_regs_mask & 1 << LR_REGNUM) == 0
26782 || TARGET_INTERWORK
26783 || crtl->args.pretend_args_size != 0))
26784 return 0;
26785
26786 /* Don't do this if thumb_expand_prologue wants to emit instructions
26787 between the push and the stack frame allocation. */
26788 if (for_prologue
26789 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26790 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26791 return 0;
26792
26793 reg_base = 0;
26794 n_free = 0;
26795 if (!for_prologue)
26796 {
26797 size = arm_size_return_regs ();
26798 reg_base = ARM_NUM_INTS (size);
26799 live_regs_mask >>= reg_base;
26800 }
26801
26802 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26803 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26804 {
26805 live_regs_mask >>= 1;
26806 n_free++;
26807 }
26808
26809 if (n_free == 0)
26810 return 0;
26811 gcc_assert (amount / 4 * 4 == amount);
26812
26813 if (amount >= 512 && (amount - n_free * 4) < 512)
26814 return (amount - 508) / 4;
26815 if (amount <= n_free * 4)
26816 return amount / 4;
26817 return 0;
26818 }
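
/* Illustration only, not part of GCC: replaying the closing formula
   above on a concrete case, kept out of the build.  A Thumb-1
   "sub sp, #imm" only reaches 508 bytes, so a 516-byte frame would
   otherwise need a constant-pool load; pushing two otherwise-unused low
   registers shrinks the explicit adjustment back under the limit.  */
#if 0
#include <stdio.h>

static int
example_extra_regs_to_push (int amount, int n_free)
{
  if (amount >= 512 && amount - n_free * 4 < 512)
    return (amount - 508) / 4;  /* Enough pushes to get down to 508.  */
  if (amount <= n_free * 4)
    return amount / 4;          /* The pushes cover the whole frame.  */
  return 0;
}

int
main (void)
{
  printf ("%d\n", example_extra_regs_to_push (516, 3));  /* Prints 2.  */
  return 0;
}
#endif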
26819
26820 /* The bits which aren't usefully expanded as rtl. */
26821 const char *
26822 thumb1_unexpanded_epilogue (void)
26823 {
26824 arm_stack_offsets *offsets;
26825 int regno;
26826 unsigned long live_regs_mask = 0;
26827 int high_regs_pushed = 0;
26828 int extra_pop;
26829 int had_to_push_lr;
26830 int size;
26831
26832 if (cfun->machine->return_used_this_function != 0)
26833 return "";
26834
26835 if (IS_NAKED (arm_current_func_type ()))
26836 return "";
26837
26838 offsets = arm_get_frame_offsets ();
26839 live_regs_mask = offsets->saved_regs_mask;
26840 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26841
26842 /* We can deduce the registers used from the function's return value.
26843 This is more reliable than examining df_regs_ever_live_p () because that
26844 will be set if the register is ever used in the function, not just if
26845 the register is used to hold a return value. */
26846 size = arm_size_return_regs ();
26847
26848 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26849 if (extra_pop > 0)
26850 {
26851 unsigned long extra_mask = (1 << extra_pop) - 1;
26852 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26853 }
26854
26855 /* The prologue may have pushed some high registers to use as
26856 work registers, e.g. the testsuite file
26857 gcc/testsuite/gcc.c-torture/execute/complex-2.c
26858 compiles to produce:
26859 push {r4, r5, r6, r7, lr}
26860 mov r7, r9
26861 mov r6, r8
26862 push {r6, r7}
26863 as part of the prologue. We have to undo that pushing here. */
26864
26865 if (high_regs_pushed)
26866 {
26867 unsigned long mask = live_regs_mask & 0xff;
26868 int next_hi_reg;
26869
26870 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26871
26872 if (mask == 0)
26873 /* Oh dear! We have no low registers into which we can pop
26874 high registers! */
26875 internal_error
26876 ("no low registers available for popping high registers");
26877
26878 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26879 if (live_regs_mask & (1 << next_hi_reg))
26880 break;
26881
26882 while (high_regs_pushed)
26883 {
26884 /* Find lo register(s) into which the high register(s) can
26885 be popped. */
26886 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26887 {
26888 if (mask & (1 << regno))
26889 high_regs_pushed--;
26890 if (high_regs_pushed == 0)
26891 break;
26892 }
26893
26894 if (high_regs_pushed == 0 && regno >= 0)
26895 mask &= ~((1 << regno) - 1);
26896
26897 /* Pop the values into the low register(s). */
26898 thumb_pop (asm_out_file, mask);
26899
26900 /* Move the value(s) into the high registers. */
26901 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26902 {
26903 if (mask & (1 << regno))
26904 {
26905 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26906 regno);
26907
26908 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26909 next_hi_reg--)
26910 if (live_regs_mask & (1 << next_hi_reg))
26911 break;
26912 }
26913 }
26914 }
26915 live_regs_mask &= ~0x0f00;
26916 }
26917
26918 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26919 live_regs_mask &= 0xff;
26920
26921 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26922 {
26923 /* Pop the return address into the PC. */
26924 if (had_to_push_lr)
26925 live_regs_mask |= 1 << PC_REGNUM;
26926
26927 /* Either no argument registers were pushed or a backtrace
26928 structure was created which includes an adjusted stack
26929 pointer, so just pop everything. */
26930 if (live_regs_mask)
26931 thumb_pop (asm_out_file, live_regs_mask);
26932
26933 /* We have either just popped the return address into the
26934 PC or it was kept in LR for the entire function.
26935 Note that thumb_pop has already called thumb_exit if the
26936 PC was in the list. */
26937 if (!had_to_push_lr)
26938 thumb_exit (asm_out_file, LR_REGNUM);
26939 }
26940 else
26941 {
26942 /* Pop everything but the return address. */
26943 if (live_regs_mask)
26944 thumb_pop (asm_out_file, live_regs_mask);
26945
26946 if (had_to_push_lr)
26947 {
26948 if (size > 12)
26949 {
26950 /* We have no free low regs, so save one. */
26951 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26952 LAST_ARG_REGNUM);
26953 }
26954
26955 /* Get the return address into a temporary register. */
26956 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26957
26958 if (size > 12)
26959 {
26960 /* Move the return address to lr. */
26961 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26962 LAST_ARG_REGNUM);
26963 /* Restore the low register. */
26964 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26965 IP_REGNUM);
26966 regno = LR_REGNUM;
26967 }
26968 else
26969 regno = LAST_ARG_REGNUM;
26970 }
26971 else
26972 regno = LR_REGNUM;
26973
26974 /* Remove the argument registers that were pushed onto the stack. */
26975 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26976 SP_REGNUM, SP_REGNUM,
26977 crtl->args.pretend_args_size);
26978
26979 thumb_exit (asm_out_file, regno);
26980 }
26981
26982 return "";
26983 }
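
/* Illustration only, not part of GCC: a deliberately simplified,
   one-register-at-a-time version of the high-register restore above,
   kept out of the build.  Thumb-1 POP can only name r0-r7, so each
   saved high register is popped into a free low register and then moved
   up with "mov high, low"; the real loops above do the same pairing in
   bulk, several registers per POP.  */
#if 0
#include <stdio.h>

static void
example_restore_high_regs (unsigned int high_mask, unsigned int low_mask)
{
  int high, low = 0;

  for (high = 12; high > 7; high--)
    if (high_mask & (1u << high))
      {
        /* Find a low register we are allowed to clobber.  */
        while (low < 8 && !(low_mask & (1u << low)))
          low++;
        if (low == 8)
          break;
        printf ("\tpop\t{r%d}\n\tmov\tr%d, r%d\n", low, high, low);
        low++;
      }
}
#endif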
26984
26985 /* Functions to save and restore machine-specific function data. */
26986 static struct machine_function *
26987 arm_init_machine_status (void)
26988 {
26989 struct machine_function *machine;
26990 machine = ggc_cleared_alloc<machine_function> ();
26991
26992 #if ARM_FT_UNKNOWN != 0
26993 machine->func_type = ARM_FT_UNKNOWN;
26994 #endif
26995 machine->static_chain_stack_bytes = -1;
26996 machine->pacspval_needed = 0;
26997 return machine;
26998 }
26999
27000 /* Return an RTX indicating where the return address to the
27001 calling function can be found. */
27002 rtx
27003 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
27004 {
27005 if (count != 0)
27006 return NULL_RTX;
27007
27008 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
27009 }
27010
27011 /* Do anything needed before RTL is emitted for each function. */
27012 void
27013 arm_init_expanders (void)
27014 {
27015 /* Arrange to initialize and mark the machine per-function status. */
27016 init_machine_status = arm_init_machine_status;
27017
27018 /* This is to stop the combine pass optimizing away the alignment
27019 adjustment of va_arg. */
27020 /* ??? It is claimed that this should not be necessary. */
27021 if (cfun)
27022 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
27023 }
27024
27025 /* Return true if FUNC would be compiled for a different instruction set mode (ARM vs Thumb) than the current one. */
27026
27027 bool
27028 arm_change_mode_p (tree func)
27029 {
27030 if (TREE_CODE (func) != FUNCTION_DECL)
27031 return false;
27032
27033 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
27034
27035 if (!callee_tree)
27036 callee_tree = target_option_default_node;
27037
27038 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
27039 int flags = callee_opts->x_target_flags;
27040
27041 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
27042 }
27043
27044 /* Like arm_compute_initial_elimination_offset. Simpler because there
27045 isn't an ABI specified frame pointer for Thumb. Instead, we set it
27046 to point at the base of the local variables after static stack
27047 space for a function has been allocated. */
27048
27049 HOST_WIDE_INT
27050 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
27051 {
27052 arm_stack_offsets *offsets;
27053
27054 offsets = arm_get_frame_offsets ();
27055
27056 switch (from)
27057 {
27058 case ARG_POINTER_REGNUM:
27059 switch (to)
27060 {
27061 case STACK_POINTER_REGNUM:
27062 return offsets->outgoing_args - offsets->saved_args;
27063
27064 case FRAME_POINTER_REGNUM:
27065 return offsets->soft_frame - offsets->saved_args;
27066
27067 case ARM_HARD_FRAME_POINTER_REGNUM:
27068 return offsets->saved_regs - offsets->saved_args;
27069
27070 case THUMB_HARD_FRAME_POINTER_REGNUM:
27071 return offsets->locals_base - offsets->saved_args;
27072
27073 default:
27074 gcc_unreachable ();
27075 }
27076 break;
27077
27078 case FRAME_POINTER_REGNUM:
27079 switch (to)
27080 {
27081 case STACK_POINTER_REGNUM:
27082 return offsets->outgoing_args - offsets->soft_frame;
27083
27084 case ARM_HARD_FRAME_POINTER_REGNUM:
27085 return offsets->saved_regs - offsets->soft_frame;
27086
27087 case THUMB_HARD_FRAME_POINTER_REGNUM:
27088 return offsets->locals_base - offsets->soft_frame;
27089
27090 default:
27091 gcc_unreachable ();
27092 }
27093 break;
27094
27095 default:
27096 gcc_unreachable ();
27097 }
27098 }
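
/* Illustration only, not part of GCC: elimination offsets are nothing
   more than differences between the byte offsets arm_get_frame_offsets
   records for the frame anchors, as the switch above shows.  The sketch
   below (kept out of the build) uses a cut-down copy of those fields to
   restate the ARG_POINTER -> STACK_POINTER case: replacing the argument
   pointer by the stack pointer means adding the whole frame size.  */
#if 0
struct example_frame_offsets
{
  long saved_args;      /* Incoming arguments.  */
  long saved_regs;      /* Register save area.  */
  long soft_frame;      /* Thumb soft frame pointer.  */
  long locals_base;     /* Local variables.  */
  long outgoing_args;   /* Final stack pointer (outgoing argument area).  */
};

static long
example_arg_to_sp_offset (const struct example_frame_offsets *o)
{
  return o->outgoing_args - o->saved_args;
}
#endif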
27099
27100 /* Generate the function's prologue. */
27101
27102 void
27103 thumb1_expand_prologue (void)
27104 {
27105 rtx_insn *insn;
27106
27107 HOST_WIDE_INT amount;
27108 HOST_WIDE_INT size;
27109 arm_stack_offsets *offsets;
27110 unsigned long func_type;
27111 int regno;
27112 unsigned long live_regs_mask;
27113 unsigned long l_mask;
27114 unsigned high_regs_pushed = 0;
27115 bool lr_needs_saving;
27116
27117 func_type = arm_current_func_type ();
27118
27119 /* Naked functions don't have prologues. */
27120 if (IS_NAKED (func_type))
27121 {
27122 if (flag_stack_usage_info)
27123 current_function_static_stack_size = 0;
27124 return;
27125 }
27126
27127 if (IS_INTERRUPT (func_type))
27128 {
27129 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
27130 return;
27131 }
27132
27133 if (is_called_in_ARM_mode (current_function_decl))
27134 emit_insn (gen_prologue_thumb1_interwork ());
27135
27136 offsets = arm_get_frame_offsets ();
27137 live_regs_mask = offsets->saved_regs_mask;
27138 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
27139
27140 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
27141 l_mask = live_regs_mask & 0x40ff;
27142 /* Then count how many other high registers will need to be pushed. */
27143 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
27144
27145 if (crtl->args.pretend_args_size)
27146 {
27147 rtx x = GEN_INT (-crtl->args.pretend_args_size);
27148
27149 if (cfun->machine->uses_anonymous_args)
27150 {
27151 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
27152 unsigned long mask;
27153
27154 mask = 1ul << (LAST_ARG_REGNUM + 1);
27155 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
27156
27157 insn = thumb1_emit_multi_reg_push (mask, 0);
27158 }
27159 else
27160 {
27161 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27162 stack_pointer_rtx, x));
27163 }
27164 RTX_FRAME_RELATED_P (insn) = 1;
27165 }
27166
27167 if (TARGET_BACKTRACE)
27168 {
27169 HOST_WIDE_INT offset = 0;
27170 unsigned work_register;
27171 rtx work_reg, x, arm_hfp_rtx;
27172
27173 /* We have been asked to create a stack backtrace structure.
27174 The code looks like this:
27175
27176 0 .align 2
27177 0 func:
27178 0 sub SP, #16 Reserve space for 4 registers.
27179 2 push {R7} Push low registers.
27180 4 add R7, SP, #20 Get the stack pointer before the push.
27181 6 str R7, [SP, #8] Store the stack pointer
27182 (before reserving the space).
27183 8 mov R7, PC Get hold of the start of this code + 12.
27184 10 str R7, [SP, #16] Store it.
27185 12 mov R7, FP Get hold of the current frame pointer.
27186 14 str R7, [SP, #4] Store it.
27187 16 mov R7, LR Get hold of the current return address.
27188 18 str R7, [SP, #12] Store it.
27189 20 add R7, SP, #16 Point at the start of the
27190 backtrace structure.
27191 22 mov FP, R7 Put this value into the frame pointer. */
27192
27193 work_register = thumb_find_work_register (live_regs_mask);
27194 work_reg = gen_rtx_REG (SImode, work_register);
27195 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
27196
27197 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27198 stack_pointer_rtx, GEN_INT (-16)));
27199 RTX_FRAME_RELATED_P (insn) = 1;
27200
27201 if (l_mask)
27202 {
27203 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
27204 RTX_FRAME_RELATED_P (insn) = 1;
27205 lr_needs_saving = false;
27206
27207 offset = bit_count (l_mask) * UNITS_PER_WORD;
27208 }
27209
27210 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
27211 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27212
27213 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
27214 x = gen_frame_mem (SImode, x);
27215 emit_move_insn (x, work_reg);
27216
27217 /* Make sure that the instruction fetching the PC is in the right place
27218 to calculate "start of backtrace creation code + 12". */
27219 /* ??? The stores using the common WORK_REG ought to be enough to
27220 prevent the scheduler from doing anything weird. Failing that
27221 we could always move all of the following into an UNSPEC_VOLATILE. */
27222 if (l_mask)
27223 {
27224 x = gen_rtx_REG (SImode, PC_REGNUM);
27225 emit_move_insn (work_reg, x);
27226
27227 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27228 x = gen_frame_mem (SImode, x);
27229 emit_move_insn (x, work_reg);
27230
27231 emit_move_insn (work_reg, arm_hfp_rtx);
27232
27233 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27234 x = gen_frame_mem (SImode, x);
27235 emit_move_insn (x, work_reg);
27236 }
27237 else
27238 {
27239 emit_move_insn (work_reg, arm_hfp_rtx);
27240
27241 x = plus_constant (Pmode, stack_pointer_rtx, offset);
27242 x = gen_frame_mem (SImode, x);
27243 emit_move_insn (x, work_reg);
27244
27245 x = gen_rtx_REG (SImode, PC_REGNUM);
27246 emit_move_insn (work_reg, x);
27247
27248 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
27249 x = gen_frame_mem (SImode, x);
27250 emit_move_insn (x, work_reg);
27251 }
27252
27253 x = gen_rtx_REG (SImode, LR_REGNUM);
27254 emit_move_insn (work_reg, x);
27255
27256 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
27257 x = gen_frame_mem (SImode, x);
27258 emit_move_insn (x, work_reg);
27259
27260 x = GEN_INT (offset + 12);
27261 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
27262
27263 emit_move_insn (arm_hfp_rtx, work_reg);
27264 }
27265 /* Optimization: If we are not pushing any low registers but we are going
27266 to push some high registers then delay our first push. This will just
27267 be a push of LR and we can combine it with the push of the first high
27268 register. */
27269 else if ((l_mask & 0xff) != 0
27270 || (high_regs_pushed == 0 && lr_needs_saving))
27271 {
27272 unsigned long mask = l_mask;
27273 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
27274 insn = thumb1_emit_multi_reg_push (mask, mask);
27275 RTX_FRAME_RELATED_P (insn) = 1;
27276 lr_needs_saving = false;
27277 }
27278
27279 if (high_regs_pushed)
27280 {
27281 unsigned pushable_regs;
27282 unsigned next_hi_reg;
27283 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
27284 : crtl->args.info.nregs;
27285 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
27286
27287 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
27288 if (live_regs_mask & (1 << next_hi_reg))
27289 break;
27290
27291 /* Here we need to mask out registers used for passing arguments,
27292 even if they could otherwise be pushed. This is to avoid using
27293 them to stash the high registers, since such a stash could
27294 clobber argument values that are still live.
27295 pushable_regs = l_mask & (~arg_regs_mask);
27296 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
27297
27298 /* Normally, LR can be used as a scratch register once it has been
27299 saved; but if the function examines its own return address then
27300 the value is still live and we need to avoid using it. */
27301 bool return_addr_live
27302 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
27303 LR_REGNUM);
27304
27305 if (lr_needs_saving || return_addr_live)
27306 pushable_regs &= ~(1 << LR_REGNUM);
27307
27308 if (pushable_regs == 0)
27309 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
27310
27311 while (high_regs_pushed > 0)
27312 {
27313 unsigned long real_regs_mask = 0;
27314 unsigned long push_mask = 0;
27315
27316 for (regno = LR_REGNUM; regno >= 0; regno --)
27317 {
27318 if (pushable_regs & (1 << regno))
27319 {
27320 emit_move_insn (gen_rtx_REG (SImode, regno),
27321 gen_rtx_REG (SImode, next_hi_reg));
27322
27323 high_regs_pushed --;
27324 real_regs_mask |= (1 << next_hi_reg);
27325 push_mask |= (1 << regno);
27326
27327 if (high_regs_pushed)
27328 {
27329 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27330 next_hi_reg --)
27331 if (live_regs_mask & (1 << next_hi_reg))
27332 break;
27333 }
27334 else
27335 break;
27336 }
27337 }
27338
27339 /* If we had to find a work register and we have not yet
27340 saved the LR then add it to the list of regs to push. */
27341 if (lr_needs_saving)
27342 {
27343 push_mask |= 1 << LR_REGNUM;
27344 real_regs_mask |= 1 << LR_REGNUM;
27345 lr_needs_saving = false;
27346 /* If the return address is not live at this point, we
27347 can add LR to the list of registers that we can use
27348 for pushes. */
27349 if (!return_addr_live)
27350 pushable_regs |= 1 << LR_REGNUM;
27351 }
27352
27353 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
27354 RTX_FRAME_RELATED_P (insn) = 1;
27355 }
27356 }
27357
27358 /* Load the pic register before setting the frame pointer,
27359 so we can use r7 as a temporary work register. */
27360 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27361 arm_load_pic_register (live_regs_mask, NULL_RTX);
27362
27363 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27364 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27365 stack_pointer_rtx);
27366
27367 size = offsets->outgoing_args - offsets->saved_args;
27368 if (flag_stack_usage_info)
27369 current_function_static_stack_size = size;
27370
27371 /* If we have a frame, then do stack checking. FIXME: not implemented. */
27372 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27373 || flag_stack_clash_protection)
27374 && size)
27375 sorry ("%<-fstack-check=specific%> for Thumb-1");
27376
27377 amount = offsets->outgoing_args - offsets->saved_regs;
27378 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27379 if (amount)
27380 {
27381 if (amount < 512)
27382 {
27383 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27384 GEN_INT (- amount)));
27385 RTX_FRAME_RELATED_P (insn) = 1;
27386 }
27387 else
27388 {
27389 rtx reg, dwarf;
27390
27391 /* The stack decrement is too big for an immediate value in a single
27392 insn. In theory we could issue multiple subtracts, but after
27393 three of them it becomes more space efficient to place the full
27394 value in the constant pool and load into a register. (Also the
27395 ARM debugger really likes to see only one stack decrement per
27396 function). So instead we look for a scratch register into which
27397 we can load the decrement, and then we subtract this from the
27398 stack pointer. Unfortunately on the thumb the only available
27399 scratch registers are the argument registers, and we cannot use
27400 these as they may hold arguments to the function. Instead we
27401 attempt to locate a call preserved register which is used by this
27402 function. If we can find one, then we know that it will have
27403 been pushed at the start of the prologue and so we can corrupt
27404 it now. */
27405 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27406 if (live_regs_mask & (1 << regno))
27407 break;
27408
27409 gcc_assert (regno <= LAST_LO_REGNUM);
27410
27411 reg = gen_rtx_REG (SImode, regno);
27412
27413 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27414
27415 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27416 stack_pointer_rtx, reg));
27417
27418 dwarf = gen_rtx_SET (stack_pointer_rtx,
27419 plus_constant (Pmode, stack_pointer_rtx,
27420 -amount));
27421 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27422 RTX_FRAME_RELATED_P (insn) = 1;
27423 }
27424 }
27425
27426 if (frame_pointer_needed)
27427 thumb_set_frame_pointer (offsets);
27428
27429 /* If we are profiling, make sure no instructions are scheduled before
27430 the call to mcount. Similarly if the user has requested no
27431 scheduling in the prolog. Similarly if we want non-call exceptions
27432 using the EABI unwinder, to prevent faulting instructions from being
27433 swapped with a stack adjustment. */
27434 if (crtl->profile || !TARGET_SCHED_PROLOG
27435 || (arm_except_unwind_info (&global_options) == UI_TARGET
27436 && cfun->can_throw_non_call_exceptions))
27437 emit_insn (gen_blockage ());
27438
27439 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27440 if (live_regs_mask & 0xff)
27441 cfun->machine->lr_save_eliminated = 0;
27442 }
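
/* Illustration only, not part of GCC: the four-word structure built by
   the TARGET_BACKTRACE block above, written out as a struct (kept out
   of the build).  It sits just above any low registers pushed by the
   prologue, and the new frame pointer is left pointing at its "pc"
   field, so a debugger can walk frames through the saved fp values.  */
#if 0
struct example_thumb_backtrace_frame
{
  unsigned int fp;      /* Caller's frame pointer.  */
  unsigned int sp;      /* Stack pointer before this frame was built.  */
  unsigned int lr;      /* Return address.  */
  unsigned int pc;      /* Start of the creation code + 12; the new
                           frame pointer points at this field.  */
};
#endif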
27443
27444 /* Clear caller saved registers not used to pass return values and leaked
27445 condition flags before exiting a cmse_nonsecure_entry function. */
27446
27447 void
27448 cmse_nonsecure_entry_clear_before_return (void)
27449 {
27450 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
27451 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
27452 uint32_t padding_bits_to_clear = 0;
27453 auto_sbitmap to_clear_bitmap (maxregno + 1);
27454 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27455 tree result_type;
27456
27457 bitmap_clear (to_clear_bitmap);
27458 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27459 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27460
27461 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27462 registers. */
27463 if (clear_vfpregs)
27464 {
27465 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27466
27467 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27468
27469 if (!TARGET_HAVE_FPCXT_CMSE)
27470 {
27471 /* Make sure we don't clear the two scratch registers used to clear
27472 the relevant FPSCR bits in output_return_instruction. */
27473 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27474 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27475 emit_use (gen_rtx_REG (SImode, 4));
27476 bitmap_clear_bit (to_clear_bitmap, 4);
27477 }
27478 }
27479
27480 /* If the user has defined registers to be caller saved, these are no longer
27481 restored by the function before returning and must thus be cleared for
27482 security purposes. */
27483 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27484 {
27485 /* We do not touch registers that can be used to pass arguments as per
27486 the AAPCS, since these should never be made callee-saved by user
27487 options. */
27488 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27489 continue;
27490 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27491 continue;
27492 if (!callee_saved_reg_p (regno)
27493 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27494 || TARGET_HARD_FLOAT))
27495 bitmap_set_bit (to_clear_bitmap, regno);
27496 }
27497
27498 /* Make sure we do not clear the registers used to return the result in. */
27499 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27500 if (!VOID_TYPE_P (result_type))
27501 {
27502 uint64_t to_clear_return_mask;
27503 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27504
27505 /* No need to check that we return in registers, because we don't
27506 support returning on the stack yet. */
27507 gcc_assert (REG_P (result_rtl));
27508 to_clear_return_mask
27509 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27510 &padding_bits_to_clear);
27511 if (to_clear_return_mask)
27512 {
27513 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27514 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27515 {
27516 if (to_clear_return_mask & (1ULL << regno))
27517 bitmap_clear_bit (to_clear_bitmap, regno);
27518 }
27519 }
27520 }
27521
27522 if (padding_bits_to_clear != 0)
27523 {
27524 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27525 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27526
27527 /* Padding_bits_to_clear is not 0 so we know we are dealing with
27528 returning a composite type, which only uses r0. Let's make sure that
27529 r1-r3 are cleared too. */
27530 bitmap_clear (to_clear_arg_regs_bitmap);
27531 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27532 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27533 }
27534
27535 /* Clear full registers that leak before returning. */
27536 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27537 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27538 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27539 clearing_reg);
27540 }
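
/* Illustration only, not part of GCC: the register-selection rule above
   reduced to core registers and a plain bit mask, kept out of the
   build.  On return from a cmse_nonsecure_entry function every
   caller-saved register that does not carry part of the return value
   must be scrubbed, so the sketch starts from r0-r3 plus ip and then
   removes the registers an assumed word-aligned result occupies.  */
#if 0
static unsigned int
example_cmse_core_clear_mask (int result_words)
{
  unsigned int mask = 0xfu | (1u << 12);        /* r0-r3 and ip.  */
  int i;

  for (i = 0; i < result_words && i < 4; i++)
    mask &= ~(1u << i);                         /* Keep the result.  */
  return mask;
}
#endif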
27541
27542 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
27543 POP instruction can be generated. LR should be replaced by PC. All
27544 the checks required are already done by USE_RETURN_INSN (). Hence,
27545 all we really need to check here is whether a single register or
27546 multiple registers are to be restored. */
27547 void
27548 thumb2_expand_return (bool simple_return)
27549 {
27550 int i, num_regs;
27551 unsigned long saved_regs_mask;
27552 arm_stack_offsets *offsets;
27553
27554 offsets = arm_get_frame_offsets ();
27555 saved_regs_mask = offsets->saved_regs_mask;
27556
27557 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27558 if (saved_regs_mask & (1 << i))
27559 num_regs++;
27560
27561 if (!simple_return && saved_regs_mask)
27562 {
27563 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27564 functions or adapt code to handle according to ACLE. This path should
27565 not be reachable for cmse_nonsecure_entry functions though we prefer
27566 to assert it for now to ensure that future code changes do not silently
27567 change this behavior. */
27568 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27569 if (arm_current_function_pac_enabled_p ())
27570 {
27571 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
27572 arm_emit_multi_reg_pop (saved_regs_mask);
27573 emit_insn (gen_aut_nop ());
27574 emit_jump_insn (simple_return_rtx);
27575 }
27576 else if (num_regs == 1)
27577 {
27578 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27579 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27580 rtx addr = gen_rtx_MEM (SImode,
27581 gen_rtx_POST_INC (SImode,
27582 stack_pointer_rtx));
27583 set_mem_alias_set (addr, get_frame_alias_set ());
27584 XVECEXP (par, 0, 0) = ret_rtx;
27585 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27586 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27587 emit_jump_insn (par);
27588 }
27589 else
27590 {
27591 saved_regs_mask &= ~ (1 << LR_REGNUM);
27592 saved_regs_mask |= (1 << PC_REGNUM);
27593 arm_emit_multi_reg_pop (saved_regs_mask);
27594 }
27595 }
27596 else
27597 {
27598 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27599 cmse_nonsecure_entry_clear_before_return ();
27600 emit_jump_insn (simple_return_rtx);
27601 }
27602 }
27603
27604 void
27605 thumb1_expand_epilogue (void)
27606 {
27607 HOST_WIDE_INT amount;
27608 arm_stack_offsets *offsets;
27609 int regno;
27610
27611 /* Naked functions don't have epilogues. */
27612 if (IS_NAKED (arm_current_func_type ()))
27613 return;
27614
27615 offsets = arm_get_frame_offsets ();
27616 amount = offsets->outgoing_args - offsets->saved_regs;
27617
27618 if (frame_pointer_needed)
27619 {
27620 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27621 amount = offsets->locals_base - offsets->saved_regs;
27622 }
27623 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27624
27625 gcc_assert (amount >= 0);
27626 if (amount)
27627 {
27628 emit_insn (gen_blockage ());
27629
27630 if (amount < 512)
27631 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27632 GEN_INT (amount)));
27633 else
27634 {
27635 /* r3 is always free in the epilogue. */
27636 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27637
27638 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27639 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27640 }
27641 }
27642
27643 /* Emit a USE (stack_pointer_rtx), so that
27644 the stack adjustment will not be deleted. */
27645 emit_insn (gen_force_register_use (stack_pointer_rtx));
27646
27647 if (crtl->profile || !TARGET_SCHED_PROLOG)
27648 emit_insn (gen_blockage ());
27649
27650 /* Emit a clobber for each insn that will be restored in the epilogue,
27651 so that flow2 will get register lifetimes correct. */
27652 for (regno = 0; regno < 13; regno++)
27653 if (reg_needs_saving_p (regno))
27654 emit_clobber (gen_rtx_REG (SImode, regno));
27655
27656 if (! df_regs_ever_live_p (LR_REGNUM))
27657 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27658
27659 /* Clear all caller-saved regs that are not used to return. */
27660 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27661 cmse_nonsecure_entry_clear_before_return ();
27662 }
27663
27664 /* Epilogue code for APCS frame. */
27665 static void
27666 arm_expand_epilogue_apcs_frame (bool really_return)
27667 {
27668 unsigned long func_type;
27669 unsigned long saved_regs_mask;
27670 int num_regs = 0;
27671 int i;
27672 int floats_from_frame = 0;
27673 arm_stack_offsets *offsets;
27674
27675 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27676 func_type = arm_current_func_type ();
27677
27678 /* Get frame offsets for ARM. */
27679 offsets = arm_get_frame_offsets ();
27680 saved_regs_mask = offsets->saved_regs_mask;
27681
27682 /* Find the offset of the floating-point save area in the frame. */
27683 floats_from_frame
27684 = (offsets->saved_args
27685 + arm_compute_static_chain_stack_bytes ()
27686 - offsets->frame);
27687
27688 /* Compute how many core registers are saved and how far away the floats are. */
27689 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27690 if (saved_regs_mask & (1 << i))
27691 {
27692 num_regs++;
27693 floats_from_frame += 4;
27694 }
27695
27696 if (TARGET_VFP_BASE)
27697 {
27698 int start_reg;
27699 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27700
27701 /* The offset is from IP_REGNUM. */
27702 int saved_size = arm_get_vfp_saved_size ();
27703 if (saved_size > 0)
27704 {
27705 rtx_insn *insn;
27706 floats_from_frame += saved_size;
27707 insn = emit_insn (gen_addsi3 (ip_rtx,
27708 hard_frame_pointer_rtx,
27709 GEN_INT (-floats_from_frame)));
27710 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27711 ip_rtx, hard_frame_pointer_rtx);
27712 }
27713
27714 /* Generate VFP register multi-pop. */
27715 start_reg = FIRST_VFP_REGNUM;
27716
27717 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27718 /* Look for a case where a reg does not need restoring. */
27719 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27720 {
27721 if (start_reg != i)
27722 arm_emit_vfp_multi_reg_pop (start_reg,
27723 (i - start_reg) / 2,
27724 gen_rtx_REG (SImode,
27725 IP_REGNUM));
27726 start_reg = i + 2;
27727 }
27728
27729 /* Restore the remaining regs that we have discovered (or possibly
27730 even all of them, if the conditional in the for loop never
27731 fired). */
27732 if (start_reg != i)
27733 arm_emit_vfp_multi_reg_pop (start_reg,
27734 (i - start_reg) / 2,
27735 gen_rtx_REG (SImode, IP_REGNUM));
27736 }
27737
27738 if (TARGET_IWMMXT)
27739 {
27740 /* The frame pointer is guaranteed to be non-double-word aligned, as
27741 it is set to double-word-aligned old_stack_pointer - 4. */
27742 rtx_insn *insn;
27743 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27744
27745 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27746 if (reg_needs_saving_p (i))
27747 {
27748 rtx addr = gen_frame_mem (V2SImode,
27749 plus_constant (Pmode, hard_frame_pointer_rtx,
27750 - lrm_count * 4));
27751 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27752 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27753 gen_rtx_REG (V2SImode, i),
27754 NULL_RTX);
27755 lrm_count += 2;
27756 }
27757 }
27758
27759 /* saved_regs_mask should contain IP, which holds the old stack pointer
27760 from the time the activation record was created. Since SP and IP are
27761 adjacent registers, we can restore the value directly into SP. */
27762 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27763 saved_regs_mask &= ~(1 << IP_REGNUM);
27764 saved_regs_mask |= (1 << SP_REGNUM);
27765
27766 /* There are two registers left in saved_regs_mask - LR and PC. We
27767 only need to restore LR (the return address), but to
27768 save time we can load it directly into PC, unless we need a
27769 special function exit sequence, or we are not really returning. */
27770 if (really_return
27771 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27772 && !crtl->calls_eh_return)
27773 /* Delete LR from the register mask, so that LR on
27774 the stack is loaded into the PC in the register mask. */
27775 saved_regs_mask &= ~(1 << LR_REGNUM);
27776 else
27777 saved_regs_mask &= ~(1 << PC_REGNUM);
27778
27779 num_regs = bit_count (saved_regs_mask);
27780 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27781 {
27782 rtx_insn *insn;
27783 emit_insn (gen_blockage ());
27784 /* Unwind the stack to just below the saved registers. */
27785 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27786 hard_frame_pointer_rtx,
27787 GEN_INT (- 4 * num_regs)));
27788
27789 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27790 stack_pointer_rtx, hard_frame_pointer_rtx);
27791 }
27792
27793 arm_emit_multi_reg_pop (saved_regs_mask);
27794
27795 if (IS_INTERRUPT (func_type))
27796 {
27797 /* Interrupt handlers will have pushed the
27798 IP onto the stack, so restore it now. */
27799 rtx_insn *insn;
27800 rtx addr = gen_rtx_MEM (SImode,
27801 gen_rtx_POST_INC (SImode,
27802 stack_pointer_rtx));
27803 set_mem_alias_set (addr, get_frame_alias_set ());
27804 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27805 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27806 gen_rtx_REG (SImode, IP_REGNUM),
27807 NULL_RTX);
27808 }
27809
27810 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27811 return;
27812
27813 if (crtl->calls_eh_return)
27814 emit_insn (gen_addsi3 (stack_pointer_rtx,
27815 stack_pointer_rtx,
27816 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27817
27818 if (IS_STACKALIGN (func_type))
27819 /* Restore the original stack pointer. Before prologue, the stack was
27820 realigned and the original stack pointer saved in r0. For details,
27821 see comment in arm_expand_prologue. */
27822 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27823
27824 emit_jump_insn (simple_return_rtx);
27825 }
27826
27827 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27828 function is not a sibcall. */
27829 void
27830 arm_expand_epilogue (bool really_return)
27831 {
27832 unsigned long func_type;
27833 unsigned long saved_regs_mask;
27834 int num_regs = 0;
27835 int i;
27836 int amount;
27837 arm_stack_offsets *offsets;
27838
27839 func_type = arm_current_func_type ();
27840
27841 /* Naked functions don't have an epilogue. Hence, generate a return pattern
27842 and let output_return_instruction take care of any instruction emission. */
27843 if (IS_NAKED (func_type)
27844 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27845 {
27846 if (really_return)
27847 emit_jump_insn (simple_return_rtx);
27848 return;
27849 }
27850
27851 /* If we are throwing an exception, then we really must be doing a
27852 return, so we can't tail-call. */
27853 gcc_assert (!crtl->calls_eh_return || really_return);
27854
27855 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27856 {
27857 arm_expand_epilogue_apcs_frame (really_return);
27858 return;
27859 }
27860
27861 /* Get frame offsets for ARM. */
27862 offsets = arm_get_frame_offsets ();
27863 saved_regs_mask = offsets->saved_regs_mask;
27864 num_regs = bit_count (saved_regs_mask);
27865
27866 if (frame_pointer_needed)
27867 {
27868 rtx_insn *insn;
27869 /* Restore stack pointer if necessary. */
27870 if (TARGET_ARM)
27871 {
27872 /* In ARM mode, frame pointer points to first saved register.
27873 Restore stack pointer to last saved register. */
27874 amount = offsets->frame - offsets->saved_regs;
27875
27876 /* Force out any pending memory operations that reference stacked data
27877 before stack de-allocation occurs. */
27878 emit_insn (gen_blockage ());
27879 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27880 hard_frame_pointer_rtx,
27881 GEN_INT (amount)));
27882 arm_add_cfa_adjust_cfa_note (insn, amount,
27883 stack_pointer_rtx,
27884 hard_frame_pointer_rtx);
27885
27886 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27887 deleted. */
27888 emit_insn (gen_force_register_use (stack_pointer_rtx));
27889 }
27890 else
27891 {
27892 /* In Thumb-2 mode, the frame pointer points to the last saved
27893 register. */
27894 amount = offsets->locals_base - offsets->saved_regs;
27895 if (amount)
27896 {
27897 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27898 hard_frame_pointer_rtx,
27899 GEN_INT (amount)));
27900 arm_add_cfa_adjust_cfa_note (insn, amount,
27901 hard_frame_pointer_rtx,
27902 hard_frame_pointer_rtx);
27903 }
27904
27905 /* Force out any pending memory operations that reference stacked data
27906 before stack de-allocation occurs. */
27907 emit_insn (gen_blockage ());
27908 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27909 hard_frame_pointer_rtx));
27910 arm_add_cfa_adjust_cfa_note (insn, 0,
27911 stack_pointer_rtx,
27912 hard_frame_pointer_rtx);
27913 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27914 deleted. */
27915 emit_insn (gen_force_register_use (stack_pointer_rtx));
27916 }
27917 }
27918 else
27919 {
27920 /* Pop off outgoing args and local frame to adjust stack pointer to
27921 last saved register. */
27922 amount = offsets->outgoing_args - offsets->saved_regs;
27923 if (amount)
27924 {
27925 rtx_insn *tmp;
27926 /* Force out any pending memory operations that reference stacked data
27927 before stack de-allocation occurs. */
27928 emit_insn (gen_blockage ());
27929 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27930 stack_pointer_rtx,
27931 GEN_INT (amount)));
27932 arm_add_cfa_adjust_cfa_note (tmp, amount,
27933 stack_pointer_rtx, stack_pointer_rtx);
27934 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27935 not deleted. */
27936 emit_insn (gen_force_register_use (stack_pointer_rtx));
27937 }
27938 }
27939
27940 if (TARGET_VFP_BASE)
27941 {
27942 /* Generate VFP register multi-pop. */
27943 int end_reg = LAST_VFP_REGNUM + 1;
27944
27945 /* Scan the registers in reverse order. We need to match
27946 any groupings made in the prologue and generate matching
27947 vldm operations. The need to match groups is because,
27948 unlike pop, vldm can only do consecutive regs. */
27949 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27950 /* Look for a case where a reg does not need restoring. */
27951 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27952 {
27953 /* Restore the regs discovered so far (from reg+2 to
27954 end_reg). */
27955 if (end_reg > i + 2)
27956 arm_emit_vfp_multi_reg_pop (i + 2,
27957 (end_reg - (i + 2)) / 2,
27958 stack_pointer_rtx);
27959 end_reg = i;
27960 }
27961
27962 /* Restore the remaining regs that we have discovered (or possibly
27963 even all of them, if the conditional in the for loop never
27964 fired). */
27965 if (end_reg > i + 2)
27966 arm_emit_vfp_multi_reg_pop (i + 2,
27967 (end_reg - (i + 2)) / 2,
27968 stack_pointer_rtx);
27969 }
27970
27971 if (TARGET_IWMMXT)
27972 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27973 if (reg_needs_saving_p (i))
27974 {
27975 rtx_insn *insn;
27976 rtx addr = gen_rtx_MEM (V2SImode,
27977 gen_rtx_POST_INC (SImode,
27978 stack_pointer_rtx));
27979 set_mem_alias_set (addr, get_frame_alias_set ());
27980 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27981 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27982 gen_rtx_REG (V2SImode, i),
27983 NULL_RTX);
27984 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27985 stack_pointer_rtx, stack_pointer_rtx);
27986 }
27987
27988 if (saved_regs_mask)
27989 {
27990 rtx insn;
27991 bool return_in_pc = false;
27992
27993 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27994 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27995 && !IS_CMSE_ENTRY (func_type)
27996 && !IS_STACKALIGN (func_type)
27997 && really_return
27998 && crtl->args.pretend_args_size == 0
27999 && saved_regs_mask & (1 << LR_REGNUM)
28000 && !crtl->calls_eh_return
28001 && !arm_current_function_pac_enabled_p ())
28002 {
28003 saved_regs_mask &= ~(1 << LR_REGNUM);
28004 saved_regs_mask |= (1 << PC_REGNUM);
28005 return_in_pc = true;
28006 }
28007
28008 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
28009 {
28010 for (i = 0; i <= LAST_ARM_REGNUM; i++)
28011 if (saved_regs_mask & (1 << i))
28012 {
28013 rtx addr = gen_rtx_MEM (SImode,
28014 gen_rtx_POST_INC (SImode,
28015 stack_pointer_rtx));
28016 set_mem_alias_set (addr, get_frame_alias_set ());
28017
28018 if (i == PC_REGNUM)
28019 {
28020 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
28021 XVECEXP (insn, 0, 0) = ret_rtx;
28022 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
28023 addr);
28024 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
28025 insn = emit_jump_insn (insn);
28026 }
28027 else
28028 {
28029 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
28030 addr));
28031 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
28032 gen_rtx_REG (SImode, i),
28033 NULL_RTX);
28034 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
28035 stack_pointer_rtx,
28036 stack_pointer_rtx);
28037 }
28038 }
28039 }
28040 else
28041 {
28042 if (TARGET_LDRD
28043 && current_tune->prefer_ldrd_strd
28044 && !optimize_function_for_size_p (cfun))
28045 {
28046 if (TARGET_THUMB2)
28047 thumb2_emit_ldrd_pop (saved_regs_mask);
28048 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
28049 arm_emit_ldrd_pop (saved_regs_mask);
28050 else
28051 arm_emit_multi_reg_pop (saved_regs_mask);
28052 }
28053 else
28054 arm_emit_multi_reg_pop (saved_regs_mask);
28055 }
28056
28057 if (return_in_pc)
28058 return;
28059 }
28060
28061 amount
28062 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
28063 if (amount)
28064 {
28065 int i, j;
28066 rtx dwarf = NULL_RTX;
28067 rtx_insn *tmp =
28068 emit_insn (gen_addsi3 (stack_pointer_rtx,
28069 stack_pointer_rtx,
28070 GEN_INT (amount)));
28071
28072 RTX_FRAME_RELATED_P (tmp) = 1;
28073
28074 if (cfun->machine->uses_anonymous_args)
28075 {
28076 /* Restore pretend args. Refer to arm_expand_prologue for how the
28077 pretend args are saved on the stack. */
28078 int num_regs = crtl->args.pretend_args_size / 4;
28079 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
28080 for (j = 0, i = 0; j < num_regs; i++)
28081 if (saved_regs_mask & (1 << i))
28082 {
28083 rtx reg = gen_rtx_REG (SImode, i);
28084 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
28085 j++;
28086 }
28087 REG_NOTES (tmp) = dwarf;
28088 }
28089 arm_add_cfa_adjust_cfa_note (tmp, amount,
28090 stack_pointer_rtx, stack_pointer_rtx);
28091 }
28092
28093 if (IS_CMSE_ENTRY (func_type))
28094 {
28095 /* CMSE_ENTRY always returns. */
28096 gcc_assert (really_return);
28097 /* Clear all caller-saved regs that are not used to return. */
28098 cmse_nonsecure_entry_clear_before_return ();
28099
28100 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
28101 VLDR. */
28102 if (TARGET_HAVE_FPCXT_CMSE)
28103 {
28104 rtx_insn *insn;
28105
28106 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
28107 GEN_INT (FPCXTNS_ENUM)));
28108 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
28109 plus_constant (Pmode, stack_pointer_rtx, 4));
28110 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
28111 RTX_FRAME_RELATED_P (insn) = 1;
28112 }
28113 }
28114
28115 if (arm_current_function_pac_enabled_p ())
28116 emit_insn (gen_aut_nop ());
28117
28118 if (!really_return)
28119 return;
28120
28121 if (crtl->calls_eh_return)
28122 emit_insn (gen_addsi3 (stack_pointer_rtx,
28123 stack_pointer_rtx,
28124 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
28125
28126 if (IS_STACKALIGN (func_type))
28127 /* Restore the original stack pointer. Before prologue, the stack was
28128 realigned and the original stack pointer saved in r0. For details,
28129 see comment in arm_expand_prologue. */
28130 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
28131
28132 emit_jump_insn (simple_return_rtx);
28133 }
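
/* Illustration only, not part of GCC: the gist of the "return in PC"
   test in arm_expand_epilogue above, kept out of the build.  When an
   ordinary function that saved LR is really returning, has no pretend
   arguments, no EH return, and no pointer authentication, the epilogue
   swaps LR for PC in the pop mask so a single "pop {..., pc}" both
   restores the registers and returns.  The CMSE, stack-realign and
   interworking exclusions are folded into the first parameter here.  */
#if 0
#include <stdbool.h>

static bool
example_can_return_in_pc (bool ordinary_function, bool really_return,
                          bool saved_lr, bool pretend_args,
                          bool calls_eh_return, bool pac_enabled)
{
  return ordinary_function && really_return && saved_lr
         && !pretend_args && !calls_eh_return && !pac_enabled;
}
#endif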
28134
28135 /* Implementation of insn prologue_thumb1_interwork. This is the first
28136 "instruction" of a function called in ARM mode. Swap to thumb mode. */
28137
28138 const char *
28139 thumb1_output_interwork (void)
28140 {
28141 const char * name;
28142 FILE *f = asm_out_file;
28143
28144 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
28145 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
28146 == SYMBOL_REF);
28147 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28148
28149 /* Generate code sequence to switch us into Thumb mode. */
28150 /* The .code 32 directive has already been emitted by
28151 ASM_DECLARE_FUNCTION_NAME. */
28152 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
28153 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
28154
28155 /* Generate a label, so that the debugger will notice the
28156 change in instruction sets. This label is also used by
28157 the assembler to bypass the ARM code when this function
28158 is called from a Thumb encoded function elsewhere in the
28159 same file. Hence the definition of STUB_NAME here must
28160 agree with the definition in gas/config/tc-arm.c. */
28161
28162 #define STUB_NAME ".real_start_of"
28163
28164 fprintf (f, "\t.code\t16\n");
28165 #ifdef ARM_PE
28166 if (arm_dllexport_name_p (name))
28167 name = arm_strip_name_encoding (name);
28168 #endif
28169 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
28170 fprintf (f, "\t.thumb_func\n");
28171 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
28172
28173 return "";
28174 }
28175
28176 /* Handle the case of a double word load into a low register from
28177 a computed memory address. The computed address may involve a
28178 register which is overwritten by the load. */
28179 const char *
28180 thumb_load_double_from_address (rtx *operands)
28181 {
28182 rtx addr;
28183 rtx base;
28184 rtx offset;
28185 rtx arg1;
28186 rtx arg2;
28187
28188 gcc_assert (REG_P (operands[0]));
28189 gcc_assert (MEM_P (operands[1]));
28190
28191 /* Get the memory address. */
28192 addr = XEXP (operands[1], 0);
28193
28194 /* Work out how the memory address is computed. */
28195 switch (GET_CODE (addr))
28196 {
28197 case REG:
28198 operands[2] = adjust_address (operands[1], SImode, 4);
28199
28200 if (REGNO (operands[0]) == REGNO (addr))
28201 {
28202 output_asm_insn ("ldr\t%H0, %2", operands);
28203 output_asm_insn ("ldr\t%0, %1", operands);
28204 }
28205 else
28206 {
28207 output_asm_insn ("ldr\t%0, %1", operands);
28208 output_asm_insn ("ldr\t%H0, %2", operands);
28209 }
28210 break;
28211
28212 case CONST:
28213 /* Compute <address> + 4 for the high order load. */
28214 operands[2] = adjust_address (operands[1], SImode, 4);
28215
28216 output_asm_insn ("ldr\t%0, %1", operands);
28217 output_asm_insn ("ldr\t%H0, %2", operands);
28218 break;
28219
28220 case PLUS:
28221 arg1 = XEXP (addr, 0);
28222 arg2 = XEXP (addr, 1);
28223
28224 if (CONSTANT_P (arg1))
28225 base = arg2, offset = arg1;
28226 else
28227 base = arg1, offset = arg2;
28228
28229 gcc_assert (REG_P (base));
28230
28231 /* Catch the case of <address> = <reg> + <reg> */
28232 if (REG_P (offset))
28233 {
28234 int reg_offset = REGNO (offset);
28235 int reg_base = REGNO (base);
28236 int reg_dest = REGNO (operands[0]);
28237
28238 /* Add the base and offset registers together into the
28239 higher destination register. */
28240 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
28241 reg_dest + 1, reg_base, reg_offset);
28242
28243 /* Load the lower destination register from the address in
28244 the higher destination register. */
28245 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
28246 reg_dest, reg_dest + 1);
28247
28248 /* Load the higher destination register from its own address
28249 plus 4. */
28250 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
28251 reg_dest + 1, reg_dest + 1);
28252 }
28253 else
28254 {
28255 /* Compute <address> + 4 for the high order load. */
28256 operands[2] = adjust_address (operands[1], SImode, 4);
28257
28258 /* If the computed address is held in the low order register
28259 then load the high order register first, otherwise always
28260 load the low order register first. */
28261 if (REGNO (operands[0]) == REGNO (base))
28262 {
28263 output_asm_insn ("ldr\t%H0, %2", operands);
28264 output_asm_insn ("ldr\t%0, %1", operands);
28265 }
28266 else
28267 {
28268 output_asm_insn ("ldr\t%0, %1", operands);
28269 output_asm_insn ("ldr\t%H0, %2", operands);
28270 }
28271 }
28272 break;
28273
28274 case LABEL_REF:
28275 /* With no registers to worry about we can just load the value
28276 directly. */
28277 operands[2] = adjust_address (operands[1], SImode, 4);
28278
28279 output_asm_insn ("ldr\t%H0, %2", operands);
28280 output_asm_insn ("ldr\t%0, %1", operands);
28281 break;
28282
28283 default:
28284 gcc_unreachable ();
28285 }
28286
28287 return "";
28288 }
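
/* Illustration only, not part of GCC: the ordering hazard the REG and
   PLUS cases above guard against, kept out of the build.  If the low
   half of the destination is also the base register, loading the low
   word first would destroy the address before the high word is fetched,
   so in that case the high word is loaded first.  */
#if 0
#include <stdio.h>

static void
example_emit_double_word_load (int dest_lo, int base)
{
  if (dest_lo == base)
    {
      printf ("\tldr\tr%d, [r%d, #4]\n", dest_lo + 1, base);
      printf ("\tldr\tr%d, [r%d]\n", dest_lo, base);
    }
  else
    {
      printf ("\tldr\tr%d, [r%d]\n", dest_lo, base);
      printf ("\tldr\tr%d, [r%d, #4]\n", dest_lo + 1, base);
    }
}
#endif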
28289
28290 const char *
28291 thumb_output_move_mem_multiple (int n, rtx *operands)
28292 {
28293 switch (n)
28294 {
28295 case 2:
28296 if (REGNO (operands[4]) > REGNO (operands[5]))
28297 std::swap (operands[4], operands[5]);
28298
28299 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
28300 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
28301 break;
28302
28303 case 3:
28304 if (REGNO (operands[4]) > REGNO (operands[5]))
28305 std::swap (operands[4], operands[5]);
28306 if (REGNO (operands[5]) > REGNO (operands[6]))
28307 std::swap (operands[5], operands[6]);
28308 if (REGNO (operands[4]) > REGNO (operands[5]))
28309 std::swap (operands[4], operands[5]);
28310
28311 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
28312 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
28313 break;
28314
28315 default:
28316 gcc_unreachable ();
28317 }
28318
28319 return "";
28320 }
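
/* Illustration only, not part of GCC: the three conditional swaps in
   the n == 3 case above form a complete sorting network for three
   elements, which is needed because LDMIA/STMIA register lists must be
   in ascending order.  The same network on plain integers, kept out of
   the build:  */
#if 0
static void
example_sort3 (int *a, int *b, int *c)
{
  int t;
  if (*a > *b) { t = *a; *a = *b; *b = t; }
  if (*b > *c) { t = *b; *b = *c; *c = t; }
  if (*a > *b) { t = *a; *a = *b; *b = t; }
}
#endif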
28321
28322 /* Output a call-via instruction for thumb state. */
28323 const char *
28324 thumb_call_via_reg (rtx reg)
28325 {
28326 int regno = REGNO (reg);
28327 rtx *labelp;
28328
28329 gcc_assert (regno < LR_REGNUM);
28330
28331 /* If we are in the normal text section we can use a single instance
28332 per compilation unit. If we are doing function sections, then we need
28333 an entry per section, since we can't rely on reachability. */
28334 if (in_section == text_section)
28335 {
28336 thumb_call_reg_needed = 1;
28337
28338 if (thumb_call_via_label[regno] == NULL)
28339 thumb_call_via_label[regno] = gen_label_rtx ();
28340 labelp = thumb_call_via_label + regno;
28341 }
28342 else
28343 {
28344 if (cfun->machine->call_via[regno] == NULL)
28345 cfun->machine->call_via[regno] = gen_label_rtx ();
28346 labelp = cfun->machine->call_via + regno;
28347 }
28348
28349 output_asm_insn ("bl\t%a0", labelp);
28350 return "";
28351 }
28352
28353 /* Routines for generating rtl. */
28354 void
28355 thumb_expand_cpymemqi (rtx *operands)
28356 {
28357 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
28358 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
28359 HOST_WIDE_INT len = INTVAL (operands[2]);
28360 HOST_WIDE_INT offset = 0;
28361
28362 while (len >= 12)
28363 {
28364 emit_insn (gen_cpymem12b (out, in, out, in));
28365 len -= 12;
28366 }
28367
28368 if (len >= 8)
28369 {
28370 emit_insn (gen_cpymem8b (out, in, out, in));
28371 len -= 8;
28372 }
28373
28374 if (len >= 4)
28375 {
28376 rtx reg = gen_reg_rtx (SImode);
28377 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
28378 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
28379 len -= 4;
28380 offset += 4;
28381 }
28382
28383 if (len >= 2)
28384 {
28385 rtx reg = gen_reg_rtx (HImode);
28386 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
28387 plus_constant (Pmode, in,
28388 offset))));
28389 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
28390 offset)),
28391 reg));
28392 len -= 2;
28393 offset += 2;
28394 }
28395
28396 if (len)
28397 {
28398 rtx reg = gen_reg_rtx (QImode);
28399 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
28400 plus_constant (Pmode, in,
28401 offset))));
28402 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
28403 offset)),
28404 reg));
28405 }
28406 }
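
/* Illustration only, not part of GCC: the chunk sizes the expander
   above picks for a given length, kept out of the build.  It peels off
   12-byte LDMIA/STMIA groups while it can, then at most one 8-byte
   group, then single 4-, 2- and 1-byte tail moves; a 31-byte copy, for
   instance, becomes 12 + 12 + 4 + 2 + 1.  */
#if 0
#include <stdio.h>

static void
example_cpymem_chunks (long len)
{
  while (len >= 12) { printf ("12 "); len -= 12; }
  if (len >= 8)     { printf ("8 ");  len -= 8; }
  if (len >= 4)     { printf ("4 ");  len -= 4; }
  if (len >= 2)     { printf ("2 ");  len -= 2; }
  if (len > 0)      printf ("1");
  printf ("\n");
}
#endif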
28407
28408 void
28409 thumb_reload_out_hi (rtx *operands)
28410 {
28411 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
28412 }
28413
28414 /* Return the length of a function name prefix
28415 that starts with the character 'c'. */
28416 static int
28417 arm_get_strip_length (int c)
28418 {
28419 switch (c)
28420 {
28421 ARM_NAME_ENCODING_LENGTHS
28422 default: return 0;
28423 }
28424 }
28425
28426 /* Return a pointer to a function's name with any
28427 and all prefix encodings stripped from it. */
28428 const char *
28429 arm_strip_name_encoding (const char *name)
28430 {
28431 int skip;
28432
28433 while ((skip = arm_get_strip_length (* name)))
28434 name += skip;
28435
28436 return name;
28437 }
28438
28439 /* If there is a '*' anywhere in the name's prefix, then
28440 emit the stripped name verbatim, otherwise prepend an
28441 underscore if leading underscores are being used. */
28442 void
28443 arm_asm_output_labelref (FILE *stream, const char *name)
28444 {
28445 int skip;
28446 int verbatim = 0;
28447
28448 while ((skip = arm_get_strip_length (* name)))
28449 {
28450 verbatim |= (*name == '*');
28451 name += skip;
28452 }
28453
28454 if (verbatim)
28455 fputs (name, stream);
28456 else
28457 asm_fprintf (stream, "%U%s", name);
28458 }
28459
28460 /* This function is used to emit an EABI tag and its associated value.
28461 We emit the numerical value of the tag in case the assembler does not
28462 support textual tags (e.g. gas prior to 2.20). If requested we include
28463 the tag name in a comment so that anyone reading the assembler output
28464 will know which tag is being set.
28465
28466 This function is not static because arm-c.cc needs it too. */
28467
28468 void
28469 arm_emit_eabi_attribute (const char *name, int num, int val)
28470 {
28471 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28472 if (flag_verbose_asm || flag_debug_asm)
28473 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28474 asm_fprintf (asm_out_file, "\n");
28475 }
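/* For example, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 2)
   produces something like (assuming '@' is the assembler comment
   character, as on ARM ELF targets)

       .eabi_attribute 26, 2	@ Tag_ABI_enum_size

   with the trailing comment only present under -fverbose-asm or -dA.  */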
28476
28477 /* This function is used to print CPU tuning information as comment
28478 in assembler file. Pointers are not printed for now. */
28479
28480 void
28481 arm_print_tune_info (void)
28482 {
28483 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28484 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28485 current_tune->constant_limit);
28486 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28487 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28488 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28489 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28490 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28491 "prefetch.l1_cache_size:\t%d\n",
28492 current_tune->prefetch.l1_cache_size);
28493 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28494 "prefetch.l1_cache_line_size:\t%d\n",
28495 current_tune->prefetch.l1_cache_line_size);
28496 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28497 "prefer_constant_pool:\t%d\n",
28498 (int) current_tune->prefer_constant_pool);
28499 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28500 "branch_cost:\t(s:speed, p:predictable)\n");
28501 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28502 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28503 current_tune->branch_cost (false, false));
28504 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28505 current_tune->branch_cost (false, true));
28506 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28507 current_tune->branch_cost (true, false));
28508 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28509 current_tune->branch_cost (true, true));
28510 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28511 "prefer_ldrd_strd:\t%d\n",
28512 (int) current_tune->prefer_ldrd_strd);
28513 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28514 "logical_op_non_short_circuit:\t[%d,%d]\n",
28515 (int) current_tune->logical_op_non_short_circuit_thumb,
28516 (int) current_tune->logical_op_non_short_circuit_arm);
28517 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28518 "disparage_flag_setting_t16_encodings:\t%d\n",
28519 (int) current_tune->disparage_flag_setting_t16_encodings);
28520 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28521 "string_ops_prefer_neon:\t%d\n",
28522 (int) current_tune->string_ops_prefer_neon);
28523 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28524 "max_insns_inline_memset:\t%d\n",
28525 current_tune->max_insns_inline_memset);
28526 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28527 current_tune->fusible_ops);
28528 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28529 (int) current_tune->sched_autopref);
28530 }
28531
28532 /* The last set of target options used to emit .arch directives, etc. This
28533 could be a function-local static if it were not required to expose it as a
28534 root to the garbage collector. */
28535 static GTY(()) cl_target_option *last_asm_targ_options = NULL;
28536
28537 /* Print .arch and .arch_extension directives corresponding to the
28538 current architecture configuration. */
28539 static void
28540 arm_print_asm_arch_directives (FILE *stream, cl_target_option *targ_options)
28541 {
28542 arm_build_target build_target;
28543 /* If the target options haven't changed since the last time we were called
28544 there is nothing to do. This should be sufficient to suppress the
28545 majority of redundant work. */
28546 if (last_asm_targ_options == targ_options)
28547 return;
28548
28549 last_asm_targ_options = targ_options;
28550
28551 build_target.isa = sbitmap_alloc (isa_num_bits);
28552 arm_configure_build_target (&build_target, targ_options, false);
28553
28554 if (build_target.core_name
28555 && !bitmap_bit_p (build_target.isa, isa_bit_quirk_no_asmcpu))
28556 {
28557 const char* truncated_name
28558 = arm_rewrite_selected_cpu (build_target.core_name);
28559 asm_fprintf (stream, "\t.cpu %s\n", truncated_name);
28560 }
28561
28562 const arch_option *arch
28563 = arm_parse_arch_option_name (all_architectures, "-march",
28564 build_target.arch_name);
28565 auto_sbitmap opt_bits (isa_num_bits);
28566
28567 gcc_assert (arch);
28568
28569 if (strcmp (build_target.arch_name, "armv7ve") == 0)
28570 {
28571 /* Keep backward compatibility for assemblers which don't support
28572 armv7ve. Fortunately, none of the following extensions are reset
28573 by a .fpu directive. */
28574 asm_fprintf (stream, "\t.arch armv7-a\n");
28575 asm_fprintf (stream, "\t.arch_extension virt\n");
28576 asm_fprintf (stream, "\t.arch_extension idiv\n");
28577 asm_fprintf (stream, "\t.arch_extension sec\n");
28578 asm_fprintf (stream, "\t.arch_extension mp\n");
28579 }
28580 else
28581 asm_fprintf (stream, "\t.arch %s\n", build_target.arch_name);
28582
28583 /* The .fpu directive will reset any architecture extensions from the
28584 assembler that relate to the fp/vector extensions. So put this out before
28585 any .arch_extension directives. */
28586 const char *fpu_name = (TARGET_SOFT_FLOAT
28587 ? "softvfp"
28588 : arm_identify_fpu_from_isa (build_target.isa));
28589 asm_fprintf (stream, "\t.fpu %s\n", fpu_name);
28590
28591 if (!arch->common.extensions)
28592 return;
28593
28594 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28595 opt->name != NULL;
28596 opt++)
28597 {
28598 if (!opt->remove)
28599 {
28600 arm_initialize_isa (opt_bits, opt->isa_bits);
28601
28602 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28603 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28604 floating point instructions are disabled. So the following check
28605 restricts the printing of ".arch_extension mve" and
28606 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28607 this special behaviour because the feature bits "mve" and
28608 "mve_float" are not part of the "fpu bits", so they are not cleared
28609 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28610 TARGET_HAVE_MVE_FLOAT are disabled. */
28611 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28612 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28613 && !TARGET_HAVE_MVE_FLOAT))
28614 continue;
28615
28616 /* If every feature bit of this option is set in the target ISA
28617 specification, print out the option name. However, don't print
28618 anything if all the bits are part of the FPU specification. */
28619 if (bitmap_subset_p (opt_bits, build_target.isa)
28620 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28621 asm_fprintf (stream, "\t.arch_extension %s\n", opt->name);
28622 }
28623 }
28624 }
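/* As a sketch of the output, building for -march=armv7ve with a
   hard-float ABI emits something along the lines of

       .arch armv7-a
       .arch_extension virt
       .arch_extension idiv
       .arch_extension sec
       .arch_extension mp
       .fpu <fpu-name>

   followed by any further .arch_extension lines selected by the loop
   above; the exact .fpu name depends on the ISA bits.  */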
28625
28626 static void
28627 arm_file_start (void)
28628 {
28629 int val;
28630 bool pac = (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
28631 bool bti = (aarch_enable_bti == 1);
28632
28633 arm_print_asm_arch_directives
28634 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28635
28636 if (TARGET_BPABI)
28637 {
28638 /* If we have a named cpu, but the assembler does not support that
28639 name via .cpu, put out a cpu name attribute; but don't do this if the
28640 name starts with the fictitious prefix, 'generic'. */
28641 if (arm_active_target.core_name
28642 && bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu)
28643 && !startswith (arm_active_target.core_name, "generic"))
28644 {
28645 const char* truncated_name
28646 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28647 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28648 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28649 truncated_name);
28650 }
28651
28652 if (print_tune_info)
28653 arm_print_tune_info ();
28654
28655 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28656 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28657
28658 if (TARGET_HARD_FLOAT_ABI)
28659 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28660
28661 /* Some of these attributes only apply when the corresponding features
28662 are used. However we don't have any easy way of figuring this out.
28663 Conservatively record the setting that would have been used. */
28664
28665 if (flag_rounding_math)
28666 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28667
28668 if (!flag_unsafe_math_optimizations)
28669 {
28670 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28671 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28672 }
28673 if (flag_signaling_nans)
28674 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28675
28676 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28677 flag_finite_math_only ? 1 : 3);
28678
28679 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28680 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28681 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28682 flag_short_enums ? 1 : 2);
28683
28684 /* Tag_ABI_optimization_goals. */
28685 if (optimize_size)
28686 val = 4;
28687 else if (optimize >= 2)
28688 val = 2;
28689 else if (optimize)
28690 val = 1;
28691 else
28692 val = 6;
28693 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28694
28695 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28696 unaligned_access);
28697
28698 if (arm_fp16_format)
28699 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28700 (int) arm_fp16_format);
28701
28702 if (TARGET_HAVE_PACBTI)
28703 {
28704 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 2);
28705 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 2);
28706 }
28707 else if (pac || bti)
28708 {
28709 arm_emit_eabi_attribute ("Tag_PAC_extension", 50, 1);
28710 arm_emit_eabi_attribute ("Tag_BTI_extension", 52, 1);
28711 }
28712
28713 if (bti)
28714 arm_emit_eabi_attribute ("TAG_BTI_use", 74, 1);
28715 if (pac)
28716 arm_emit_eabi_attribute ("TAG_PACRET_use", 76, 1);
28717
28718 if (arm_lang_output_object_attributes_hook)
28719 arm_lang_output_object_attributes_hook();
28720 }
28721
28722 default_file_start ();
28723 }
28724
28725 static void
28726 arm_file_end (void)
28727 {
28728 int regno;
28729
28730 /* Just in case the last function output in the assembler had non-default
28731 architecture directives, we force the assembler state back to the default
28732 set, so that any 'calculated' build attributes are based on the default
28733 options rather than the special options for that function. */
28734 arm_print_asm_arch_directives
28735 (asm_out_file, TREE_TARGET_OPTION (target_option_default_node));
28736
28737 if (NEED_INDICATE_EXEC_STACK)
28738 /* Add .note.GNU-stack. */
28739 file_end_indicate_exec_stack ();
28740
28741 if (! thumb_call_reg_needed)
28742 return;
28743
28744 switch_to_section (text_section);
28745 asm_fprintf (asm_out_file, "\t.code 16\n");
28746 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28747
28748 for (regno = 0; regno < LR_REGNUM; regno++)
28749 {
28750 rtx label = thumb_call_via_label[regno];
28751
28752 if (label != 0)
28753 {
28754 targetm.asm_out.internal_label (asm_out_file, "L",
28755 CODE_LABEL_NUMBER (label));
28756 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28757 }
28758 }
28759 }
28760
28761 #ifndef ARM_PE
28762 /* Symbols in the text segment can be accessed without indirecting via the
28763 constant pool; it may take an extra binary operation, but this is still
28764 faster than indirecting via memory. Don't do this when not optimizing,
28765 since we won't be calculating all of the offsets necessary to do this
28766 simplification. */
28767
28768 static void
28769 arm_encode_section_info (tree decl, rtx rtl, int first)
28770 {
28771 if (optimize > 0 && TREE_CONSTANT (decl))
28772 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28773
28774 default_encode_section_info (decl, rtl, first);
28775 }
28776 #endif /* !ARM_PE */
28777
28778 static void
28779 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28780 {
28781 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28782 && !strcmp (prefix, "L"))
28783 {
28784 arm_ccfsm_state = 0;
28785 arm_target_insn = NULL;
28786 }
28787 default_internal_label (stream, prefix, labelno);
28788 }
28789
28790 /* Define classes to generate code as RTL or output asm to a file.
28791 Using templates then allows to use the same code to output code
28792 sequences in the two formats. */
28793 class thumb1_const_rtl
28794 {
28795 public:
28796 thumb1_const_rtl (rtx dst) : dst (dst) {}
28797
28798 void mov (HOST_WIDE_INT val)
28799 {
28800 emit_set_insn (dst, GEN_INT (val));
28801 }
28802
28803 void add (HOST_WIDE_INT val)
28804 {
28805 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28806 }
28807
28808 void ashift (HOST_WIDE_INT shift)
28809 {
28810 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28811 }
28812
28813 void neg ()
28814 {
28815 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28816 }
28817
28818 private:
28819 rtx dst;
28820 };
28821
28822 class thumb1_const_print
28823 {
28824 public:
28825 thumb1_const_print (FILE *f, int regno)
28826 {
28827 t_file = f;
28828 dst_regname = reg_names[regno];
28829 }
28830
28831 void mov (HOST_WIDE_INT val)
28832 {
28833 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28834 dst_regname, val);
28835 }
28836
28837 void add (HOST_WIDE_INT val)
28838 {
28839 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28840 dst_regname, val);
28841 }
28842
28843 void ashift (HOST_WIDE_INT shift)
28844 {
28845 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28846 dst_regname, shift);
28847 }
28848
28849 void neg ()
28850 {
28851 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28852 }
28853
28854 private:
28855 FILE *t_file;
28856 const char *dst_regname;
28857 };
28858
28859 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28860 Avoid generating useless code when one of the bytes is zero. */
28861 template <class T>
28862 void
28863 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28864 {
28865 bool mov_done_p = false;
28866 unsigned HOST_WIDE_INT val = op1;
28867 int shift = 0;
28868 int i;
28869
28870 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28871
28872 if (val <= 255)
28873 {
28874 dst.mov (val);
28875 return;
28876 }
28877
28878 /* For negative numbers with the top nine bits set, build the
28879 opposite of OP1, then negate it; the result is generally
28880 shorter, and never longer. */
28881 if ((val & 0xFF800000) == 0xFF800000)
28882 {
28883 thumb1_gen_const_int_1 (dst, -op1);
28884 dst.neg ();
28885 return;
28886 }
28887
28888 /* In the general case, we need 7 instructions to build
28889 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28890 do better if VAL is small enough, or
28891 right-shiftable by a suitable amount. If the
28892 right-shift lets us encode at least one byte fewer,
28893 it's worth it: we save an adds and an lsls at the
28894 expense of a final lsls. */
28895 int final_shift = number_of_first_bit_set (val);
28896
28897 int leading_zeroes = clz_hwi (val);
28898 int number_of_bytes_needed
28899 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28900 / BITS_PER_UNIT) + 1;
28901 int number_of_bytes_needed2
28902 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28903 / BITS_PER_UNIT) + 1;
28904
28905 if (number_of_bytes_needed2 < number_of_bytes_needed)
28906 val >>= final_shift;
28907 else
28908 final_shift = 0;
28909
28910 /* If we are in a very small range, we can use either a single movs
28911 or movs+adds. */
28912 if (val <= 510)
28913 {
28914 if (val > 255)
28915 {
28916 unsigned HOST_WIDE_INT high = val - 255;
28917
28918 dst.mov (high);
28919 dst.add (255);
28920 }
28921 else
28922 dst.mov (val);
28923
28924 if (final_shift > 0)
28925 dst.ashift (final_shift);
28926 }
28927 else
28928 {
28929 /* General case, emit upper 3 bytes as needed. */
28930 for (i = 0; i < 3; i++)
28931 {
28932 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28933
28934 if (byte)
28935 {
28936 /* We are about to emit new bits, stop accumulating a
28937 shift amount, and left-shift only if we have already
28938 emitted some upper bits. */
28939 if (mov_done_p)
28940 {
28941 dst.ashift (shift);
28942 dst.add (byte);
28943 }
28944 else
28945 dst.mov (byte);
28946
28947 /* Stop accumulating shift amount since we've just
28948 emitted some bits. */
28949 shift = 0;
28950
28951 mov_done_p = true;
28952 }
28953
28954 if (mov_done_p)
28955 shift += 8;
28956 }
28957
28958 /* Emit lower byte. */
28959 if (!mov_done_p)
28960 dst.mov (val & 0xff);
28961 else
28962 {
28963 dst.ashift (shift);
28964 if (val & 0xff)
28965 dst.add (val & 0xff);
28966 }
28967
28968 if (final_shift > 0)
28969 dst.ashift (final_shift);
28970 }
28971 }
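/* Hand-traced examples of the sequences produced above (the asm variant
   prints decimal immediates; hex is used here for readability, and the
   destination register is whatever the caller supplies):

     0x1234   ->  movs rd, #0x12 ; lsls rd, #8 ; adds rd, #0x34
     0x40000  ->  movs rd, #1    ; lsls rd, #18
     -200     ->  movs rd, #200  ; rsbs rd, #0

   These traces are illustrative only.  */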
28972
28973 /* Proxies for thumb1.md, since the thumb1_const_print and
28974 thumb1_const_rtl classes are not exported. */
28975 void
28976 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28977 {
28978 thumb1_const_rtl t (dst);
28979 thumb1_gen_const_int_1 (t, op1);
28980 }
28981
28982 void
28983 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28984 {
28985 thumb1_const_print t (asm_out_file, REGNO (dst));
28986 thumb1_gen_const_int_1 (t, op1);
28987 }
28988
28989 /* Output code to add DELTA to the first argument, and then jump
28990 to FUNCTION. Used for C++ multiple inheritance. */
28991
28992 static void
28993 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28994 HOST_WIDE_INT, tree function)
28995 {
28996 static int thunk_label = 0;
28997 char label[256];
28998 char labelpc[256];
28999 int mi_delta = delta;
29000 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
29001 int shift = 0;
29002 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
29003 ? 1 : 0);
29004 if (mi_delta < 0)
29005 mi_delta = - mi_delta;
29006
29007 final_start_function (emit_barrier (), file, 1);
29008
29009 if (TARGET_THUMB1)
29010 {
29011 int labelno = thunk_label++;
29012 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
29013 /* Thunks are entered in arm mode when available. */
29014 if (TARGET_THUMB1_ONLY)
29015 {
29016 /* push r3 so we can use it as a temporary. */
29017 /* TODO: Omit this save if r3 is not used. */
29018 fputs ("\tpush {r3}\n", file);
29019
29020 /* With -mpure-code, we cannot load the address from the
29021 constant pool: we build it explicitly. */
29022 if (target_pure_code)
29023 {
29024 fputs ("\tmovs\tr3, #:upper8_15:#", file);
29025 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29026 fputc ('\n', file);
29027 fputs ("\tlsls r3, #8\n", file);
29028 fputs ("\tadds\tr3, #:upper0_7:#", file);
29029 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29030 fputc ('\n', file);
29031 fputs ("\tlsls r3, #8\n", file);
29032 fputs ("\tadds\tr3, #:lower8_15:#", file);
29033 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29034 fputc ('\n', file);
29035 fputs ("\tlsls r3, #8\n", file);
29036 fputs ("\tadds\tr3, #:lower0_7:#", file);
29037 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29038 fputc ('\n', file);
29039 }
29040 else
29041 fputs ("\tldr\tr3, ", file);
29042 }
29043 else
29044 {
29045 fputs ("\tldr\tr12, ", file);
29046 }
29047
29048 if (!target_pure_code)
29049 {
29050 assemble_name (file, label);
29051 fputc ('\n', file);
29052 }
29053
29054 if (flag_pic)
29055 {
29056 /* If we are generating PIC, the ldr instruction below loads
29057 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
29058 the address of the add + 8, so we have:
29059
29060 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
29061 = target + 1.
29062
29063 Note that we have "+ 1" because some versions of GNU ld
29064 don't set the low bit of the result for R_ARM_REL32
29065 relocations against thumb function symbols.
29066 On ARMv6M this is +4, not +8. */
29067 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
29068 assemble_name (file, labelpc);
29069 fputs (":\n", file);
29070 if (TARGET_THUMB1_ONLY)
29071 {
29072 /* This is 2 insns after the start of the thunk, so we know it
29073 is 4-byte aligned. */
29074 fputs ("\tadd\tr3, pc, r3\n", file);
29075 fputs ("\tmov r12, r3\n", file);
29076 }
29077 else
29078 fputs ("\tadd\tr12, pc, r12\n", file);
29079 }
29080 else if (TARGET_THUMB1_ONLY)
29081 fputs ("\tmov r12, r3\n", file);
29082 }
29083 if (TARGET_THUMB1_ONLY)
29084 {
29085 if (mi_delta > 255)
29086 {
29087 /* With -mpure-code, we cannot load MI_DELTA from the
29088 constant pool: we build it explicitly. */
29089 if (target_pure_code)
29090 {
29091 thumb1_const_print r3 (file, 3);
29092 thumb1_gen_const_int_1 (r3, mi_delta);
29093 }
29094 else
29095 {
29096 fputs ("\tldr\tr3, ", file);
29097 assemble_name (file, label);
29098 fputs ("+4\n", file);
29099 }
29100 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
29101 mi_op, this_regno, this_regno);
29102 }
29103 else if (mi_delta != 0)
29104 {
29105 /* Thumb1 unified syntax requires s suffix in instruction name when
29106 one of the operands is immediate. */
29107 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
29108 mi_op, this_regno, this_regno,
29109 mi_delta);
29110 }
29111 }
29112 else
29113 {
29114 /* TODO: Use movw/movt for large constants when available. */
29115 while (mi_delta != 0)
29116 {
29117 if ((mi_delta & (3 << shift)) == 0)
29118 shift += 2;
29119 else
29120 {
29121 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
29122 mi_op, this_regno, this_regno,
29123 mi_delta & (0xff << shift));
29124 mi_delta &= ~(0xff << shift);
29125 shift += 8;
29126 }
29127 }
29128 }
29129 if (TARGET_THUMB1)
29130 {
29131 if (TARGET_THUMB1_ONLY)
29132 fputs ("\tpop\t{r3}\n", file);
29133
29134 fprintf (file, "\tbx\tr12\n");
29135
29136 /* With -mpure-code, we don't need to emit literals for the
29137 function address and delta since we emitted code to build
29138 them. */
29139 if (!target_pure_code)
29140 {
29141 ASM_OUTPUT_ALIGN (file, 2);
29142 assemble_name (file, label);
29143 fputs (":\n", file);
29144 if (flag_pic)
29145 {
29146 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
29147 rtx tem = XEXP (DECL_RTL (function), 0);
29148 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
29149 pipeline offset is four rather than eight. Adjust the offset
29150 accordingly. */
29151 tem = plus_constant (GET_MODE (tem), tem,
29152 TARGET_THUMB1_ONLY ? -3 : -7);
29153 tem = gen_rtx_MINUS (GET_MODE (tem),
29154 tem,
29155 gen_rtx_SYMBOL_REF (Pmode,
29156 ggc_strdup (labelpc)));
29157 assemble_integer (tem, 4, BITS_PER_WORD, 1);
29158 }
29159 else
29160 /* Output ".word .LTHUNKn". */
29161 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
29162
29163 if (TARGET_THUMB1_ONLY && mi_delta > 255)
29164 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
29165 }
29166 }
29167 else
29168 {
29169 fputs ("\tb\t", file);
29170 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
29171 if (NEED_PLT_RELOC)
29172 fputs ("(PLT)", file);
29173 fputc ('\n', file);
29174 }
29175
29176 final_end_function ();
29177 }
29178
29179 /* MI thunk handling for TARGET_32BIT. */
29180
29181 static void
29182 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
29183 HOST_WIDE_INT vcall_offset, tree function)
29184 {
29185 const bool long_call_p = arm_is_long_call_p (function);
29186
29187 /* On ARM, this_regno is R0 or R1 depending on
29188 whether the function returns an aggregate or not.
29189 */
29190 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
29191 function)
29192 ? R1_REGNUM : R0_REGNUM);
29193
29194 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
29195 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
29196 reload_completed = 1;
29197 emit_note (NOTE_INSN_PROLOGUE_END);
29198
29199 /* Add DELTA to THIS_RTX. */
29200 if (delta != 0)
29201 arm_split_constant (PLUS, Pmode, NULL_RTX,
29202 delta, this_rtx, this_rtx, false);
29203
29204 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
29205 if (vcall_offset != 0)
29206 {
29207 /* Load *THIS_RTX. */
29208 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
29209 /* Compute *THIS_RTX + VCALL_OFFSET. */
29210 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
29211 false);
29212 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
29213 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
29214 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
29215 }
29216
29217 /* Generate a tail call to the target function. */
29218 if (!TREE_USED (function))
29219 {
29220 assemble_external (function);
29221 TREE_USED (function) = 1;
29222 }
29223 rtx funexp = XEXP (DECL_RTL (function), 0);
29224 if (long_call_p)
29225 {
29226 emit_move_insn (temp, funexp);
29227 funexp = temp;
29228 }
29229 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29230 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
29231 SIBLING_CALL_P (insn) = 1;
29232 emit_barrier ();
29233
29234 /* Indirect calls require a bit of fixup in PIC mode. */
29235 if (long_call_p)
29236 {
29237 split_all_insns_noflow ();
29238 arm_reorg ();
29239 }
29240
29241 insn = get_insns ();
29242 shorten_branches (insn);
29243 final_start_function (insn, file, 1);
29244 final (insn, file, 1);
29245 final_end_function ();
29246
29247 /* Stop pretending this is a post-reload pass. */
29248 reload_completed = 0;
29249 }
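/* A minimal sketch of the code this produces for a thunk with DELTA == 4,
   no vcall offset and a short call (assuming a non-aggregate return, so
   the this pointer is in r0):

       add     r0, r0, #4
       b       <target function>

   A non-zero VCALL_OFFSET additionally loads through the vtable via ip
   before the tail call.  */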
29250
29251 /* Output code to add DELTA to the first argument, and then jump
29252 to FUNCTION. Used for C++ multiple inheritance. */
29253
29254 static void
29255 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
29256 HOST_WIDE_INT vcall_offset, tree function)
29257 {
29258 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
29259
29260 assemble_start_function (thunk, fnname);
29261 if (TARGET_32BIT)
29262 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
29263 else
29264 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
29265 assemble_end_function (thunk, fnname);
29266 }
29267
29268 int
29269 arm_emit_vector_const (FILE *file, rtx x)
29270 {
29271 int i;
29272 const char * pattern;
29273
29274 gcc_assert (GET_CODE (x) == CONST_VECTOR);
29275
29276 switch (GET_MODE (x))
29277 {
29278 case E_V2SImode: pattern = "%08x"; break;
29279 case E_V4HImode: pattern = "%04x"; break;
29280 case E_V8QImode: pattern = "%02x"; break;
29281 default: gcc_unreachable ();
29282 }
29283
29284 fprintf (file, "0x");
29285 for (i = CONST_VECTOR_NUNITS (x); i--;)
29286 {
29287 rtx element;
29288
29289 element = CONST_VECTOR_ELT (x, i);
29290 fprintf (file, pattern, INTVAL (element));
29291 }
29292
29293 return 1;
29294 }
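/* Note that the elements are printed from the highest-numbered lane down,
   so an illustrative V4HImode vector {1, 2, 3, 4} comes out as
   0x0004000300020001.  */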
29295
29296 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
29297 HFmode constant pool entries are actually loaded with ldr. */
29298 void
29299 arm_emit_fp16_const (rtx c)
29300 {
29301 long bits;
29302
29303 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
29304 if (WORDS_BIG_ENDIAN)
29305 assemble_zeros (2);
29306 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
29307 if (!WORDS_BIG_ENDIAN)
29308 assemble_zeros (2);
29309 }
29310
29311 const char *
29312 arm_output_load_gr (rtx *operands)
29313 {
29314 rtx reg;
29315 rtx offset;
29316 rtx wcgr;
29317 rtx sum;
29318
29319 if (!MEM_P (operands [1])
29320 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
29321 || !REG_P (reg = XEXP (sum, 0))
29322 || !CONST_INT_P (offset = XEXP (sum, 1))
29323 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
29324 return "wldrw%?\t%0, %1";
29325
29326 /* Fix up an out-of-range load of a GR register. */
29327 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
29328 wcgr = operands[0];
29329 operands[0] = reg;
29330 output_asm_insn ("ldr%?\t%0, %1", operands);
29331
29332 operands[0] = wcgr;
29333 operands[1] = reg;
29334 output_asm_insn ("tmcr%?\t%0, %1", operands);
29335 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
29336
29337 return "";
29338 }
29339
29340 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
29341
29342 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
29343 named arg and all anonymous args onto the stack.
29344 XXX I know the prologue shouldn't be pushing registers, but it is faster
29345 that way. */
29346
29347 static void
29348 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
29349 const function_arg_info &arg,
29350 int *pretend_size,
29351 int second_time ATTRIBUTE_UNUSED)
29352 {
29353 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
29354 int nregs;
29355
29356 cfun->machine->uses_anonymous_args = 1;
29357 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
29358 {
29359 nregs = pcum->aapcs_ncrn;
29360 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
29361 && (nregs & 1))
29362 {
29363 int res = arm_needs_doubleword_align (arg.mode, arg.type);
29364 if (res < 0 && warn_psabi)
29365 inform (input_location, "parameter passing for argument of "
29366 "type %qT changed in GCC 7.1", arg.type);
29367 else if (res > 0)
29368 {
29369 nregs++;
29370 if (res > 1 && warn_psabi)
29371 inform (input_location,
29372 "parameter passing for argument of type "
29373 "%qT changed in GCC 9.1", arg.type);
29374 }
29375 }
29376 }
29377 else
29378 nregs = pcum->nregs;
29379
29380 if (nregs < NUM_ARG_REGS)
29381 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
29382 }
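/* Worked example: for a variadic function whose named arguments occupy r0
   and r1, nregs is 2, so *pretend_size becomes (4 - 2) * 4 = 8 and the
   prologue pushes r2 and r3, leaving the anonymous arguments contiguous
   with any stack-passed ones.  (NUM_ARG_REGS is 4 and UNITS_PER_WORD is 4
   on ARM.)  */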
29383
29384 /* We can't rely on the caller doing the proper promotion when
29385 using APCS or ATPCS. */
29386
29387 static bool
29388 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
29389 {
29390 return !TARGET_AAPCS_BASED;
29391 }
29392
29393 static machine_mode
29394 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
29395 machine_mode mode,
29396 int *punsignedp ATTRIBUTE_UNUSED,
29397 const_tree fntype ATTRIBUTE_UNUSED,
29398 int for_return ATTRIBUTE_UNUSED)
29399 {
29400 if (GET_MODE_CLASS (mode) == MODE_INT
29401 && GET_MODE_SIZE (mode) < 4)
29402 return SImode;
29403
29404 return mode;
29405 }
29406
29407
29408 static bool
29409 arm_default_short_enums (void)
29410 {
29411 return ARM_DEFAULT_SHORT_ENUMS;
29412 }
29413
29414
29415 /* AAPCS requires that anonymous bitfields affect structure alignment. */
29416
29417 static bool
29418 arm_align_anon_bitfield (void)
29419 {
29420 return TARGET_AAPCS_BASED;
29421 }
29422
29423
29424 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
29425
29426 static tree
29427 arm_cxx_guard_type (void)
29428 {
29429 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
29430 }
29431
29432
29433 /* The EABI says test the least significant bit of a guard variable. */
29434
29435 static bool
29436 arm_cxx_guard_mask_bit (void)
29437 {
29438 return TARGET_AAPCS_BASED;
29439 }
29440
29441
29442 /* The EABI specifies that all array cookies are 8 bytes long. */
29443
29444 static tree
29445 arm_get_cookie_size (tree type)
29446 {
29447 tree size;
29448
29449 if (!TARGET_AAPCS_BASED)
29450 return default_cxx_get_cookie_size (type);
29451
29452 size = build_int_cst (sizetype, 8);
29453 return size;
29454 }
29455
29456
29457 /* The EABI says that array cookies should also contain the element size. */
29458
29459 static bool
29460 arm_cookie_has_size (void)
29461 {
29462 return TARGET_AAPCS_BASED;
29463 }
29464
29465
29466 /* The EABI says constructors and destructors should return a pointer to
29467 the object constructed/destroyed. */
29468
29469 static bool
29470 arm_cxx_cdtor_returns_this (void)
29471 {
29472 return TARGET_AAPCS_BASED;
29473 }
29474
29475 /* The EABI says that an inline function may never be the key
29476 method. */
29477
29478 static bool
29479 arm_cxx_key_method_may_be_inline (void)
29480 {
29481 return !TARGET_AAPCS_BASED;
29482 }
29483
29484 static void
29485 arm_cxx_determine_class_data_visibility (tree decl)
29486 {
29487 if (!TARGET_AAPCS_BASED
29488 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
29489 return;
29490
29491 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
29492 is exported. However, on systems without dynamic vague linkage,
29493 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
29494 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
29495 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
29496 else
29497 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
29498 DECL_VISIBILITY_SPECIFIED (decl) = 1;
29499 }
29500
29501 static bool
29502 arm_cxx_class_data_always_comdat (void)
29503 {
29504 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
29505 vague linkage if the class has no key function. */
29506 return !TARGET_AAPCS_BASED;
29507 }
29508
29509
29510 /* The EABI says __aeabi_atexit should be used to register static
29511 destructors. */
29512
29513 static bool
29514 arm_cxx_use_aeabi_atexit (void)
29515 {
29516 return TARGET_AAPCS_BASED;
29517 }
29518
29519
29520 void
29521 arm_set_return_address (rtx source, rtx scratch)
29522 {
29523 arm_stack_offsets *offsets;
29524 HOST_WIDE_INT delta;
29525 rtx addr, mem;
29526 unsigned long saved_regs;
29527
29528 offsets = arm_get_frame_offsets ();
29529 saved_regs = offsets->saved_regs_mask;
29530
29531 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29532 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29533 else
29534 {
29535 if (frame_pointer_needed)
29536 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29537 else
29538 {
29539 /* LR will be the first saved register. */
29540 delta = offsets->outgoing_args - (offsets->frame + 4);
29541
29542
29543 if (delta >= 4096)
29544 {
29545 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29546 GEN_INT (delta & ~4095)));
29547 addr = scratch;
29548 delta &= 4095;
29549 }
29550 else
29551 addr = stack_pointer_rtx;
29552
29553 addr = plus_constant (Pmode, addr, delta);
29554 }
29555
29556 /* The store needs to be marked to prevent DSE from deleting
29557 it as dead if it is based on fp. */
29558 mem = gen_frame_mem (Pmode, addr);
29559 MEM_VOLATILE_P (mem) = true;
29560 emit_move_insn (mem, source);
29561 }
29562 }
29563
29564
29565 void
29566 thumb_set_return_address (rtx source, rtx scratch)
29567 {
29568 arm_stack_offsets *offsets;
29569 HOST_WIDE_INT delta;
29570 HOST_WIDE_INT limit;
29571 int reg;
29572 rtx addr, mem;
29573 unsigned long mask;
29574
29575 emit_use (source);
29576
29577 offsets = arm_get_frame_offsets ();
29578 mask = offsets->saved_regs_mask;
29579 if (mask & (1 << LR_REGNUM))
29580 {
29581 limit = 1024;
29582 /* Find the saved regs. */
29583 if (frame_pointer_needed)
29584 {
29585 delta = offsets->soft_frame - offsets->saved_args;
29586 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29587 if (TARGET_THUMB1)
29588 limit = 128;
29589 }
29590 else
29591 {
29592 delta = offsets->outgoing_args - offsets->saved_args;
29593 reg = SP_REGNUM;
29594 }
29595 /* Allow for the stack frame. */
29596 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29597 delta -= 16;
29598 /* The link register is always the first saved register. */
29599 delta -= 4;
29600
29601 /* Construct the address. */
29602 addr = gen_rtx_REG (SImode, reg);
29603 if (delta > limit)
29604 {
29605 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29606 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29607 addr = scratch;
29608 }
29609 else
29610 addr = plus_constant (Pmode, addr, delta);
29611
29612 /* The store needs to be marked to prevent DSE from deleting
29613 it as dead if it is based on fp. */
29614 mem = gen_frame_mem (Pmode, addr);
29615 MEM_VOLATILE_P (mem) = true;
29616 emit_move_insn (mem, source);
29617 }
29618 else
29619 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29620 }
29621
29622 /* Implements target hook vector_mode_supported_p. */
29623 bool
29624 arm_vector_mode_supported_p (machine_mode mode)
29625 {
29626 /* Neon also supports V2SImode, etc. listed in the clause below. */
29627 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29628 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29629 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29630 || mode == V8BFmode))
29631 return true;
29632
29633 if ((TARGET_NEON || TARGET_IWMMXT)
29634 && ((mode == V2SImode)
29635 || (mode == V4HImode)
29636 || (mode == V8QImode)))
29637 return true;
29638
29639 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29640 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29641 || mode == V2HAmode))
29642 return true;
29643
29644 if (TARGET_HAVE_MVE
29645 && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode)))
29646 return true;
29647
29648 if (TARGET_HAVE_MVE_FLOAT
29649 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29650 return true;
29651
29652 return false;
29653 }
29654
29655 /* Implements target hook array_mode_supported_p. */
29656
29657 static bool
29658 arm_array_mode_supported_p (machine_mode mode,
29659 unsigned HOST_WIDE_INT nelems)
29660 {
29661 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29662 for now, as the lane-swapping logic needs to be extended in the expanders.
29663 See PR target/82518. */
29664 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29665 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29666 && (nelems >= 2 && nelems <= 4))
29667 return true;
29668
29669 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29670 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29671 return true;
29672
29673 return false;
29674 }
29675
29676 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29677 registers when autovectorizing for Neon, at least until multiple vector
29678 widths are supported properly by the middle-end. */
29679
29680 static machine_mode
29681 arm_preferred_simd_mode (scalar_mode mode)
29682 {
29683 if (TARGET_NEON)
29684 switch (mode)
29685 {
29686 case E_HFmode:
29687 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29688 case E_SFmode:
29689 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29690 case E_SImode:
29691 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29692 case E_HImode:
29693 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29694 case E_QImode:
29695 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29696 case E_DImode:
29697 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29698 return V2DImode;
29699 break;
29700
29701 default:;
29702 }
29703
29704 if (TARGET_REALLY_IWMMXT)
29705 switch (mode)
29706 {
29707 case E_SImode:
29708 return V2SImode;
29709 case E_HImode:
29710 return V4HImode;
29711 case E_QImode:
29712 return V8QImode;
29713
29714 default:;
29715 }
29716
29717 if (TARGET_HAVE_MVE)
29718 switch (mode)
29719 {
29720 case E_QImode:
29721 return V16QImode;
29722 case E_HImode:
29723 return V8HImode;
29724 case E_SImode:
29725 return V4SImode;
29726
29727 default:;
29728 }
29729
29730 if (TARGET_HAVE_MVE_FLOAT)
29731 switch (mode)
29732 {
29733 case E_HFmode:
29734 return V8HFmode;
29735 case E_SFmode:
29736 return V4SFmode;
29737
29738 default:;
29739 }
29740
29741 return word_mode;
29742 }
29743
29744 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29745
29746 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29747 using r0-r4 for function arguments, r7 for the stack frame, and not having
29748 enough left over to do doubleword arithmetic. For Thumb-2 all the
29749 potentially problematic instructions accept high registers so this is not
29750 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29751 that require many low registers. */
29752 static bool
29753 arm_class_likely_spilled_p (reg_class_t rclass)
29754 {
29755 if ((TARGET_THUMB1 && rclass == LO_REGS)
29756 || rclass == CC_REG)
29757 return true;
29758
29759 return default_class_likely_spilled_p (rclass);
29760 }
29761
29762 /* Implements target hook small_register_classes_for_mode_p. */
29763 bool
29764 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29765 {
29766 return TARGET_THUMB1;
29767 }
29768
29769 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29770 ARM insns and therefore guarantee that the shift count is modulo 256.
29771 DImode shifts (those implemented by lib1funcs.S or by optabs.cc)
29772 guarantee no particular behavior for out-of-range counts. */
29773
29774 static unsigned HOST_WIDE_INT
29775 arm_shift_truncation_mask (machine_mode mode)
29776 {
29777 return mode == SImode ? 255 : 0;
29778 }
29779
29780
29781 /* Map internal gcc register numbers to DWARF2 register numbers. */
29782
29783 unsigned int
29784 arm_debugger_regno (unsigned int regno)
29785 {
29786 if (regno < 16)
29787 return regno;
29788
29789 if (IS_VFP_REGNUM (regno))
29790 {
29791 /* See comment in arm_dwarf_register_span. */
29792 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29793 return 64 + regno - FIRST_VFP_REGNUM;
29794 else
29795 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29796 }
29797
29798 if (IS_IWMMXT_GR_REGNUM (regno))
29799 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29800
29801 if (IS_IWMMXT_REGNUM (regno))
29802 return 112 + regno - FIRST_IWMMXT_REGNUM;
29803
29804 if (IS_PAC_REGNUM (regno))
29805 return DWARF_PAC_REGNUM;
29806
29807 return DWARF_FRAME_REGISTERS;
29808 }
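/* Illustrative mapping (matching the ranges described for
   arm_dwarf_register_span below): r0-r15 keep their numbers, the
   single-precision-capable VFP registers map to 64-95, D16-D31 map to
   272-287, the iWMMXt control and data registers map into the 104+ and
   112+ ranges, the PAC pseudo maps to DWARF_PAC_REGNUM, and anything else
   collapses to DWARF_FRAME_REGISTERS.  */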
29809
29810 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29811 GCC models them as 64 32-bit registers, so we need to describe this to
29812 the DWARF generation code. Other registers can use the default. */
29813 static rtx
29814 arm_dwarf_register_span (rtx rtl)
29815 {
29816 machine_mode mode;
29817 unsigned regno;
29818 rtx parts[16];
29819 int nregs;
29820 int i;
29821
29822 regno = REGNO (rtl);
29823 if (!IS_VFP_REGNUM (regno))
29824 return NULL_RTX;
29825
29826 /* XXX FIXME: The EABI defines two VFP register ranges:
29827 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29828 256-287: D0-D31
29829 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29830 corresponding D register. Until GDB supports this, we shall use the
29831 legacy encodings. We also use these encodings for D0-D15 for
29832 compatibility with older debuggers. */
29833 mode = GET_MODE (rtl);
29834 if (GET_MODE_SIZE (mode) < 8)
29835 return NULL_RTX;
29836
29837 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29838 {
29839 nregs = GET_MODE_SIZE (mode) / 4;
29840 for (i = 0; i < nregs; i += 2)
29841 if (TARGET_BIG_END)
29842 {
29843 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29844 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29845 }
29846 else
29847 {
29848 parts[i] = gen_rtx_REG (SImode, regno + i);
29849 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29850 }
29851 }
29852 else
29853 {
29854 nregs = GET_MODE_SIZE (mode) / 8;
29855 for (i = 0; i < nregs; i++)
29856 parts[i] = gen_rtx_REG (DImode, regno + i);
29857 }
29858
29859 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29860 }
29861
29862 #if ARM_UNWIND_INFO
29863 /* Emit unwind directives for a store-multiple instruction or stack pointer
29864 push during alignment.
29865 These should only ever be generated by the function prologue code, so
29866 expect them to have a particular form.
29867 The store-multiple instruction sometimes pushes pc as the last register,
29868 although it should not be tracked into unwind information, or for -Os
29869 sometimes pushes some dummy registers before the first register that needs
29870 to be tracked in unwind information; such dummy registers are there just
29871 to avoid separate stack adjustment, and will not be restored in the
29872 epilogue. */
29873
29874 static void
29875 arm_unwind_emit_sequence (FILE * out_file, rtx p)
29876 {
29877 int i;
29878 HOST_WIDE_INT offset;
29879 HOST_WIDE_INT nregs;
29880 int reg_size;
29881 unsigned reg;
29882 unsigned lastreg;
29883 unsigned padfirst = 0, padlast = 0;
29884 rtx e;
29885
29886 e = XVECEXP (p, 0, 0);
29887 gcc_assert (GET_CODE (e) == SET);
29888
29889 /* First insn will adjust the stack pointer. */
29890 gcc_assert (GET_CODE (e) == SET
29891 && REG_P (SET_DEST (e))
29892 && REGNO (SET_DEST (e)) == SP_REGNUM
29893 && GET_CODE (SET_SRC (e)) == PLUS);
29894
29895 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29896 nregs = XVECLEN (p, 0) - 1;
29897 gcc_assert (nregs);
29898
29899 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29900 if (reg < 16 || IS_PAC_REGNUM (reg))
29901 {
29902 /* For -Os dummy registers can be pushed at the beginning to
29903 avoid separate stack pointer adjustment. */
29904 e = XVECEXP (p, 0, 1);
29905 e = XEXP (SET_DEST (e), 0);
29906 if (GET_CODE (e) == PLUS)
29907 padfirst = INTVAL (XEXP (e, 1));
29908 gcc_assert (padfirst == 0 || optimize_size);
29909 /* The function prologue may also push pc, but not annotate it as it is
29910 never restored. We turn this into a stack pointer adjustment. */
29911 e = XVECEXP (p, 0, nregs);
29912 e = XEXP (SET_DEST (e), 0);
29913 if (GET_CODE (e) == PLUS)
29914 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29915 else
29916 padlast = offset - 4;
29917 gcc_assert (padlast == 0 || padlast == 4);
29918 if (padlast == 4)
29919 fprintf (out_file, "\t.pad #4\n");
29920 reg_size = 4;
29921 fprintf (out_file, "\t.save {");
29922 }
29923 else if (IS_VFP_REGNUM (reg))
29924 {
29925 reg_size = 8;
29926 fprintf (out_file, "\t.vsave {");
29927 }
29928 else
29929 /* Unknown register type. */
29930 gcc_unreachable ();
29931
29932 /* If the stack increment doesn't match the size of the saved registers,
29933 something has gone horribly wrong. */
29934 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29935
29936 offset = padfirst;
29937 lastreg = 0;
29938 /* The remaining insns will describe the stores. */
29939 for (i = 1; i <= nregs; i++)
29940 {
29941 /* Expect (set (mem <addr>) (reg)).
29942 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29943 e = XVECEXP (p, 0, i);
29944 gcc_assert (GET_CODE (e) == SET
29945 && MEM_P (SET_DEST (e))
29946 && REG_P (SET_SRC (e)));
29947
29948 reg = REGNO (SET_SRC (e));
29949 gcc_assert (reg >= lastreg);
29950
29951 if (i != 1)
29952 fprintf (out_file, ", ");
29953 /* We can't use %r for vfp because we need to use the
29954 double precision register names. */
29955 if (IS_VFP_REGNUM (reg))
29956 asm_fprintf (out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29957 else if (IS_PAC_REGNUM (reg))
29958 asm_fprintf (out_file, "ra_auth_code");
29959 else
29960 asm_fprintf (out_file, "%r", reg);
29961
29962 if (flag_checking)
29963 {
29964 /* Check that the addresses are consecutive. */
29965 e = XEXP (SET_DEST (e), 0);
29966 if (GET_CODE (e) == PLUS)
29967 gcc_assert (REG_P (XEXP (e, 0))
29968 && REGNO (XEXP (e, 0)) == SP_REGNUM
29969 && CONST_INT_P (XEXP (e, 1))
29970 && offset == INTVAL (XEXP (e, 1)));
29971 else
29972 gcc_assert (i == 1
29973 && REG_P (e)
29974 && REGNO (e) == SP_REGNUM);
29975 offset += reg_size;
29976 }
29977 }
29978 fprintf (out_file, "}\n");
29979 if (padfirst)
29980 fprintf (out_file, "\t.pad #%d\n", padfirst);
29981 }
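/* For illustration, a prologue "push {r4, r5, lr}" represented as the
   expected parallel emits

       .save {r4, r5, lr}

   while a "vpush {d8, d9}" emits ".vsave {d8, d9}", and any trailing pc
   push or -Os padding shows up as the .pad adjustments handled above.  */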
29982
29983 /* Emit unwind directives for a SET. */
29984
29985 static void
29986 arm_unwind_emit_set (FILE * out_file, rtx p)
29987 {
29988 rtx e0;
29989 rtx e1;
29990 unsigned reg;
29991
29992 e0 = XEXP (p, 0);
29993 e1 = XEXP (p, 1);
29994 switch (GET_CODE (e0))
29995 {
29996 case MEM:
29997 /* Pushing a single register. */
29998 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29999 || !REG_P (XEXP (XEXP (e0, 0), 0))
30000 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
30001 abort ();
30002
30003 asm_fprintf (out_file, "\t.save ");
30004 if (IS_VFP_REGNUM (REGNO (e1)))
30005 asm_fprintf(out_file, "{d%d}\n",
30006 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
30007 else
30008 asm_fprintf(out_file, "{%r}\n", REGNO (e1));
30009 break;
30010
30011 case REG:
30012 if (REGNO (e0) == SP_REGNUM)
30013 {
30014 /* A stack increment. */
30015 if (GET_CODE (e1) != PLUS
30016 || !REG_P (XEXP (e1, 0))
30017 || REGNO (XEXP (e1, 0)) != SP_REGNUM
30018 || !CONST_INT_P (XEXP (e1, 1)))
30019 abort ();
30020
30021 asm_fprintf (out_file, "\t.pad #%wd\n",
30022 -INTVAL (XEXP (e1, 1)));
30023 }
30024 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
30025 {
30026 HOST_WIDE_INT offset;
30027
30028 if (GET_CODE (e1) == PLUS)
30029 {
30030 if (!REG_P (XEXP (e1, 0))
30031 || !CONST_INT_P (XEXP (e1, 1)))
30032 abort ();
30033 reg = REGNO (XEXP (e1, 0));
30034 offset = INTVAL (XEXP (e1, 1));
30035 asm_fprintf (out_file, "\t.setfp %r, %r, #%wd\n",
30036 HARD_FRAME_POINTER_REGNUM, reg,
30037 offset);
30038 }
30039 else if (REG_P (e1))
30040 {
30041 reg = REGNO (e1);
30042 asm_fprintf (out_file, "\t.setfp %r, %r\n",
30043 HARD_FRAME_POINTER_REGNUM, reg);
30044 }
30045 else
30046 abort ();
30047 }
30048 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
30049 {
30050 /* Move from sp to reg. */
30051 asm_fprintf (out_file, "\t.movsp %r\n", REGNO (e0));
30052 }
30053 else if (GET_CODE (e1) == PLUS
30054 && REG_P (XEXP (e1, 0))
30055 && REGNO (XEXP (e1, 0)) == SP_REGNUM
30056 && CONST_INT_P (XEXP (e1, 1)))
30057 {
30058 /* Set reg to offset from sp. */
30059 asm_fprintf (out_file, "\t.movsp %r, #%d\n",
30060 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
30061 }
30062 else if (REGNO (e0) == IP_REGNUM && arm_current_function_pac_enabled_p ())
30063 {
30064 if (cfun->machine->pacspval_needed)
30065 asm_fprintf (out_file, "\t.pacspval\n");
30066 }
30067 else
30068 abort ();
30069 break;
30070
30071 default:
30072 abort ();
30073 }
30074 }
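/* Rough examples of the directives produced here: "str rX, [sp, #-4]!"
   becomes ".save {rX}", a "sp = sp - 16" adjustment becomes ".pad #16",
   and setting the hard frame pointer from "sp + 8" becomes
   ".setfp <fp>, sp, #8", where <fp> is r7 in Thumb state or fp (r11) in
   ARM state.  */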
30075
30076
30077 /* Emit unwind directives for the given insn. */
30078
30079 static void
30080 arm_unwind_emit (FILE * out_file, rtx_insn *insn)
30081 {
30082 rtx note, pat;
30083 bool handled_one = false;
30084
30085 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30086 return;
30087
30088 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30089 && (TREE_NOTHROW (current_function_decl)
30090 || crtl->all_throwers_are_sibcalls))
30091 return;
30092
30093 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
30094 return;
30095
30096 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
30097 {
30098 switch (REG_NOTE_KIND (note))
30099 {
30100 case REG_FRAME_RELATED_EXPR:
30101 pat = XEXP (note, 0);
30102 goto found;
30103
30104 case REG_CFA_REGISTER:
30105 pat = XEXP (note, 0);
30106 if (pat == NULL)
30107 {
30108 pat = PATTERN (insn);
30109 if (GET_CODE (pat) == PARALLEL)
30110 pat = XVECEXP (pat, 0, 0);
30111 }
30112
30113 /* Only emitted for IS_STACKALIGN re-alignment or for the PAC register. */
30114 {
30115 rtx dest, src;
30116 unsigned reg;
30117
30118 src = SET_SRC (pat);
30119 dest = SET_DEST (pat);
30120
30121 gcc_assert (src == stack_pointer_rtx
30122 || IS_PAC_REGNUM (REGNO (src)));
30123 reg = REGNO (dest);
30124
30125 if (IS_PAC_REGNUM (REGNO (src)))
30126 arm_unwind_emit_set (out_file, PATTERN (insn));
30127 else
30128 asm_fprintf (out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
30129 reg + 0x90, reg);
30130 }
30131 handled_one = true;
30132 break;
30133
30134 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
30135 to get correct dwarf information for shrink-wrap. We should not
30136 emit unwind information for it because these are used either for
30137 pretend arguments or notes to adjust sp and restore registers from
30138 stack. */
30139 case REG_CFA_DEF_CFA:
30140 case REG_CFA_ADJUST_CFA:
30141 case REG_CFA_RESTORE:
30142 return;
30143
30144 case REG_CFA_EXPRESSION:
30145 case REG_CFA_OFFSET:
30146 /* ??? Only handling here what we actually emit. */
30147 gcc_unreachable ();
30148
30149 default:
30150 break;
30151 }
30152 }
30153 if (handled_one)
30154 return;
30155 pat = PATTERN (insn);
30156 found:
30157
30158 switch (GET_CODE (pat))
30159 {
30160 case SET:
30161 arm_unwind_emit_set (out_file, pat);
30162 break;
30163
30164 case SEQUENCE:
30165 /* Store multiple. */
30166 arm_unwind_emit_sequence (out_file, pat);
30167 break;
30168
30169 default:
30170 abort();
30171 }
30172 }
30173
30174
30175 /* Output a reference from a function exception table to the type_info
30176 object X. The EABI specifies that the symbol should be relocated by
30177 an R_ARM_TARGET2 relocation. */
30178
30179 static bool
30180 arm_output_ttype (rtx x)
30181 {
30182 fputs ("\t.word\t", asm_out_file);
30183 output_addr_const (asm_out_file, x);
30184 /* Use special relocations for symbol references. */
30185 if (!CONST_INT_P (x))
30186 fputs ("(TARGET2)", asm_out_file);
30187 fputc ('\n', asm_out_file);
30188
30189 return TRUE;
30190 }
30191
30192 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
30193
30194 static void
30195 arm_asm_emit_except_personality (rtx personality)
30196 {
30197 fputs ("\t.personality\t", asm_out_file);
30198 output_addr_const (asm_out_file, personality);
30199 fputc ('\n', asm_out_file);
30200 }
30201 #endif /* ARM_UNWIND_INFO */
30202
30203 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
30204
30205 static void
30206 arm_asm_init_sections (void)
30207 {
30208 #if ARM_UNWIND_INFO
30209 exception_section = get_unnamed_section (0, output_section_asm_op,
30210 "\t.handlerdata");
30211 #endif /* ARM_UNWIND_INFO */
30212
30213 #ifdef OBJECT_FORMAT_ELF
30214 if (target_pure_code)
30215 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
30216 #endif
30217 }
30218
30219 /* Output unwind directives for the start/end of a function. */
30220
30221 void
30222 arm_output_fn_unwind (FILE * f, bool prologue)
30223 {
30224 if (arm_except_unwind_info (&global_options) != UI_TARGET)
30225 return;
30226
30227 if (prologue)
30228 fputs ("\t.fnstart\n", f);
30229 else
30230 {
30231 /* If this function will never be unwound, then mark it as such.
30232 The same condition is used in arm_unwind_emit to suppress
30233 the frame annotations. */
30234 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
30235 && (TREE_NOTHROW (current_function_decl)
30236 || crtl->all_throwers_are_sibcalls))
30237 fputs("\t.cantunwind\n", f);
30238
30239 fputs ("\t.fnend\n", f);
30240 }
30241 }
30242
30243 static bool
30244 arm_emit_tls_decoration (FILE *fp, rtx x)
30245 {
30246 enum tls_reloc reloc;
30247 rtx val;
30248
30249 val = XVECEXP (x, 0, 0);
30250 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
30251
30252 output_addr_const (fp, val);
30253
30254 switch (reloc)
30255 {
30256 case TLS_GD32:
30257 fputs ("(tlsgd)", fp);
30258 break;
30259 case TLS_GD32_FDPIC:
30260 fputs ("(tlsgd_fdpic)", fp);
30261 break;
30262 case TLS_LDM32:
30263 fputs ("(tlsldm)", fp);
30264 break;
30265 case TLS_LDM32_FDPIC:
30266 fputs ("(tlsldm_fdpic)", fp);
30267 break;
30268 case TLS_LDO32:
30269 fputs ("(tlsldo)", fp);
30270 break;
30271 case TLS_IE32:
30272 fputs ("(gottpoff)", fp);
30273 break;
30274 case TLS_IE32_FDPIC:
30275 fputs ("(gottpoff_fdpic)", fp);
30276 break;
30277 case TLS_LE32:
30278 fputs ("(tpoff)", fp);
30279 break;
30280 case TLS_DESCSEQ:
30281 fputs ("(tlsdesc)", fp);
30282 break;
30283 default:
30284 gcc_unreachable ();
30285 }
30286
30287 switch (reloc)
30288 {
30289 case TLS_GD32:
30290 case TLS_LDM32:
30291 case TLS_IE32:
30292 case TLS_DESCSEQ:
30293 fputs (" + (. - ", fp);
30294 output_addr_const (fp, XVECEXP (x, 0, 2));
30295 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather
30296 than subtracted. */
30296 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
30297 output_addr_const (fp, XVECEXP (x, 0, 3));
30298 fputc (')', fp);
30299 break;
30300 default:
30301 break;
30302 }
30303
30304 return TRUE;
30305 }
30306
30307 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
30308
30309 static void
30310 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
30311 {
30312 gcc_assert (size == 4);
30313 fputs ("\t.word\t", file);
30314 output_addr_const (file, x);
30315 fputs ("(tlsldo)", file);
30316 }
30317
30318 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
30319
30320 static bool
30321 arm_output_addr_const_extra (FILE *fp, rtx x)
30322 {
30323 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
30324 return arm_emit_tls_decoration (fp, x);
30325 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
30326 {
30327 char label[256];
30328 int labelno = INTVAL (XVECEXP (x, 0, 0));
30329
30330 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
30331 assemble_name_raw (fp, label);
30332
30333 return TRUE;
30334 }
30335 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
30336 {
30337 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
30338 if (GOT_PCREL)
30339 fputs ("+.", fp);
30340 fputs ("-(", fp);
30341 output_addr_const (fp, XVECEXP (x, 0, 0));
30342 fputc (')', fp);
30343 return TRUE;
30344 }
30345 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
30346 {
30347 output_addr_const (fp, XVECEXP (x, 0, 0));
30348 if (GOT_PCREL)
30349 fputs ("+.", fp);
30350 fputs ("-(", fp);
30351 output_addr_const (fp, XVECEXP (x, 0, 1));
30352 fputc (')', fp);
30353 return TRUE;
30354 }
30355 else if (GET_CODE (x) == CONST_VECTOR)
30356 return arm_emit_vector_const (fp, x);
30357
30358 return FALSE;
30359 }
30360
30361 /* Output assembly for a shift instruction.
30362 SET_FLAGS determines how the instruction modifies the condition codes.
30363 0 - Do not set condition codes.
30364 1 - Set condition codes.
30365 2 - Use smallest instruction. */
30366 const char *
30367 arm_output_shift(rtx * operands, int set_flags)
30368 {
30369 char pattern[100];
30370 static const char flag_chars[3] = {'?', '.', '!'};
30371 const char *shift;
30372 HOST_WIDE_INT val;
30373 char c;
30374
30375 c = flag_chars[set_flags];
30376 shift = shift_op(operands[3], &val);
30377 if (shift)
30378 {
30379 if (val != -1)
30380 operands[2] = GEN_INT(val);
30381 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
30382 }
30383 else
30384 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
30385
30386 output_asm_insn (pattern, operands);
30387 return "";
30388 }
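
/* For example (hypothetical operands: destination r0, source r1, operand 3
   a logical shift right by 2) with SET_FLAGS == 0, shift_op returns "lsr",
   the template built above is "lsr%?\t%0, %1, %2", and the unconditional
   output is roughly

	lsr	r0, r1, #2  */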
30389
30390 /* Output assembly for a WMMX immediate shift instruction. */
30391 const char *
30392 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
30393 {
30394 int shift = INTVAL (operands[2]);
30395 char templ[50];
30396 machine_mode opmode = GET_MODE (operands[0]);
30397
30398 gcc_assert (shift >= 0);
30399
30400 /* Handle immediate shift values that are out of range for the element
30401 size: > 63 for the D qualifier, > 31 for W, or > 15 for H. */
30402 if (((opmode == V4HImode) && (shift > 15))
30403 || ((opmode == V2SImode) && (shift > 31))
30404 || ((opmode == DImode) && (shift > 63)))
30405 {
30406 if (wror_or_wsra)
30407 {
30408 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30409 output_asm_insn (templ, operands);
30410 if (opmode == DImode)
30411 {
30412 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
30413 output_asm_insn (templ, operands);
30414 }
30415 }
30416 else
30417 {
30418 /* The destination register will contain all zeros. */
30419 sprintf (templ, "wzero\t%%0");
30420 output_asm_insn (templ, operands);
30421 }
30422 return "";
30423 }
30424
30425 if ((opmode == DImode) && (shift > 32))
30426 {
30427 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
30428 output_asm_insn (templ, operands);
30429 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
30430 output_asm_insn (templ, operands);
30431 }
30432 else
30433 {
30434 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
30435 output_asm_insn (templ, operands);
30436 }
30437 return "";
30438 }
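
/* For example, a hypothetical DImode shift by 40 with INSN_NAME "wsrad"
   falls into the (shift > 32) case above and is split into two
   instructions, roughly

	wsrad	wr0, wr1, #32
	wsrad	wr0, wr0, #8  */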
30439
30440 /* Output assembly for a WMMX tinsr instruction. */
30441 const char *
30442 arm_output_iwmmxt_tinsr (rtx *operands)
30443 {
30444 int mask = INTVAL (operands[3]);
30445 int i;
30446 char templ[50];
30447 int units = mode_nunits[GET_MODE (operands[0])];
30448 gcc_assert ((mask & (mask - 1)) == 0);
30449 for (i = 0; i < units; ++i)
30450 {
30451 if ((mask & 0x01) == 1)
30452 {
30453 break;
30454 }
30455 mask >>= 1;
30456 }
30457 gcc_assert (i < units);
30458 {
30459 switch (GET_MODE (operands[0]))
30460 {
30461 case E_V8QImode:
30462 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
30463 break;
30464 case E_V4HImode:
30465 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
30466 break;
30467 case E_V2SImode:
30468 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
30469 break;
30470 default:
30471 gcc_unreachable ();
30472 break;
30473 }
30474 output_asm_insn (templ, operands);
30475 }
30476 return "";
30477 }
30478
30479 /* Output an arm casesi dispatch sequence. Used by arm_casesi_internal insn.
30480 Responsible for the handling of switch statements in ARM mode. */
30481 const char *
30482 arm_output_casesi (rtx *operands)
30483 {
30484 char label[100];
30485 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30486 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30487 output_asm_insn ("cmp\t%0, %1", operands);
30488 output_asm_insn ("bhi\t%l3", operands);
30489 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
30490 switch (GET_MODE (diff_vec))
30491 {
30492 case E_QImode:
30493 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30494 output_asm_insn ("ldrb\t%4, [%5, %0]", operands);
30495 else
30496 output_asm_insn ("ldrsb\t%4, [%5, %0]", operands);
30497 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30498 break;
30499 case E_HImode:
30500 if (REGNO (operands[4]) != REGNO (operands[5]))
30501 {
30502 output_asm_insn ("add\t%4, %0, %0", operands);
30503 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30504 output_asm_insn ("ldrh\t%4, [%5, %4]", operands);
30505 else
30506 output_asm_insn ("ldrsh\t%4, [%5, %4]", operands);
30507 }
30508 else
30509 {
30510 output_asm_insn ("add\t%4, %5, %0", operands);
30511 if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
30512 output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
30513 else
30514 output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
30515 }
30516 output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
30517 break;
30518 case E_SImode:
30519 if (flag_pic)
30520 {
30521 output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands);
30522 output_asm_insn ("add\t%|pc, %|pc, %4", operands);
30523 }
30524 else
30525 output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
30526 break;
30527 default:
30528 gcc_unreachable ();
30529 }
30530 assemble_label (asm_out_file, label);
30531 output_asm_insn ("nop", operands);
30532 return "";
30533 }
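
/* For example, a QImode dispatch table with unsigned offsets and
   hypothetical operands (index in r0, scratch r4, table base r5) expands
   to roughly

	cmp	r0, #<max>
	bhi	<default label>
	ldrb	r4, [r5, r0]
	add	pc, pc, r4, lsl #2
   <Lrtx label>:
	nop  */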
30534
30535 /* Output a Thumb-1 casesi dispatch sequence. */
30536 const char *
30537 thumb1_output_casesi (rtx *operands)
30538 {
30539 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
30540
30541 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30542
30543 switch (GET_MODE(diff_vec))
30544 {
30545 case E_QImode:
30546 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30547 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
30548 case E_HImode:
30549 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
30550 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
30551 case E_SImode:
30552 return "bl\t%___gnu_thumb1_case_si";
30553 default:
30554 gcc_unreachable ();
30555 }
30556 }
30557
30558 /* Output a Thumb-2 casesi instruction. */
30559 const char *
30560 thumb2_output_casesi (rtx *operands)
30561 {
30562 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
30563
30564 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
30565
30566 output_asm_insn ("cmp\t%0, %1", operands);
30567 output_asm_insn ("bhi\t%l3", operands);
30568 switch (GET_MODE(diff_vec))
30569 {
30570 case E_QImode:
30571 return "tbb\t[%|pc, %0]";
30572 case E_HImode:
30573 return "tbh\t[%|pc, %0, lsl #1]";
30574 case E_SImode:
30575 if (flag_pic)
30576 {
30577 output_asm_insn ("adr\t%4, %l2", operands);
30578 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
30579 output_asm_insn ("add\t%4, %4, %5", operands);
30580 return "bx\t%4";
30581 }
30582 else
30583 {
30584 output_asm_insn ("adr\t%4, %l2", operands);
30585 return "ldr\t%|pc, [%4, %0, lsl #2]";
30586 }
30587 default:
30588 gcc_unreachable ();
30589 }
30590 }
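
/* For example, for an HImode table the sequence above reduces to roughly

	cmp	r0, #<max>
	bhi	<default label>
	tbh	[pc, r0, lsl #1]  */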
30591
30592 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
30593 per-core tuning structs. */
30594 static int
30595 arm_issue_rate (void)
30596 {
30597 return current_tune->issue_rate;
30598 }
30599
30600 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30601 static int
30602 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30603 {
30604 if (DEBUG_INSN_P (insn))
30605 return more;
30606
30607 rtx_code code = GET_CODE (PATTERN (insn));
30608 if (code == USE || code == CLOBBER)
30609 return more;
30610
30611 if (get_attr_type (insn) == TYPE_NO_INSN)
30612 return more;
30613
30614 return more - 1;
30615 }
30616
30617 /* Return how many instructions the scheduler should look ahead to choose the
30618 best one. */
30619 static int
30620 arm_first_cycle_multipass_dfa_lookahead (void)
30621 {
30622 int issue_rate = arm_issue_rate ();
30623
30624 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30625 }
30626
30627 /* Enable modeling of L2 auto-prefetcher. */
30628 static int
30629 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30630 {
30631 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30632 }
30633
30634 const char *
30635 arm_mangle_type (const_tree type)
30636 {
30637 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30638 has to be mangled as if it is in the "std" namespace. */
30639 if (TARGET_AAPCS_BASED
30640 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30641 return "St9__va_list";
30642
30643 /* Half-precision floating point types. */
30644 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
30645 {
30646 if (TYPE_MAIN_VARIANT (type) == float16_type_node)
30647 return NULL;
30648 if (TYPE_MODE (type) == BFmode)
30649 return "u6__bf16";
30650 else
30651 return "Dh";
30652 }
30653
30654 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
30655 builtin type. */
30656 if (TYPE_NAME (type) != NULL)
30657 return arm_mangle_builtin_type (type);
30658
30659 /* Use the default mangling. */
30660 return NULL;
30661 }
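
/* For illustration of the rules above: on an AAPCS target a va_list
   argument mangles as "St9__va_list"; _Float16 (float16_type_node) falls
   back to the default mangling; __bf16 mangles as "u6__bf16"; and the
   remaining 16-bit scalar float, __fp16, mangles as "Dh".  */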
30662
30663 /* Order of allocation of core registers for Thumb: this allocation is
30664 written over the corresponding initial entries of the array
30665 initialized with REG_ALLOC_ORDER. We allocate all low registers
30666 first. Saving and restoring a low register is usually cheaper than
30667 using a call-clobbered high register. */
30668
30669 static const int thumb_core_reg_alloc_order[] =
30670 {
30671 3, 2, 1, 0, 4, 5, 6, 7,
30672 12, 14, 8, 9, 10, 11
30673 };
30674
30675 /* Adjust register allocation order when compiling for Thumb. */
30676
30677 void
30678 arm_order_regs_for_local_alloc (void)
30679 {
30680 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30681 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30682 if (TARGET_THUMB)
30683 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30684 sizeof (thumb_core_reg_alloc_order));
30685 }
30686
30687 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30688
30689 bool
30690 arm_frame_pointer_required (void)
30691 {
30692 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30693 return true;
30694
30695 /* If the function receives nonlocal gotos, it needs to save the frame
30696 pointer in the nonlocal_goto_save_area object. */
30697 if (cfun->has_nonlocal_label)
30698 return true;
30699
30700 /* The frame pointer is required for non-leaf APCS frames. */
30701 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30702 return true;
30703
30704 /* If we are probing the stack in the prologue, we will have a faulting
30705 instruction prior to the stack adjustment and this requires a frame
30706 pointer if we want to catch the exception using the EABI unwinder. */
30707 if (!IS_INTERRUPT (arm_current_func_type ())
30708 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30709 || flag_stack_clash_protection)
30710 && arm_except_unwind_info (&global_options) == UI_TARGET
30711 && cfun->can_throw_non_call_exceptions)
30712 {
30713 HOST_WIDE_INT size = get_frame_size ();
30714
30715 /* That's irrelevant if there is no stack adjustment. */
30716 if (size <= 0)
30717 return false;
30718
30719 /* That's relevant only if there is a stack probe. */
30720 if (crtl->is_leaf && !cfun->calls_alloca)
30721 {
30722 /* We don't have the final size of the frame so adjust. */
30723 size += 32 * UNITS_PER_WORD;
30724 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30725 return true;
30726 }
30727 else
30728 return true;
30729 }
30730
30731 return false;
30732 }
30733
30734 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30735 All modes except THUMB1 have conditional execution.
30736 If we have conditional arithmetic, return false before reload to
30737 enable some ifcvt transformations. */
30738 static bool
30739 arm_have_conditional_execution (void)
30740 {
30741 bool has_cond_exec, enable_ifcvt_trans;
30742
30743 /* Only THUMB1 lacks conditional execution. */
30744 has_cond_exec = !TARGET_THUMB1;
30745
30746 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30747 before reload. */
30748 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30749
30750 return has_cond_exec && !enable_ifcvt_trans;
30751 }
30752
30753 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30754 static HOST_WIDE_INT
30755 arm_vector_alignment (const_tree type)
30756 {
30757 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30758
30759 if (TARGET_AAPCS_BASED)
30760 align = MIN (align, 64);
30761
30762 return align;
30763 }
30764
30765 static unsigned int
30766 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30767 {
30768 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30769 {
30770 modes->safe_push (V16QImode);
30771 modes->safe_push (V8QImode);
30772 }
30773 return 0;
30774 }
30775
30776 static bool
30777 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30778 {
30779 /* Vectors which aren't in packed structures will not be less aligned than
30780 the natural alignment of their element type, so this is safe. */
30781 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30782 return !is_packed;
30783
30784 return default_builtin_vector_alignment_reachable (type, is_packed);
30785 }
30786
30787 static bool
30788 arm_builtin_support_vector_misalignment (machine_mode mode,
30789 const_tree type, int misalignment,
30790 bool is_packed)
30791 {
30792 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30793 {
30794 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30795
30796 if (is_packed)
30797 return align == 1;
30798
30799 /* If the misalignment is unknown, we should be able to handle the access
30800 so long as it is not to a member of a packed data structure. */
30801 if (misalignment == -1)
30802 return true;
30803
30804 /* Return true if the misalignment is a multiple of the natural alignment
30805 of the vector's element type. This is probably always going to be
30806 true in practice, since we've already established that this isn't a
30807 packed access. */
30808 return ((misalignment % align) == 0);
30809 }
30810
30811 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30812 is_packed);
30813 }
30814
30815 static void
30816 arm_conditional_register_usage (void)
30817 {
30818 int regno;
30819
30820 if (TARGET_THUMB1 && optimize_size)
30821 {
30822 /* When optimizing for size on Thumb-1, it's better not
30823 to use the HI regs, because of the overhead of
30824 stacking them. */
30825 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30826 fixed_regs[regno] = call_used_regs[regno] = 1;
30827 }
30828
30829 /* The link register can be clobbered by any branch insn,
30830 but we have no way to track that at present, so mark
30831 it as unavailable. */
30832 if (TARGET_THUMB1)
30833 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30834
30835 if (TARGET_32BIT && TARGET_VFP_BASE)
30836 {
30837 /* VFPv3 registers are disabled when earlier VFP
30838 versions are selected due to the definition of
30839 LAST_VFP_REGNUM. */
30840 for (regno = FIRST_VFP_REGNUM;
30841 regno <= LAST_VFP_REGNUM; ++ regno)
30842 {
30843 fixed_regs[regno] = 0;
30844 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30845 || regno >= FIRST_VFP_REGNUM + 32;
30846 }
30847 if (TARGET_HAVE_MVE)
30848 fixed_regs[VPR_REGNUM] = 0;
30849 }
30850
30851 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30852 {
30853 regno = FIRST_IWMMXT_GR_REGNUM;
30854 /* The 2002/10/09 revision of the XScale ABI has wCG0
30855 and wCG1 as call-preserved registers. The 2002/11/21
30856 revision changed this so that all wCG registers are
30857 scratch registers. */
30858 for (regno = FIRST_IWMMXT_GR_REGNUM;
30859 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30860 fixed_regs[regno] = 0;
30861 /* The XScale ABI has wR0 - wR9 as scratch registers,
30862 the rest as call-preserved registers. */
30863 for (regno = FIRST_IWMMXT_REGNUM;
30864 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30865 {
30866 fixed_regs[regno] = 0;
30867 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30868 }
30869 }
30870
30871 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30872 {
30873 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30874 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30875 }
30876 else if (TARGET_APCS_STACK)
30877 {
30878 fixed_regs[10] = 1;
30879 call_used_regs[10] = 1;
30880 }
30881 /* -mcaller-super-interworking reserves r11 for calls to
30882 _interwork_r11_call_via_rN(). Making the register global
30883 is an easy way of ensuring that it remains valid for all
30884 calls. */
30885 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30886 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30887 {
30888 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30889 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30890 if (TARGET_CALLER_INTERWORKING)
30891 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30892 }
30893
30894 /* The Q and GE bits are only accessed via special ACLE patterns. */
30895 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30896 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30897
30898 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30899 }
30900
30901 static reg_class_t
30902 arm_preferred_rename_class (reg_class_t rclass)
30903 {
30904 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30905 using GENERAL_REGS. During the register rename pass we therefore prefer
30906 LO_REGS, which can reduce code size. */
30907 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30908 return LO_REGS;
30909 else
30910 return NO_REGS;
30911 }
30912
30913 /* Compute the attribute "length" of insn "*push_multi".
30914 So this function MUST be kept in sync with that insn pattern. */
30915 int
30916 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30917 {
30918 int i, regno, hi_reg;
30919 int num_saves = XVECLEN (parallel_op, 0);
30920
30921 /* ARM mode. */
30922 if (TARGET_ARM)
30923 return 4;
30924 /* Thumb1 mode. */
30925 if (TARGET_THUMB1)
30926 return 2;
30927
30928 /* Thumb2 mode. */
30929 regno = REGNO (first_op);
30930 /* For PUSH/STM in Thumb-2 mode, we can use 16-bit encodings if the register
30931 list fits in 8 bits. Normally this means all registers in the list must be
30932 LO_REGS, that is R0-R7. If any HI_REGS are used, then we must use 32-bit
30933 encodings. There is one exception: for PUSH, LR (a HI_REG) can still be
30934 used with the 16-bit encoding. */
30935 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30936 for (i = 1; i < num_saves && !hi_reg; i++)
30937 {
30938 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30939 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30940 }
30941
30942 if (!hi_reg)
30943 return 2;
30944 return 4;
30945 }
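
/* For example, in Thumb-2 a hypothetical "push {r4-r7, lr}" uses only low
   registers plus LR, so this returns 2 (16-bit encoding), whereas
   "push {r4, r8}" includes a high register other than LR and returns 4.  */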
30946
30947 /* Compute the attribute "length" of insn. Currently, this function is used
30948 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30949 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30950 rtx. RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
30951 true if OPERANDS contains an insn that explicitly updates the base register. */
30952
30953 int
30954 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30955 {
30956 /* ARM mode. */
30957 if (TARGET_ARM)
30958 return 4;
30959 /* Thumb1 mode. */
30960 if (TARGET_THUMB1)
30961 return 2;
30962
30963 rtx parallel_op = operands[0];
30964 /* Initialize to the index of the last element of the PARALLEL. */
30965 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30966 /* Initialize to the base register number. */
30967 unsigned regno = REGNO (operands[1]);
30968 /* Skip return and write back pattern.
30969 We only need register pop pattern for later analysis. */
30970 unsigned first_indx = 0;
30971 first_indx += return_pc ? 1 : 0;
30972 first_indx += write_back_p ? 1 : 0;
30973
30974 /* A pop operation can be done through LDM or POP. If the base register is SP
30975 and write back is used, then the LDM is an alias of POP. */
30976 bool pop_p = (regno == SP_REGNUM && write_back_p);
30977 bool ldm_p = !pop_p;
30978
30979 /* Check base register for LDM. */
30980 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30981 return 4;
30982
30983 /* Check each register in the list. */
30984 for (; indx >= first_indx; indx--)
30985 {
30986 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30987 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30988 comment in arm_attr_length_push_multi. */
30989 if (REGNO_REG_CLASS (regno) == HI_REGS
30990 && (regno != PC_REGNUM || ldm_p))
30991 return 4;
30992 }
30993
30994 return 2;
30995 }
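
/* For example, in Thumb-2 a hypothetical "pop {r4-r7, pc}" (SP base with
   write back) keeps the 16-bit encoding and returns 2, whereas an LDM
   using a high base register, or popping a high register other than PC
   via POP, returns 4.  */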
30996
30997 /* Compute the number of instructions emitted by output_move_double. */
30998 int
30999 arm_count_output_move_double_insns (rtx *operands)
31000 {
31001 int count;
31002 rtx ops[2];
31003 /* output_move_double may modify the operands array, so call it
31004 here on a copy of the array. */
31005 ops[0] = operands[0];
31006 ops[1] = operands[1];
31007 output_move_double (ops, false, &count);
31008 return count;
31009 }
31010
31011 /* Same as above, but operands are a register/memory pair in SImode.
31012 Assumes the operands have the base register in position 0 and the memory in
31013 position 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
31014 int
31015 arm_count_ldrdstrd_insns (rtx *operands, bool load)
31016 {
31017 int count;
31018 rtx ops[2];
31019 int regnum, memnum;
31020 if (load)
31021 regnum = 0, memnum = 1;
31022 else
31023 regnum = 1, memnum = 0;
31024 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
31025 ops[memnum] = adjust_address (operands[2], DImode, 0);
31026 output_move_double (ops, false, &count);
31027 return count;
31028 }
31029
31030
31031 int
31032 vfp3_const_double_for_fract_bits (rtx operand)
31033 {
31034 REAL_VALUE_TYPE r0;
31035
31036 if (!CONST_DOUBLE_P (operand))
31037 return 0;
31038
31039 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
31040 if (exact_real_inverse (DFmode, &r0)
31041 && !REAL_VALUE_NEGATIVE (r0))
31042 {
31043 if (exact_real_truncate (DFmode, &r0))
31044 {
31045 HOST_WIDE_INT value = real_to_integer (&r0);
31046 value = value & 0xffffffff;
31047 if ((value != 0) && ( (value & (value - 1)) == 0))
31048 {
31049 int ret = exact_log2 (value);
31050 gcc_assert (IN_RANGE (ret, 0, 31));
31051 return ret;
31052 }
31053 }
31054 }
31055 return 0;
31056 }
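
/* For example, a CONST_DOUBLE of 0.125 has the exact inverse 8.0, which
   truncates exactly to the power of two 8, so this returns
   exact_log2 (8) == 3, i.e. three fraction bits.  */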
31057
31058 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
31059 log2 is in [1, 32], return that log2. Otherwise return -1.
31060 This is used in the patterns for vcvt.s32.f32 floating-point to
31061 fixed-point conversions. */
31062
31063 int
31064 vfp3_const_double_for_bits (rtx x)
31065 {
31066 const REAL_VALUE_TYPE *r;
31067
31068 if (!CONST_DOUBLE_P (x))
31069 return -1;
31070
31071 r = CONST_DOUBLE_REAL_VALUE (x);
31072
31073 if (REAL_VALUE_NEGATIVE (*r)
31074 || REAL_VALUE_ISNAN (*r)
31075 || REAL_VALUE_ISINF (*r)
31076 || !real_isinteger (r, SFmode))
31077 return -1;
31078
31079 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
31080
31081 /* The exact_log2 above will have returned -1 if this is
31082 not an exact log2. */
31083 if (!IN_RANGE (hwint, 1, 32))
31084 return -1;
31085
31086 return hwint;
31087 }
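
/* For example, 65536.0 is a non-negative integer power of two, so this
   returns 16; 3.0 is an integer but not a power of two, so exact_log2
   gives -1 and the function returns -1.  */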
31088
31089 \f
31090 /* Emit a memory barrier around an atomic sequence according to MODEL. */
31091
31092 static void
31093 arm_pre_atomic_barrier (enum memmodel model)
31094 {
31095 if (need_atomic_barrier_p (model, true))
31096 emit_insn (gen_memory_barrier ());
31097 }
31098
31099 static void
31100 arm_post_atomic_barrier (enum memmodel model)
31101 {
31102 if (need_atomic_barrier_p (model, false))
31103 emit_insn (gen_memory_barrier ());
31104 }
31105
31106 /* Emit the load-exclusive and store-exclusive instructions.
31107 Use acquire and release versions if necessary. */
31108
31109 static void
31110 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
31111 {
31112 rtx (*gen) (rtx, rtx);
31113
31114 if (acq)
31115 {
31116 switch (mode)
31117 {
31118 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
31119 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
31120 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
31121 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
31122 default:
31123 gcc_unreachable ();
31124 }
31125 }
31126 else
31127 {
31128 switch (mode)
31129 {
31130 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
31131 case E_HImode: gen = gen_arm_load_exclusivehi; break;
31132 case E_SImode: gen = gen_arm_load_exclusivesi; break;
31133 case E_DImode: gen = gen_arm_load_exclusivedi; break;
31134 default:
31135 gcc_unreachable ();
31136 }
31137 }
31138
31139 emit_insn (gen (rval, mem));
31140 }
31141
31142 static void
31143 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
31144 rtx mem, bool rel)
31145 {
31146 rtx (*gen) (rtx, rtx, rtx);
31147
31148 if (rel)
31149 {
31150 switch (mode)
31151 {
31152 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
31153 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
31154 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
31155 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
31156 default:
31157 gcc_unreachable ();
31158 }
31159 }
31160 else
31161 {
31162 switch (mode)
31163 {
31164 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
31165 case E_HImode: gen = gen_arm_store_exclusivehi; break;
31166 case E_SImode: gen = gen_arm_store_exclusivesi; break;
31167 case E_DImode: gen = gen_arm_store_exclusivedi; break;
31168 default:
31169 gcc_unreachable ();
31170 }
31171 }
31172
31173 emit_insn (gen (bval, rval, mem));
31174 }
31175
31176 /* Mark the previous jump instruction as unlikely. */
31177
31178 static void
31179 emit_unlikely_jump (rtx insn)
31180 {
31181 rtx_insn *jump = emit_jump_insn (insn);
31182 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
31183 }
31184
31185 /* Expand a compare and swap pattern. */
31186
31187 void
31188 arm_expand_compare_and_swap (rtx operands[])
31189 {
31190 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
31191 machine_mode mode, cmp_mode;
31192
31193 bval = operands[0];
31194 rval = operands[1];
31195 mem = operands[2];
31196 oldval = operands[3];
31197 newval = operands[4];
31198 is_weak = operands[5];
31199 mod_s = operands[6];
31200 mod_f = operands[7];
31201 mode = GET_MODE (mem);
31202
31203 /* Normally the succ memory model must be stronger than fail, but in the
31204 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
31205 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
31206
31207 if (TARGET_HAVE_LDACQ
31208 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
31209 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
31210 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
31211
31212 switch (mode)
31213 {
31214 case E_QImode:
31215 case E_HImode:
31216 /* For narrow modes, we're going to perform the comparison in SImode,
31217 so do the zero-extension now. */
31218 rval = gen_reg_rtx (SImode);
31219 oldval = convert_modes (SImode, mode, oldval, true);
31220 /* FALLTHRU */
31221
31222 case E_SImode:
31223 /* Force the value into a register if needed. We waited until after
31224 the zero-extension above to do this properly. */
31225 if (!arm_add_operand (oldval, SImode))
31226 oldval = force_reg (SImode, oldval);
31227 break;
31228
31229 case E_DImode:
31230 if (!cmpdi_operand (oldval, mode))
31231 oldval = force_reg (mode, oldval);
31232 break;
31233
31234 default:
31235 gcc_unreachable ();
31236 }
31237
31238 if (TARGET_THUMB1)
31239 cmp_mode = E_SImode;
31240 else
31241 cmp_mode = CC_Zmode;
31242
31243 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
31244 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
31245 oldval, newval, is_weak, mod_s, mod_f));
31246
31247 if (mode == QImode || mode == HImode)
31248 emit_move_insn (operands[1], gen_lowpart (mode, rval));
31249
31250 /* In all cases, we arrange for success to be signaled by Z set.
31251 This arrangement allows for the boolean result to be used directly
31252 in a subsequent branch, post optimization. For Thumb-1 targets, the
31253 boolean negation of the result is also stored in bval because the Thumb-1
31254 backend lacks dependency tracking for the CC flag, as flag setting is not
31255 represented at the RTL level. */
31256 if (TARGET_THUMB1)
31257 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
31258 else
31259 {
31260 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
31261 emit_insn (gen_rtx_SET (bval, x));
31262 }
31263 }
31264
31265 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
31266 another memory store between the load-exclusive and store-exclusive can
31267 reset the monitor from Exclusive to Open state. This means we must wait
31268 until after reload to split the pattern, lest we get a register spill in
31269 the middle of the atomic sequence. Success of the compare and swap is
31270 indicated by the Z flag being set for 32-bit targets and by neg_bval being
31271 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
31272 by the atomic_compare_and_swap<mode> standard pattern in operand 0). */
31273
31274 void
31275 arm_split_compare_and_swap (rtx operands[])
31276 {
31277 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
31278 machine_mode mode;
31279 enum memmodel mod_s, mod_f;
31280 bool is_weak;
31281 rtx_code_label *label1, *label2;
31282 rtx x, cond;
31283
31284 rval = operands[1];
31285 mem = operands[2];
31286 oldval = operands[3];
31287 newval = operands[4];
31288 is_weak = (operands[5] != const0_rtx);
31289 mod_s_rtx = operands[6];
31290 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
31291 mod_f = memmodel_from_int (INTVAL (operands[7]));
31292 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
31293 mode = GET_MODE (mem);
31294
31295 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
31296
31297 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
31298 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
31299
31300 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
31301 a full barrier is emitted after the store-release. */
31302 if (is_armv8_sync)
31303 use_acquire = false;
31304
31305 /* Checks whether a barrier is needed and emits one accordingly. */
31306 if (!(use_acquire || use_release))
31307 arm_pre_atomic_barrier (mod_s);
31308
31309 label1 = NULL;
31310 if (!is_weak)
31311 {
31312 label1 = gen_label_rtx ();
31313 emit_label (label1);
31314 }
31315 label2 = gen_label_rtx ();
31316
31317 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
31318
31319 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
31320 as required to communicate with arm_expand_compare_and_swap. */
31321 if (TARGET_32BIT)
31322 {
31323 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
31324 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31325 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31326 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
31327 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31328 }
31329 else
31330 {
31331 cond = gen_rtx_NE (VOIDmode, rval, oldval);
31332 if (thumb1_cmpneg_operand (oldval, SImode))
31333 {
31334 rtx src = rval;
31335 if (!satisfies_constraint_L (oldval))
31336 {
31337 gcc_assert (satisfies_constraint_J (oldval));
31338
31339 /* For such immediates, ADDS needs the source and destination regs
31340 to be the same.
31341
31342 Normally this would be handled by RA, but this is all happening
31343 after RA. */
31344 emit_move_insn (neg_bval, rval);
31345 src = neg_bval;
31346 }
31347
31348 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
31349 label2, cond));
31350 }
31351 else
31352 {
31353 emit_move_insn (neg_bval, const1_rtx);
31354 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
31355 }
31356 }
31357
31358 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
31359
31360 /* Weak or strong, we want EQ to be true for success, so that we
31361 match the flags that we got from the compare above. */
31362 if (TARGET_32BIT)
31363 {
31364 cond = gen_rtx_REG (CCmode, CC_REGNUM);
31365 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
31366 emit_insn (gen_rtx_SET (cond, x));
31367 }
31368
31369 if (!is_weak)
31370 {
31371 /* Z is set to boolean value of !neg_bval, as required to communicate
31372 with arm_expand_compare_and_swap. */
31373 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
31374 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
31375 }
31376
31377 if (!is_mm_relaxed (mod_f))
31378 emit_label (label2);
31379
31380 /* Checks whether a barrier is needed and emits one accordingly. */
31381 if (is_armv8_sync
31382 || !(use_acquire || use_release))
31383 arm_post_atomic_barrier (mod_s);
31384
31385 if (is_mm_relaxed (mod_f))
31386 emit_label (label2);
31387 }
31388
31389 /* Split an atomic operation pattern. Operation is given by CODE and is one
31390 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
31391 operation). Operation is performed on the content at MEM and on VALUE
31392 following the memory model MODEL_RTX. The content at MEM before and after
31393 the operation is returned in OLD_OUT and NEW_OUT respectively while the
31394 success of the operation is returned in COND. Using a scratch register or
31395 an operand register for these determines what result is returned for that
31396 pattern. */
31397
31398 void
31399 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
31400 rtx value, rtx model_rtx, rtx cond)
31401 {
31402 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
31403 machine_mode mode = GET_MODE (mem);
31404 machine_mode wmode = (mode == DImode ? DImode : SImode);
31405 rtx_code_label *label;
31406 bool all_low_regs, bind_old_new;
31407 rtx x;
31408
31409 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
31410
31411 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
31412 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
31413
31414 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
31415 a full barrier is emitted after the store-release. */
31416 if (is_armv8_sync)
31417 use_acquire = false;
31418
31419 /* Checks whether a barrier is needed and emits one accordingly. */
31420 if (!(use_acquire || use_release))
31421 arm_pre_atomic_barrier (model);
31422
31423 label = gen_label_rtx ();
31424 emit_label (label);
31425
31426 if (new_out)
31427 new_out = gen_lowpart (wmode, new_out);
31428 if (old_out)
31429 old_out = gen_lowpart (wmode, old_out);
31430 else
31431 old_out = new_out;
31432 value = simplify_gen_subreg (wmode, value, mode, 0);
31433
31434 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
31435
31436 /* Does the operation require destination and first operand to use the same
31437 register? This is decided by register constraints of relevant insn
31438 patterns in thumb1.md. */
31439 gcc_assert (!new_out || REG_P (new_out));
31440 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
31441 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
31442 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
31443 bind_old_new =
31444 (TARGET_THUMB1
31445 && code != SET
31446 && code != MINUS
31447 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
31448
31449 /* We want to return the old value while putting the result of the operation
31450 in the same register as the old value so copy the old value over to the
31451 destination register and use that register for the operation. */
31452 if (old_out && bind_old_new)
31453 {
31454 emit_move_insn (new_out, old_out);
31455 old_out = new_out;
31456 }
31457
31458 switch (code)
31459 {
31460 case SET:
31461 new_out = value;
31462 break;
31463
31464 case NOT:
31465 x = gen_rtx_AND (wmode, old_out, value);
31466 emit_insn (gen_rtx_SET (new_out, x));
31467 x = gen_rtx_NOT (wmode, new_out);
31468 emit_insn (gen_rtx_SET (new_out, x));
31469 break;
31470
31471 case MINUS:
31472 if (CONST_INT_P (value))
31473 {
31474 value = gen_int_mode (-INTVAL (value), wmode);
31475 code = PLUS;
31476 }
31477 /* FALLTHRU */
31478
31479 case PLUS:
31480 if (mode == DImode)
31481 {
31482 /* DImode plus/minus need to clobber flags. */
31483 /* The adddi3 and subdi3 patterns are incorrectly written so that
31484 they require matching operands, even when we could easily support
31485 three operands. Thankfully, this can be fixed up post-splitting,
31486 as the individual add+adc patterns do accept three operands and
31487 post-reload cprop can make these moves go away. */
31488 emit_move_insn (new_out, old_out);
31489 if (code == PLUS)
31490 x = gen_adddi3 (new_out, new_out, value);
31491 else
31492 x = gen_subdi3 (new_out, new_out, value);
31493 emit_insn (x);
31494 break;
31495 }
31496 /* FALLTHRU */
31497
31498 default:
31499 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
31500 emit_insn (gen_rtx_SET (new_out, x));
31501 break;
31502 }
31503
31504 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
31505 use_release);
31506
31507 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
31508 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
31509
31510 /* Checks whether a barrier is needed and emits one accordingly. */
31511 if (is_armv8_sync
31512 || !(use_acquire || use_release))
31513 arm_post_atomic_barrier (model);
31514 }
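
/* For example (a rough sketch with hypothetical register assignments),
   splitting a relaxed SImode atomic add produces a retry loop along
   these lines:

   1:	ldrex	r0, [r2]	@ old_out
	add	r1, r0, r3	@ new_out = old_out + value
	strex	ip, r1, [r2]	@ cond; 0 on success
	cmp	ip, #0
	bne	1b  */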
31515 \f
31516 /* Return the mode for the MVE vector of predicates corresponding to MODE. */
31517 opt_machine_mode
31518 arm_mode_to_pred_mode (machine_mode mode)
31519 {
31520 switch (GET_MODE_NUNITS (mode))
31521 {
31522 case 16: return V16BImode;
31523 case 8: return V8BImode;
31524 case 4: return V4BImode;
31525 case 2: return V2QImode;
31526 }
31527 return opt_machine_mode ();
31528 }
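
/* For example, V16QImode (16 lanes) maps to V16BImode, V8HImode to
   V8BImode, V4SImode to V4BImode and V2DImode to V2QImode; any other
   lane count yields an empty opt_machine_mode.  */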
31529
31530 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
31531 If CAN_INVERT, store either the result or its inverse in TARGET
31532 and return true if TARGET contains the inverse. If !CAN_INVERT,
31533 always store the result in TARGET, never its inverse.
31534
31535 Note that the handling of floating-point comparisons is not
31536 IEEE compliant. */
31537
31538 bool
31539 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
31540 bool can_invert)
31541 {
31542 machine_mode cmp_result_mode = GET_MODE (target);
31543 machine_mode cmp_mode = GET_MODE (op0);
31544
31545 bool inverted;
31546
31547 /* MVE supports more comparisons than Neon. */
31548 if (TARGET_HAVE_MVE)
31549 inverted = false;
31550 else
31551 switch (code)
31552 {
31553 /* For these we need to compute the inverse of the requested
31554 comparison. */
31555 case UNORDERED:
31556 case UNLT:
31557 case UNLE:
31558 case UNGT:
31559 case UNGE:
31560 case UNEQ:
31561 case NE:
31562 code = reverse_condition_maybe_unordered (code);
31563 if (!can_invert)
31564 {
31565 /* Recursively emit the inverted comparison into a temporary
31566 and then store its inverse in TARGET. This avoids reusing
31567 TARGET (which for integer NE could be one of the inputs). */
31568 rtx tmp = gen_reg_rtx (cmp_result_mode);
31569 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
31570 gcc_unreachable ();
31571 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
31572 return false;
31573 }
31574 inverted = true;
31575 break;
31576
31577 default:
31578 inverted = false;
31579 break;
31580 }
31581
31582 switch (code)
31583 {
31584 /* These are natively supported by Neon for zero comparisons, but otherwise
31585 require the operands to be swapped. For MVE, we can only compare
31586 registers. */
31587 case LE:
31588 case LT:
31589 if (!TARGET_HAVE_MVE)
31590 if (op1 != CONST0_RTX (cmp_mode))
31591 {
31592 code = swap_condition (code);
31593 std::swap (op0, op1);
31594 }
31595 /* Fall through. */
31596
31597 /* These are natively supported by Neon for both register and zero
31598 operands. MVE supports registers only. */
31599 case EQ:
31600 case GE:
31601 case GT:
31602 case NE:
31603 if (TARGET_HAVE_MVE)
31604 {
31605 switch (GET_MODE_CLASS (cmp_mode))
31606 {
31607 case MODE_VECTOR_INT:
31608 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31609 op0, force_reg (cmp_mode, op1)));
31610 break;
31611 case MODE_VECTOR_FLOAT:
31612 if (TARGET_HAVE_MVE_FLOAT)
31613 emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target,
31614 op0, force_reg (cmp_mode, op1)));
31615 else
31616 gcc_unreachable ();
31617 break;
31618 default:
31619 gcc_unreachable ();
31620 }
31621 }
31622 else
31623 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
31624 return inverted;
31625
31626 /* These are natively supported for register operands only.
31627 Comparisons with zero aren't useful and should be folded
31628 or canonicalized by target-independent code. */
31629 case GEU:
31630 case GTU:
31631 if (TARGET_HAVE_MVE)
31632 emit_insn (gen_mve_vcmpq (code, cmp_mode, target,
31633 op0, force_reg (cmp_mode, op1)));
31634 else
31635 emit_insn (gen_neon_vc (code, cmp_mode, target,
31636 op0, force_reg (cmp_mode, op1)));
31637 return inverted;
31638
31639 /* These require the operands to be swapped and likewise do not
31640 support comparisons with zero. */
31641 case LEU:
31642 case LTU:
31643 if (TARGET_HAVE_MVE)
31644 emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target,
31645 force_reg (cmp_mode, op1), op0));
31646 else
31647 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31648 target, force_reg (cmp_mode, op1), op0));
31649 return inverted;
31650
31651 /* These need a combination of two comparisons. */
31652 case LTGT:
31653 case ORDERED:
31654 {
31655 /* Operands are LTGT iff (a > b || a < b).
31656 Operands are ORDERED iff (a > b || a <= b). */
31657 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31658 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31659 rtx_code alt_code = (code == LTGT ? LT : LE);
31660 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31661 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31662 gcc_unreachable ();
31663 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31664 gt_res, alt_res)));
31665 return inverted;
31666 }
31667
31668 default:
31669 gcc_unreachable ();
31670 }
31671 }
31672
31673 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31674 CMP_RESULT_MODE is the mode of the comparison result. */
31675
31676 void
31677 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31678 {
31679 /* When expanding for MVE, we do not want to emit a (useless) vpsel in
31680 arm_expand_vector_compare, and another one here. */
31681 rtx mask;
31682
31683 if (TARGET_HAVE_MVE)
31684 mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ());
31685 else
31686 mask = gen_reg_rtx (cmp_result_mode);
31687
31688 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31689 operands[4], operands[5], true);
31690 if (inverted)
31691 std::swap (operands[1], operands[2]);
31692 if (TARGET_NEON)
31693 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31694 mask, operands[1], operands[2]));
31695 else
31696 {
31697 machine_mode cmp_mode = GET_MODE (operands[0]);
31698
31699 switch (GET_MODE_CLASS (cmp_mode))
31700 {
31701 case MODE_VECTOR_INT:
31702 emit_insn (gen_mve_q (VPSELQ_S, VPSELQ_S, cmp_mode, operands[0],
31703 operands[1], operands[2], mask));
31704 break;
31705 case MODE_VECTOR_FLOAT:
31706 if (TARGET_HAVE_MVE_FLOAT)
31707 emit_insn (gen_mve_q_f (VPSELQ_F, cmp_mode, operands[0],
31708 operands[1], operands[2], mask));
31709 else
31710 gcc_unreachable ();
31711 break;
31712 default:
31713 gcc_unreachable ();
31714 }
31715 }
31716 }
31717 \f
31718 #define MAX_VECT_LEN 16
31719
31720 struct expand_vec_perm_d
31721 {
31722 rtx target, op0, op1;
31723 vec_perm_indices perm;
31724 machine_mode vmode;
31725 bool one_vector_p;
31726 bool testing_p;
31727 };
31728
31729 /* Generate a variable permutation. */
31730
31731 static void
31732 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31733 {
31734 machine_mode vmode = GET_MODE (target);
31735 bool one_vector_p = rtx_equal_p (op0, op1);
31736
31737 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31738 gcc_checking_assert (GET_MODE (op0) == vmode);
31739 gcc_checking_assert (GET_MODE (op1) == vmode);
31740 gcc_checking_assert (GET_MODE (sel) == vmode);
31741 gcc_checking_assert (TARGET_NEON);
31742
31743 if (one_vector_p)
31744 {
31745 if (vmode == V8QImode)
31746 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31747 else
31748 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31749 }
31750 else
31751 {
31752 rtx pair;
31753
31754 if (vmode == V8QImode)
31755 {
31756 pair = gen_reg_rtx (V16QImode);
31757 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31758 pair = gen_lowpart (TImode, pair);
31759 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31760 }
31761 else
31762 {
31763 pair = gen_reg_rtx (OImode);
31764 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31765 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31766 }
31767 }
31768 }
31769
31770 void
31771 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31772 {
31773 machine_mode vmode = GET_MODE (target);
31774 unsigned int nelt = GET_MODE_NUNITS (vmode);
31775 bool one_vector_p = rtx_equal_p (op0, op1);
31776 rtx mask;
31777
31778 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31779 numbering of elements for big-endian, we must reverse the order. */
31780 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31781
31782 /* The VTBL instruction does not use a modulo index, so we must take care
31783 of that ourselves. */
31784 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31785 mask = gen_const_vec_duplicate (vmode, mask);
31786 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31787
31788 arm_expand_vec_perm_1 (target, op0, op1, sel);
31789 }
31790
31791 /* Map lane ordering between architectural lane order, and GCC lane order,
31792 taking into account ABI. See comment above output_move_neon for details. */
31793
31794 static int
31795 neon_endian_lane_map (machine_mode mode, int lane)
31796 {
31797 if (BYTES_BIG_ENDIAN)
31798 {
31799 int nelems = GET_MODE_NUNITS (mode);
31800 /* Reverse lane order. */
31801 lane = (nelems - 1 - lane);
31802 /* Reverse D register order, to match ABI. */
31803 if (GET_MODE_SIZE (mode) == 16)
31804 lane = lane ^ (nelems / 2);
31805 }
31806 return lane;
31807 }
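
/* For example, on a big-endian target, lane 0 of a V4SImode value (four
   lanes, 16 bytes) first becomes lane 3 and is then XORed with
   nelems / 2 == 2, giving architectural lane 1; on little-endian targets
   the lane number is returned unchanged.  */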
31808
31809 /* Some permutations index into pairs of vectors, this is a helper function
31810 to map indexes into those pairs of vectors. */
31811
31812 static int
31813 neon_pair_endian_lane_map (machine_mode mode, int lane)
31814 {
31815 int nelem = GET_MODE_NUNITS (mode);
31816 if (BYTES_BIG_ENDIAN)
31817 lane =
31818 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31819 return lane;
31820 }
31821
31822 /* Generate or test for an insn that supports a constant permutation. */
31823
31824 /* Recognize patterns for the VUZP insns. */
31825
31826 static bool
31827 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31828 {
31829 unsigned int i, odd, mask, nelt = d->perm.length ();
31830 rtx out0, out1, in0, in1;
31831 int first_elem;
31832 int swap_nelt;
31833
31834 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31835 return false;
31836
31837 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31838 big endian pattern on 64 bit vectors, so we correct for that. */
31839 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31840 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31841
31842 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31843
31844 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31845 odd = 0;
31846 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31847 odd = 1;
31848 else
31849 return false;
31850 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31851
31852 for (i = 0; i < nelt; i++)
31853 {
31854 unsigned elt =
31855 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31856 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31857 return false;
31858 }
31859
31860 /* Success! */
31861 if (d->testing_p)
31862 return true;
31863
31864 in0 = d->op0;
31865 in1 = d->op1;
31866 if (swap_nelt != 0)
31867 std::swap (in0, in1);
31868
31869 out0 = d->target;
31870 out1 = gen_reg_rtx (d->vmode);
31871 if (odd)
31872 std::swap (out0, out1);
31873
31874 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31875 return true;
31876 }
31877
31878 /* Recognize patterns for the VZIP insns. */
31879
31880 static bool
31881 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31882 {
31883 unsigned int i, high, mask, nelt = d->perm.length ();
31884 rtx out0, out1, in0, in1;
31885 int first_elem;
31886 bool is_swapped;
31887
31888 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31889 return false;
31890
31891 is_swapped = BYTES_BIG_ENDIAN;
31892
31893 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31894
31895 high = nelt / 2;
31896 if (first_elem == neon_endian_lane_map (d->vmode, high))
31897 ;
31898 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31899 high = 0;
31900 else
31901 return false;
31902 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31903
31904 for (i = 0; i < nelt / 2; i++)
31905 {
31906 unsigned elt =
31907 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31908 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31909 != elt)
31910 return false;
31911 elt =
31912 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31913 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31914 != elt)
31915 return false;
31916 }
31917
31918 /* Success! */
31919 if (d->testing_p)
31920 return true;
31921
31922 in0 = d->op0;
31923 in1 = d->op1;
31924 if (is_swapped)
31925 std::swap (in0, in1);
31926
31927 out0 = d->target;
31928 out1 = gen_reg_rtx (d->vmode);
31929 if (high)
31930 std::swap (out0, out1);
31931
31932 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31933 return true;
31934 }
31935
31936 /* Recognize patterns for the VREV insns. */
31937 static bool
31938 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31939 {
31940 unsigned int i, j, diff, nelt = d->perm.length ();
31941 rtx (*gen) (machine_mode, rtx, rtx);
31942
31943 if (!d->one_vector_p)
31944 return false;
31945
31946 diff = d->perm[0];
31947 switch (diff)
31948 {
31949 case 7:
31950 switch (d->vmode)
31951 {
31952 case E_V16QImode:
31953 case E_V8QImode:
31954 gen = gen_neon_vrev64;
31955 break;
31956 default:
31957 return false;
31958 }
31959 break;
31960 case 3:
31961 switch (d->vmode)
31962 {
31963 case E_V16QImode:
31964 case E_V8QImode:
31965 gen = gen_neon_vrev32;
31966 break;
31967 case E_V8HImode:
31968 case E_V4HImode:
31969 case E_V8HFmode:
31970 case E_V4HFmode:
31971 gen = gen_neon_vrev64;
31972 break;
31973 default:
31974 return false;
31975 }
31976 break;
31977 case 1:
31978 switch (d->vmode)
31979 {
31980 case E_V16QImode:
31981 case E_V8QImode:
31982 gen = gen_neon_vrev16;
31983 break;
31984 case E_V8HImode:
31985 case E_V4HImode:
31986 gen = gen_neon_vrev32;
31987 break;
31988 case E_V4SImode:
31989 case E_V2SImode:
31990 case E_V4SFmode:
31991 case E_V2SFmode:
31992 gen = gen_neon_vrev64;
31993 break;
31994 default:
31995 return false;
31996 }
31997 break;
31998 default:
31999 return false;
32000 }
32001
32002 for (i = 0; i < nelt ; i += diff + 1)
32003 for (j = 0; j <= diff; j += 1)
32004 {
32005 /* This is guaranteed to be true as the value of diff
32006 is 7, 3 or 1 and we should have enough elements in the
32007 queue to generate this. Getting a vector mask with a
32008 value of diff other than these values implies that
32009 something is wrong by the time we get here. */
32010 gcc_assert (i + j < nelt);
32011 if (d->perm[i + j] != i + diff - j)
32012 return false;
32013 }
32014
32015 /* Success! */
32016 if (d->testing_p)
32017 return true;
32018
32019 emit_insn (gen (d->vmode, d->target, d->op0));
32020 return true;
32021 }
32022
32023 /* Recognize patterns for the VTRN insns. */
32024
32025 static bool
32026 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
32027 {
32028 unsigned int i, odd, mask, nelt = d->perm.length ();
32029 rtx out0, out1, in0, in1;
32030
32031 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
32032 return false;
32033
32034 /* Note that these are little-endian tests. Adjust for big-endian later. */
32035 if (d->perm[0] == 0)
32036 odd = 0;
32037 else if (d->perm[0] == 1)
32038 odd = 1;
32039 else
32040 return false;
32041 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
32042
32043 for (i = 0; i < nelt; i += 2)
32044 {
32045 if (d->perm[i] != i + odd)
32046 return false;
32047 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
32048 return false;
32049 }
32050
32051 /* Success! */
32052 if (d->testing_p)
32053 return true;
32054
32055 in0 = d->op0;
32056 in1 = d->op1;
32057 if (BYTES_BIG_ENDIAN)
32058 {
32059 std::swap (in0, in1);
32060 odd = !odd;
32061 }
32062
32063 out0 = d->target;
32064 out1 = gen_reg_rtx (d->vmode);
32065 if (odd)
32066 std::swap (out0, out1);
32067
32068 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
32069 return true;
32070 }
32071
32072 /* Recognize patterns for the VEXT insns. */
32073
32074 static bool
32075 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
32076 {
32077 unsigned int i, nelt = d->perm.length ();
32078 rtx offset;
32079
32080 unsigned int location;
32081
32082 unsigned int next = d->perm[0] + 1;
32083
32084 /* TODO: Handle GCC's numbering of elements for big-endian. */
32085 if (BYTES_BIG_ENDIAN)
32086 return false;
32087
32088 /* Check if the extracted indexes are increasing by one. */
32089 for (i = 1; i < nelt; next++, i++)
32090 {
32091 /* If we hit the most significant element of the 2nd vector in
32092 the previous iteration, no need to test further. */
32093 if (next == 2 * nelt)
32094 return false;
32095
32096 /* If we are operating on only one vector: it could be a
32097 rotation. If there are only two elements of size < 64, let
32098 arm_evpc_neon_vrev catch it. */
32099 if (d->one_vector_p && (next == nelt))
32100 {
32101 if ((nelt == 2) && (d->vmode != V2DImode))
32102 return false;
32103 else
32104 next = 0;
32105 }
32106
32107 if (d->perm[i] != next)
32108 return false;
32109 }
32110
32111 location = d->perm[0];
32112
32113 /* Success! */
32114 if (d->testing_p)
32115 return true;
32116
32117 offset = GEN_INT (location);
32118
32119 if(d->vmode == E_DImode)
32120 return false;
32121
32122 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
32123 return true;
32124 }
32125
32126 /* The NEON VTBL instruction is a fully variable permutation that's even
32127 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
32128 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
32129 can do slightly better by expanding this as a constant where we don't
32130 have to apply a mask. */
32131
32132 static bool
32133 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
32134 {
32135 rtx rperm[MAX_VECT_LEN], sel;
32136 machine_mode vmode = d->vmode;
32137 unsigned int i, nelt = d->perm.length ();
32138
32139 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
32140 numbering of elements for big-endian, we must reverse the order. */
32141 if (BYTES_BIG_ENDIAN)
32142 return false;
32143
32144 if (d->testing_p)
32145 return true;
32146
32147 /* Generic code will try constant permutation twice. Once with the
32148 original mode and again with the elements lowered to QImode.
32149 So wait and don't do the selector expansion ourselves. */
32150 if (vmode != V8QImode && vmode != V16QImode)
32151 return false;
32152
32153 for (i = 0; i < nelt; ++i)
32154 rperm[i] = GEN_INT (d->perm[i]);
32155 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
32156 sel = force_reg (vmode, sel);
32157
32158 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
32159 return true;
32160 }
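
/* Illustrative sketch (not GCC code): for V8QImode with an arbitrary
   selector such as { 0, 7, 9, 2, 12, 5, 14, 3 }, none of the earlier
   recognizers match, so the code above materialises the selector as a
   CONST_VECTOR in a register and lets arm_expand_vec_perm_1 emit the
   VTBL-based sequence.  Because the selector is a compile-time constant
   within range, no run-time masking of the indices is needed, which is
   the "slightly better" expansion the comment before the function
   refers to.  */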
32161
32162 static bool
32163 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
32164 {
32165 /* Check if the input mask matches vext before reordering the
32166 operands. */
32167 if (TARGET_NEON)
32168 if (arm_evpc_neon_vext (d))
32169 return true;
32170
32171 /* The pattern matching functions above are written to look for a small
32172 number to begin the sequence (0, 1, N/2). If we begin with an index
32173 from the second operand, we can swap the operands. */
32174 unsigned int nelt = d->perm.length ();
32175 if (d->perm[0] >= nelt)
32176 {
32177 d->perm.rotate_inputs (1);
32178 std::swap (d->op0, d->op1);
32179 }
32180
32181 if (TARGET_NEON)
32182 {
32183 if (arm_evpc_neon_vuzp (d))
32184 return true;
32185 if (arm_evpc_neon_vzip (d))
32186 return true;
32187 if (arm_evpc_neon_vrev (d))
32188 return true;
32189 if (arm_evpc_neon_vtrn (d))
32190 return true;
32191 return arm_evpc_neon_vtbl (d);
32192 }
32193 return false;
32194 }
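
/* Illustrative sketch (not GCC code): with two V4SImode inputs, a
   selector such as { 4, 0, 5, 1 } begins with an index from the second
   operand, so the code above rewrites it to { 0, 4, 1, 5 } and swaps
   op0/op1; the rewritten form is then recognised by arm_evpc_neon_vzip
   instead of falling through to the VTBL catch-all.  */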
32195
32196 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
32197
32198 static bool
32199 arm_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
32200 rtx target, rtx op0, rtx op1,
32201 const vec_perm_indices &sel)
32202 {
32203 if (vmode != op_mode)
32204 return false;
32205
32206 struct expand_vec_perm_d d;
32207 int i, nelt, which;
32208
32209 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
32210 return false;
32211
32212 d.target = target;
32213 if (op0)
32214 {
32215 rtx nop0 = force_reg (vmode, op0);
32216 if (op0 == op1)
32217 op1 = nop0;
32218 op0 = nop0;
32219 }
32220 if (op1)
32221 op1 = force_reg (vmode, op1);
32222 d.op0 = op0;
32223 d.op1 = op1;
32224
32225 d.vmode = vmode;
32226 gcc_assert (VECTOR_MODE_P (d.vmode));
32227 d.testing_p = !target;
32228
32229 nelt = GET_MODE_NUNITS (d.vmode);
32230 for (i = which = 0; i < nelt; ++i)
32231 {
32232 int ei = sel[i] & (2 * nelt - 1);
32233 which |= (ei < nelt ? 1 : 2);
32234 }
32235
32236 switch (which)
32237 {
32238 default:
32239 gcc_unreachable ();
32240
32241 case 3:
32242 d.one_vector_p = false;
32243 if (d.testing_p || !rtx_equal_p (op0, op1))
32244 break;
32245
32246 /* The elements of PERM do not suggest that only the first operand
32247 is used, but both operands are identical. Allow easier matching
32248 of the permutation by folding the permutation into the single
32249 input vector. */
32250 /* FALLTHRU */
32251 case 2:
32252 d.op0 = op1;
32253 d.one_vector_p = true;
32254 break;
32255
32256 case 1:
32257 d.op1 = op0;
32258 d.one_vector_p = true;
32259 break;
32260 }
32261
32262 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
32263
32264 if (!d.testing_p)
32265 return arm_expand_vec_perm_const_1 (&d);
32266
32267 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32268 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32269 if (!d.one_vector_p)
32270 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32271
32272 start_sequence ();
32273 bool ret = arm_expand_vec_perm_const_1 (&d);
32274 end_sequence ();
32275
32276 return ret;
32277 }
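
/* Illustrative sketch (not GCC code): the WHICH bitmask computed above
   records which inputs the selector actually references.  For nelt == 4,
   sel == { 0, 1, 2, 3 } uses only the first input (which == 1, so op1 is
   folded onto op0), sel == { 4, 5, 6, 7 } uses only the second
   (which == 2), and sel == { 0, 4, 1, 5 } uses both (which == 3; when
   op0 and op1 are the same register the permutation is folded onto a
   single input).  */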
32278
32279 bool
32280 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
32281 {
32282 /* If we are soft float and we do not have ldrd
32283 then all auto increment forms are ok. */
32284 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
32285 return true;
32286
32287 switch (code)
32288 {
32289 /* Post-increment and pre-decrement are supported for all
32290 instruction forms except for vector forms. */
32291 case ARM_POST_INC:
32292 case ARM_PRE_DEC:
32293 if (VECTOR_MODE_P (mode))
32294 {
32295 if (code != ARM_PRE_DEC)
32296 return true;
32297 else
32298 return false;
32299 }
32300
32301 return true;
32302
32303 case ARM_POST_DEC:
32304 case ARM_PRE_INC:
32305 /* Without LDRD and mode size greater than
32306 word size, there is no point in auto-incrementing
32307 because ldm and stm will not have these forms. */
32308 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
32309 return false;
32310
32311 /* Vector and floating point modes do not support
32312 these auto increment forms. */
32313 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
32314 return false;
32315
32316 return true;
32317
32318 default:
32319 return false;
32320
32321 }
32322
32323 return false;
32324 }
32325
32326 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
32327 on ARM, since we know that shifts by negative amounts are no-ops.
32328 Additionally, the default expansion code is not available or suitable
32329 for post-reload insn splits (this can occur when the register allocator
32330 chooses not to do a shift in NEON).
32331
32332 This function is used in both initial expand and post-reload splits, and
32333 handles all kinds of 64-bit shifts.
32334
32335 Input requirements:
32336 - It is safe for the input and output to be the same register, but
32337 early-clobber rules apply for the shift amount and scratch registers.
32338 - Shift by register requires both scratch registers. In all other cases
32339 the scratch registers may be NULL.
32340 - Ashiftrt by a register also clobbers the CC register. */
32341 void
32342 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
32343 rtx amount, rtx scratch1, rtx scratch2)
32344 {
32345 rtx out_high = gen_highpart (SImode, out);
32346 rtx out_low = gen_lowpart (SImode, out);
32347 rtx in_high = gen_highpart (SImode, in);
32348 rtx in_low = gen_lowpart (SImode, in);
32349
32350 /* Terminology:
32351 in = the register pair containing the input value.
32352 out = the destination register pair.
32353 up = the high- or low-part of each pair.
32354 down = the opposite part to "up".
32355 In a shift, we can consider bits to shift from "up"-stream to
32356 "down"-stream, so in a left-shift "up" is the low-part and "down"
32357 is the high-part of each register pair. */
32358
32359 rtx out_up = code == ASHIFT ? out_low : out_high;
32360 rtx out_down = code == ASHIFT ? out_high : out_low;
32361 rtx in_up = code == ASHIFT ? in_low : in_high;
32362 rtx in_down = code == ASHIFT ? in_high : in_low;
32363
32364 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
32365 gcc_assert (out
32366 && (REG_P (out) || SUBREG_P (out))
32367 && GET_MODE (out) == DImode);
32368 gcc_assert (in
32369 && (REG_P (in) || SUBREG_P (in))
32370 && GET_MODE (in) == DImode);
32371 gcc_assert (amount
32372 && (((REG_P (amount) || SUBREG_P (amount))
32373 && GET_MODE (amount) == SImode)
32374 || CONST_INT_P (amount)));
32375 gcc_assert (scratch1 == NULL
32376 || (GET_CODE (scratch1) == SCRATCH)
32377 || (GET_MODE (scratch1) == SImode
32378 && REG_P (scratch1)));
32379 gcc_assert (scratch2 == NULL
32380 || (GET_CODE (scratch2) == SCRATCH)
32381 || (GET_MODE (scratch2) == SImode
32382 && REG_P (scratch2)));
32383 gcc_assert (!REG_P (out) || !REG_P (amount)
32384 || !HARD_REGISTER_P (out)
32385 || (REGNO (out) != REGNO (amount)
32386 && REGNO (out) + 1 != REGNO (amount)));
32387
32388 /* Macros to make following code more readable. */
32389 #define SUB_32(DEST,SRC) \
32390 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
32391 #define RSB_32(DEST,SRC) \
32392 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
32393 #define SUB_S_32(DEST,SRC) \
32394 gen_addsi3_compare0 ((DEST), (SRC), \
32395 GEN_INT (-32))
32396 #define SET(DEST,SRC) \
32397 gen_rtx_SET ((DEST), (SRC))
32398 #define SHIFT(CODE,SRC,AMOUNT) \
32399 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
32400 #define LSHIFT(CODE,SRC,AMOUNT) \
32401 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
32402 SImode, (SRC), (AMOUNT))
32403 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
32404 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
32405 SImode, (SRC), (AMOUNT))
32406 #define ORR(A,B) \
32407 gen_rtx_IOR (SImode, (A), (B))
32408 #define BRANCH(COND,LABEL) \
32409 gen_arm_cond_branch ((LABEL), \
32410 gen_rtx_ ## COND (CCmode, cc_reg, \
32411 const0_rtx), \
32412 cc_reg)
32413
32414 /* Shifts by register and shifts by constant are handled separately. */
32415 if (CONST_INT_P (amount))
32416 {
32417 /* We have a shift-by-constant. */
32418
32419 /* First, handle out-of-range shift amounts.
32420 In both cases we try to match the result an ARM instruction in a
32421 shift-by-register would give. This helps reduce execution
32422 differences between optimization levels, but it won't stop other
32423 parts of the compiler doing different things. This is "undefined
32424 behavior", in any case. */
32425 if (INTVAL (amount) <= 0)
32426 emit_insn (gen_movdi (out, in));
32427 else if (INTVAL (amount) >= 64)
32428 {
32429 if (code == ASHIFTRT)
32430 {
32431 rtx const31_rtx = GEN_INT (31);
32432 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
32433 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
32434 }
32435 else
32436 emit_insn (gen_movdi (out, const0_rtx));
32437 }
32438
32439 /* Now handle valid shifts. */
32440 else if (INTVAL (amount) < 32)
32441 {
32442 /* Shifts by a constant less than 32. */
32443 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
32444
32445 /* Clearing the out register in DImode first avoids lots
32446 of spilling and results in less stack usage.
32447 Later this redundant insn is completely removed.
32448 Do that only if "in" and "out" are different registers. */
32449 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32450 emit_insn (SET (out, const0_rtx));
32451 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32452 emit_insn (SET (out_down,
32453 ORR (REV_LSHIFT (code, in_up, reverse_amount),
32454 out_down)));
32455 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32456 }
32457 else
32458 {
32459 /* Shifts by a constant greater than 31. */
32460 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
32461
32462 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
32463 emit_insn (SET (out, const0_rtx));
32464 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
32465 if (code == ASHIFTRT)
32466 emit_insn (gen_ashrsi3 (out_up, in_up,
32467 GEN_INT (31)));
32468 else
32469 emit_insn (SET (out_up, const0_rtx));
32470 }
32471 }
32472 else
32473 {
32474 /* We have a shift-by-register. */
32475 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
32476
32477 /* This alternative requires the scratch registers. */
32478 gcc_assert (scratch1 && REG_P (scratch1));
32479 gcc_assert (scratch2 && REG_P (scratch2));
32480
32481 /* We will need the values "amount-32" and "32-amount" later.
32482 Swapping them around now allows the later code to be more general. */
32483 switch (code)
32484 {
32485 case ASHIFT:
32486 emit_insn (SUB_32 (scratch1, amount));
32487 emit_insn (RSB_32 (scratch2, amount));
32488 break;
32489 case ASHIFTRT:
32490 emit_insn (RSB_32 (scratch1, amount));
32491 /* Also set CC = amount > 32. */
32492 emit_insn (SUB_S_32 (scratch2, amount));
32493 break;
32494 case LSHIFTRT:
32495 emit_insn (RSB_32 (scratch1, amount));
32496 emit_insn (SUB_32 (scratch2, amount));
32497 break;
32498 default:
32499 gcc_unreachable ();
32500 }
32501
32502 /* Emit code like this:
32503
32504 arithmetic-left:
32505 out_down = in_down << amount;
32506 out_down = (in_up << (amount - 32)) | out_down;
32507 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
32508 out_up = in_up << amount;
32509
32510 arithmetic-right:
32511 out_down = in_down >> amount;
32512 out_down = (in_up << (32 - amount)) | out_down;
32513 if (amount < 32)
32514 out_down = ((signed)in_up >> (amount - 32)) | out_down;
32515 out_up = in_up >> amount;
32516
32517 logical-right:
32518 out_down = in_down >> amount;
32519 out_down = (in_up << (32 - amount)) | out_down;
32520 if (amount < 32)
32521 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
32522 out_up = in_up >> amount;
32523
32524 The ARM and Thumb2 variants are the same but implemented slightly
32525 differently. If this were only called during expand we could just
32526 use the Thumb2 case and let combine do the right thing, but this
32527 can also be called from post-reload splitters. */
32528
32529 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
32530
32531 if (!TARGET_THUMB2)
32532 {
32533 /* Emit code for ARM mode. */
32534 emit_insn (SET (out_down,
32535 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
32536 if (code == ASHIFTRT)
32537 {
32538 rtx_code_label *done_label = gen_label_rtx ();
32539 emit_jump_insn (BRANCH (LT, done_label));
32540 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
32541 out_down)));
32542 emit_label (done_label);
32543 }
32544 else
32545 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
32546 out_down)));
32547 }
32548 else
32549 {
32550 /* Emit code for Thumb2 mode.
32551 Thumb2 can't do shift and or in one insn. */
32552 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
32553 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
32554
32555 if (code == ASHIFTRT)
32556 {
32557 rtx_code_label *done_label = gen_label_rtx ();
32558 emit_jump_insn (BRANCH (LT, done_label));
32559 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
32560 emit_insn (SET (out_down, ORR (out_down, scratch2)));
32561 emit_label (done_label);
32562 }
32563 else
32564 {
32565 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
32566 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
32567 }
32568 }
32569
32570 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
32571 }
32572
32573 #undef SUB_32
32574 #undef RSB_32
32575 #undef SUB_S_32
32576 #undef SET
32577 #undef SHIFT
32578 #undef LSHIFT
32579 #undef REV_LSHIFT
32580 #undef ORR
32581 #undef BRANCH
32582 }
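
/* Illustrative host-side model of the shift-by-constant case above for
   0 < k < 32 (a sketch, not GCC code; uint32_t from <stdint.h> stands
   in for the SImode halves of the register pair):

     void shl64 (uint32_t in_lo, uint32_t in_hi, unsigned k,
                 uint32_t *out_lo, uint32_t *out_hi)
     {
       *out_hi = (in_hi << k) | (in_lo >> (32 - k));
       *out_lo = in_lo << k;
     }

     void lshr64 (uint32_t in_lo, uint32_t in_hi, unsigned k,
                  uint32_t *out_lo, uint32_t *out_hi)
     {
       *out_lo = (in_lo >> k) | (in_hi << (32 - k));
       *out_hi = in_hi >> k;
     }

   In the code's terminology, out_hi/out_lo above are the "down"/"up"
   parts for a left shift and the other way around for right shifts; an
   arithmetic right shift uses (int32_t) in_hi >> k for the high word.
   Constant shifts of 32 or more take the "greater than 31" arm, which
   shifts in_up by k - 32 into the "down" word and fills the other word
   with zeros (or with copies of the sign bit for ASHIFTRT).  */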
32583
32584 /* Returns true if the pattern is a valid symbolic address, which is either a
32585 symbol_ref or (symbol_ref + addend).
32586
32587 According to the ARM ELF ABI, the initial addend of REL-type relocations
32588 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
32589 literal field of the instruction as a 16-bit signed value in the range
32590 -32768 <= A < 32768.
32591
32592 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
32593 unsigned range of 0 <= A < 256 as described in the AAELF32
32594 relocation handling documentation: REL-type relocations are encoded
32595 as unsigned in this case. */
32596
32597 bool
32598 arm_valid_symbolic_address_p (rtx addr)
32599 {
32600 rtx xop0, xop1 = NULL_RTX;
32601 rtx tmp = addr;
32602
32603 if (target_word_relocations)
32604 return false;
32605
32606 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
32607 return true;
32608
32609 /* (const (plus: symbol_ref const_int)) */
32610 if (GET_CODE (addr) == CONST)
32611 tmp = XEXP (addr, 0);
32612
32613 if (GET_CODE (tmp) == PLUS)
32614 {
32615 xop0 = XEXP (tmp, 0);
32616 xop1 = XEXP (tmp, 1);
32617
32618 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
32619 {
32620 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
32621 return IN_RANGE (INTVAL (xop1), 0, 0xff);
32622 else
32623 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
32624 }
32625 }
32626
32627 return false;
32628 }
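
/* Illustrative sketch (not GCC code): with default options the forms
   accepted above include

       (symbol_ref "foo")
       (label_ref ...)
       (const (plus (symbol_ref "foo") (const_int 1000)))

   while an addend such as 40000 is rejected on ARM/Thumb-2 because it
   does not fit the signed 16-bit REL addend of MOVW/MOVT, and on
   Thumb-1 without MOVT only addends in the unsigned range 0-255 are
   allowed.  */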
32629
32630 /* Return true if *COMPARISON is a valid comparison operation, and put
32631 the operands into a form that is valid for it. */
32632 bool
32633 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
32634 {
32635 enum rtx_code code = GET_CODE (*comparison);
32636 int code_int;
32637 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
32638 ? GET_MODE (*op2) : GET_MODE (*op1);
32639
32640 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
32641
32642 if (code == UNEQ || code == LTGT)
32643 return false;
32644
32645 code_int = (int)code;
32646 arm_canonicalize_comparison (&code_int, op1, op2, 0);
32647 PUT_CODE (*comparison, (enum rtx_code)code_int);
32648
32649 switch (mode)
32650 {
32651 case E_SImode:
32652 if (!arm_add_operand (*op1, mode))
32653 *op1 = force_reg (mode, *op1);
32654 if (!arm_add_operand (*op2, mode))
32655 *op2 = force_reg (mode, *op2);
32656 return true;
32657
32658 case E_DImode:
32659 /* gen_compare_reg() will sort out any invalid operands. */
32660 return true;
32661
32662 case E_HFmode:
32663 if (!TARGET_VFP_FP16INST)
32664 break;
32665 /* FP16 comparisons are done in SF mode. */
32666 mode = SFmode;
32667 *op1 = convert_to_mode (mode, *op1, 1);
32668 *op2 = convert_to_mode (mode, *op2, 1);
32669 /* Fall through. */
32670 case E_SFmode:
32671 case E_DFmode:
32672 if (!vfp_compare_operand (*op1, mode))
32673 *op1 = force_reg (mode, *op1);
32674 if (!vfp_compare_operand (*op2, mode))
32675 *op2 = force_reg (mode, *op2);
32676 return true;
32677 default:
32678 break;
32679 }
32680
32681 return false;
32682
32683 }
32684
32685 /* Maximum number of instructions to set block of memory. */
32686 static int
32687 arm_block_set_max_insns (void)
32688 {
32689 if (optimize_function_for_size_p (cfun))
32690 return 4;
32691 else
32692 return current_tune->max_insns_inline_memset;
32693 }
32694
32695 /* Return TRUE if it's profitable to set block of memory for
32696 non-vectorized case. VAL is the value to set the memory
32697 with. LENGTH is the number of bytes to set. ALIGN is the
32698 alignment of the destination memory in bytes. UNALIGNED_P
32699 is TRUE if we can only set the memory with instructions
32700 meeting alignment requirements. USE_STRD_P is TRUE if we
32701 can use strd to set the memory. */
32702 static bool
32703 arm_block_set_non_vect_profit_p (rtx val,
32704 unsigned HOST_WIDE_INT length,
32705 unsigned HOST_WIDE_INT align,
32706 bool unaligned_p, bool use_strd_p)
32707 {
32708 int num = 0;
32709 /* For leftovers of 0-7 bytes, this table gives the minimum number of
32710 strb/strh/str instructions needed to store them. */
32711 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32712
32713 if (unaligned_p)
32714 {
32715 num = arm_const_inline_cost (SET, val);
32716 num += length / align + length % align;
32717 }
32718 else if (use_strd_p)
32719 {
32720 num = arm_const_double_inline_cost (val);
32721 num += (length >> 3) + leftover[length & 7];
32722 }
32723 else
32724 {
32725 num = arm_const_inline_cost (SET, val);
32726 num += (length >> 2) + leftover[length & 3];
32727 }
32728
32729 /* We may be able to combine last pair STRH/STRB into a single STR
32730 by shifting one byte back. */
32731 if (unaligned_access && length > 3 && (length & 3) == 3)
32732 num--;
32733
32734 return (num <= arm_block_set_max_insns ());
32735 }
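
/* Worked example (a sketch; the constant cost and the tuning limit
   vary with the target and options): for LENGTH == 15, ALIGN == 4,
   neither UNALIGNED_P nor USE_STRD_P, and a VAL that needs a single
   mov, the count is 1 (constant) + (15 >> 2) == 3 word stores
   + leftover[3] == 2 tail stores, minus 1 because the trailing
   STRH/STRB pair can be merged into one overlapping STR, i.e. 5
   instructions.  That passes a typical speed-tuned limit but not the
   4-instruction limit returned when optimizing for size.  */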
32736
32737 /* Return TRUE if it's profitable to set block of memory for
32738 vectorized case. LENGTH is the number of bytes to set.
32739 ALIGN is the alignment of destination memory in bytes.
32740 MODE is the vector mode used to set the memory. */
32741 static bool
32742 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32743 unsigned HOST_WIDE_INT align,
32744 machine_mode mode)
32745 {
32746 int num;
32747 bool unaligned_p = ((align & 3) != 0);
32748 unsigned int nelt = GET_MODE_NUNITS (mode);
32749
32750 /* Instruction loading constant value. */
32751 num = 1;
32752 /* Instructions storing the memory. */
32753 num += (length + nelt - 1) / nelt;
32754 /* Instructions adjusting the address expression. The address only
32755 needs adjusting when the destination is 4-byte aligned but the
32756 leftover bytes must be stored with a misaligned (shifted-back) store. */
32757 if (!unaligned_p && (length & 3) != 0)
32758 num++;
32759
32760 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32761 if (!unaligned_p && mode == V16QImode)
32762 num--;
32763
32764 return (num <= arm_block_set_max_insns ());
32765 }
32766
32767 /* Set a block of memory using vectorization instructions for the
32768 unaligned case. We fill the first LENGTH bytes of the memory
32769 area starting from DSTBASE with byte constant VALUE. ALIGN is
32770 the alignment requirement of memory. Return TRUE if succeeded. */
32771 static bool
32772 arm_block_set_unaligned_vect (rtx dstbase,
32773 unsigned HOST_WIDE_INT length,
32774 unsigned HOST_WIDE_INT value,
32775 unsigned HOST_WIDE_INT align)
32776 {
32777 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32778 rtx dst, mem;
32779 rtx val_vec, reg;
32780 rtx (*gen_func) (rtx, rtx);
32781 machine_mode mode;
32782 unsigned HOST_WIDE_INT v = value;
32783 unsigned int offset = 0;
32784 gcc_assert ((align & 0x3) != 0);
32785 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32786 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32787 if (length >= nelt_v16)
32788 {
32789 mode = V16QImode;
32790 gen_func = gen_movmisalignv16qi;
32791 }
32792 else
32793 {
32794 mode = V8QImode;
32795 gen_func = gen_movmisalignv8qi;
32796 }
32797 nelt_mode = GET_MODE_NUNITS (mode);
32798 gcc_assert (length >= nelt_mode);
32799 /* Skip if it isn't profitable. */
32800 if (!arm_block_set_vect_profit_p (length, align, mode))
32801 return false;
32802
32803 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32804 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32805
32806 v = sext_hwi (v, BITS_PER_WORD);
32807
32808 reg = gen_reg_rtx (mode);
32809 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32810 /* Emit instruction loading the constant value. */
32811 emit_move_insn (reg, val_vec);
32812
32813 /* Handle nelt_mode bytes in a vector. */
32814 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32815 {
32816 emit_insn ((*gen_func) (mem, reg));
32817 if (i + 2 * nelt_mode <= length)
32818 {
32819 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32820 offset += nelt_mode;
32821 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32822 }
32823 }
32824
32825 /* If nelt_v8 or more bytes are left over, we must be in
32826 V16QI mode. */
32827 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32828
32829 /* Handle (8, 16) bytes leftover. */
32830 if (i + nelt_v8 < length)
32831 {
32832 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32833 offset += length - i;
32834 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32835
32836 /* We are shifting bytes back, set the alignment accordingly. */
32837 if ((length & 1) != 0 && align >= 2)
32838 set_mem_align (mem, BITS_PER_UNIT);
32839
32840 emit_insn (gen_movmisalignv16qi (mem, reg));
32841 }
32842 /* Handle (0, 8] bytes leftover. */
32843 else if (i < length && i + nelt_v8 >= length)
32844 {
32845 if (mode == V16QImode)
32846 reg = gen_lowpart (V8QImode, reg);
32847
32848 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32849 + (nelt_mode - nelt_v8))));
32850 offset += (length - i) + (nelt_mode - nelt_v8);
32851 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32852
32853 /* We are shifting bytes back, set the alignment accordingly. */
32854 if ((length & 1) != 0 && align >= 2)
32855 set_mem_align (mem, BITS_PER_UNIT);
32856
32857 emit_insn (gen_movmisalignv8qi (mem, reg));
32858 }
32859
32860 return true;
32861 }
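
/* Worked example (a sketch, assuming the profitability check passes):
   for LENGTH == 29 with a byte-aligned destination, the mode is
   V16QImode.  The loop above emits one misaligned VST1 for bytes 0-15
   and stops; 13 bytes remain, which is more than nelt_v8, so the
   "(8, 16) bytes leftover" case advances DST by 13 and emits a second
   V16QImode store covering bytes 13-28.  The two stores overlap on
   bytes 13-15, which is harmless because every byte written is the
   same constant.  */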
32862
32863 /* Set a block of memory using vectorization instructions for the
32864 aligned case. We fill the first LENGTH bytes of the memory area
32865 starting from DSTBASE with byte constant VALUE. ALIGN is the
32866 alignment requirement of memory. Return TRUE if succeeded. */
32867 static bool
32868 arm_block_set_aligned_vect (rtx dstbase,
32869 unsigned HOST_WIDE_INT length,
32870 unsigned HOST_WIDE_INT value,
32871 unsigned HOST_WIDE_INT align)
32872 {
32873 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32874 rtx dst, addr, mem;
32875 rtx val_vec, reg;
32876 machine_mode mode;
32877 unsigned int offset = 0;
32878
32879 gcc_assert ((align & 0x3) == 0);
32880 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32881 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32882 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32883 mode = V16QImode;
32884 else
32885 mode = V8QImode;
32886
32887 nelt_mode = GET_MODE_NUNITS (mode);
32888 gcc_assert (length >= nelt_mode);
32889 /* Skip if it isn't profitable. */
32890 if (!arm_block_set_vect_profit_p (length, align, mode))
32891 return false;
32892
32893 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32894
32895 reg = gen_reg_rtx (mode);
32896 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32897 /* Emit instruction loading the constant value. */
32898 emit_move_insn (reg, val_vec);
32899
32900 i = 0;
32901 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32902 if (mode == V16QImode)
32903 {
32904 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32905 emit_insn (gen_movmisalignv16qi (mem, reg));
32906 i += nelt_mode;
32907 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32908 if (i + nelt_v8 < length && i + nelt_v16 > length)
32909 {
32910 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32911 offset += length - nelt_mode;
32912 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32913 /* We are shifting bytes back, set the alignment accordingly. */
32914 if ((length & 0x3) == 0)
32915 set_mem_align (mem, BITS_PER_UNIT * 4);
32916 else if ((length & 0x1) == 0)
32917 set_mem_align (mem, BITS_PER_UNIT * 2);
32918 else
32919 set_mem_align (mem, BITS_PER_UNIT);
32920
32921 emit_insn (gen_movmisalignv16qi (mem, reg));
32922 return true;
32923 }
32924 /* Fall through for bytes leftover. */
32925 mode = V8QImode;
32926 nelt_mode = GET_MODE_NUNITS (mode);
32927 reg = gen_lowpart (V8QImode, reg);
32928 }
32929
32930 /* Handle 8 bytes in a vector. */
32931 for (; (i + nelt_mode <= length); i += nelt_mode)
32932 {
32933 addr = plus_constant (Pmode, dst, i);
32934 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32935 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32936 emit_move_insn (mem, reg);
32937 else
32938 emit_insn (gen_unaligned_storev8qi (mem, reg));
32939 }
32940
32941 /* Handle single word leftover by shifting 4 bytes back. We can
32942 use aligned access for this case. */
32943 if (i + UNITS_PER_WORD == length)
32944 {
32945 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32946 offset += i - UNITS_PER_WORD;
32947 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32948 /* We are shifting 4 bytes back, set the alignment accordingly. */
32949 if (align > UNITS_PER_WORD)
32950 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32951
32952 emit_insn (gen_unaligned_storev8qi (mem, reg));
32953 }
32954 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32955 We have to use unaligned access for this case. */
32956 else if (i < length)
32957 {
32958 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32959 offset += length - nelt_mode;
32960 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32961 /* We are shifting bytes back, set the alignment accordingly. */
32962 if ((length & 1) == 0)
32963 set_mem_align (mem, BITS_PER_UNIT * 2);
32964 else
32965 set_mem_align (mem, BITS_PER_UNIT);
32966
32967 emit_insn (gen_movmisalignv8qi (mem, reg));
32968 }
32969
32970 return true;
32971 }
32972
32973 /* Set a block of memory using plain strh/strb instructions, only
32974 using instructions allowed by ALIGN on the processor. We fill the
32975 first LENGTH bytes of the memory area starting from DSTBASE
32976 with byte constant VALUE. ALIGN is the alignment requirement
32977 of memory. */
32978 static bool
32979 arm_block_set_unaligned_non_vect (rtx dstbase,
32980 unsigned HOST_WIDE_INT length,
32981 unsigned HOST_WIDE_INT value,
32982 unsigned HOST_WIDE_INT align)
32983 {
32984 unsigned int i;
32985 rtx dst, addr, mem;
32986 rtx val_exp, val_reg, reg;
32987 machine_mode mode;
32988 HOST_WIDE_INT v = value;
32989
32990 gcc_assert (align == 1 || align == 2);
32991
32992 if (align == 2)
32993 v |= (value << BITS_PER_UNIT);
32994
32995 v = sext_hwi (v, BITS_PER_WORD);
32996 val_exp = GEN_INT (v);
32997 /* Skip if it isn't profitable. */
32998 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32999 align, true, false))
33000 return false;
33001
33002 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33003 mode = (align == 2 ? HImode : QImode);
33004 val_reg = force_reg (SImode, val_exp);
33005 reg = gen_lowpart (mode, val_reg);
33006
33007 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
33008 {
33009 addr = plus_constant (Pmode, dst, i);
33010 mem = adjust_automodify_address (dstbase, mode, addr, i);
33011 emit_move_insn (mem, reg);
33012 }
33013
33014 /* Handle single byte leftover. */
33015 if (i + 1 == length)
33016 {
33017 reg = gen_lowpart (QImode, val_reg);
33018 addr = plus_constant (Pmode, dst, i);
33019 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33020 emit_move_insn (mem, reg);
33021 i++;
33022 }
33023
33024 gcc_assert (i == length);
33025 return true;
33026 }
33027
33028 /* Set a block of memory using plain strd/str/strh/strb instructions,
33029 to permit unaligned copies on processors which support unaligned
33030 semantics for those instructions. We fill the first LENGTH bytes
33031 of the memory area starting from DSTBASE with byte constant VALUE.
33032 ALIGN is the alignment requirement of memory. */
33033 static bool
33034 arm_block_set_aligned_non_vect (rtx dstbase,
33035 unsigned HOST_WIDE_INT length,
33036 unsigned HOST_WIDE_INT value,
33037 unsigned HOST_WIDE_INT align)
33038 {
33039 unsigned int i;
33040 rtx dst, addr, mem;
33041 rtx val_exp, val_reg, reg;
33042 unsigned HOST_WIDE_INT v;
33043 bool use_strd_p;
33044
33045 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
33046 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
33047
33048 v = (value | (value << 8) | (value << 16) | (value << 24));
33049 if (length < UNITS_PER_WORD)
33050 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
33051
33052 if (use_strd_p)
33053 v |= (v << BITS_PER_WORD);
33054 else
33055 v = sext_hwi (v, BITS_PER_WORD);
33056
33057 val_exp = GEN_INT (v);
33058 /* Skip if it isn't profitable. */
33059 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33060 align, false, use_strd_p))
33061 {
33062 if (!use_strd_p)
33063 return false;
33064
33065 /* Try without strd. */
33066 v = (v >> BITS_PER_WORD);
33067 v = sext_hwi (v, BITS_PER_WORD);
33068 val_exp = GEN_INT (v);
33069 use_strd_p = false;
33070 if (!arm_block_set_non_vect_profit_p (val_exp, length,
33071 align, false, use_strd_p))
33072 return false;
33073 }
33074
33075 i = 0;
33076 dst = copy_addr_to_reg (XEXP (dstbase, 0));
33077 /* Handle double words using strd if possible. */
33078 if (use_strd_p)
33079 {
33080 val_reg = force_reg (DImode, val_exp);
33081 reg = val_reg;
33082 for (; (i + 8 <= length); i += 8)
33083 {
33084 addr = plus_constant (Pmode, dst, i);
33085 mem = adjust_automodify_address (dstbase, DImode, addr, i);
33086 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
33087 emit_move_insn (mem, reg);
33088 else
33089 emit_insn (gen_unaligned_storedi (mem, reg));
33090 }
33091 }
33092 else
33093 val_reg = force_reg (SImode, val_exp);
33094
33095 /* Handle words. */
33096 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
33097 for (; (i + 4 <= length); i += 4)
33098 {
33099 addr = plus_constant (Pmode, dst, i);
33100 mem = adjust_automodify_address (dstbase, SImode, addr, i);
33101 if ((align & 3) == 0)
33102 emit_move_insn (mem, reg);
33103 else
33104 emit_insn (gen_unaligned_storesi (mem, reg));
33105 }
33106
33107 /* Merge last pair of STRH and STRB into a STR if possible. */
33108 if (unaligned_access && i > 0 && (i + 3) == length)
33109 {
33110 addr = plus_constant (Pmode, dst, i - 1);
33111 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
33112 /* We are shifting one byte back, set the alignment accordingly. */
33113 if ((align & 1) == 0)
33114 set_mem_align (mem, BITS_PER_UNIT);
33115
33116 /* Most likely this is an unaligned access, and we can't tell at
33117 compilation time. */
33118 emit_insn (gen_unaligned_storesi (mem, reg));
33119 return true;
33120 }
33121
33122 /* Handle half word leftover. */
33123 if (i + 2 <= length)
33124 {
33125 reg = gen_lowpart (HImode, val_reg);
33126 addr = plus_constant (Pmode, dst, i);
33127 mem = adjust_automodify_address (dstbase, HImode, addr, i);
33128 if ((align & 1) == 0)
33129 emit_move_insn (mem, reg);
33130 else
33131 emit_insn (gen_unaligned_storehi (mem, reg));
33132
33133 i += 2;
33134 }
33135
33136 /* Handle single byte leftover. */
33137 if (i + 1 == length)
33138 {
33139 reg = gen_lowpart (QImode, val_reg);
33140 addr = plus_constant (Pmode, dst, i);
33141 mem = adjust_automodify_address (dstbase, QImode, addr, i);
33142 emit_move_insn (mem, reg);
33143 }
33144
33145 return true;
33146 }
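
/* Worked example (a sketch): for LENGTH == 7 with ALIGN == 4 and no
   STRD, the word loop above stores bytes 0-3, leaving i == 4 with
   three bytes outstanding.  When unaligned access is available, the
   "merge last pair" case stores one word at offset 3, rewriting byte 3
   and covering bytes 4-6, instead of emitting an STRH at offset 4 plus
   an STRB at offset 6; the overlap is harmless because the same
   constant is written, and one instruction is saved, matching the
   num-- adjustment in arm_block_set_non_vect_profit_p.  */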
33147
33148 /* Set a block of memory using vectorization instructions for both
33149 aligned and unaligned cases. We fill the first LENGTH bytes of
33150 the memory area starting from DSTBASE with byte constant VALUE.
33151 ALIGN is the alignment requirement of memory. */
33152 static bool
33153 arm_block_set_vect (rtx dstbase,
33154 unsigned HOST_WIDE_INT length,
33155 unsigned HOST_WIDE_INT value,
33156 unsigned HOST_WIDE_INT align)
33157 {
33158 /* Check whether we need to use unaligned store instruction. */
33159 if (((align & 3) != 0 || (length & 3) != 0)
33160 /* Check whether unaligned store instruction is available. */
33161 && (!unaligned_access || BYTES_BIG_ENDIAN))
33162 return false;
33163
33164 if ((align & 3) == 0)
33165 return arm_block_set_aligned_vect (dstbase, length, value, align);
33166 else
33167 return arm_block_set_unaligned_vect (dstbase, length, value, align);
33168 }
33169
33170 /* Expand a string store (setmem) operation. First we try to do it using
33171 vectorization instructions, then with ARM unaligned access and
33172 double-word stores if profitable. OPERANDS[0] is the destination,
33173 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
33174 initialize the memory, OPERANDS[3] is the known alignment of the
33175 destination. */
33176 bool
33177 arm_gen_setmem (rtx *operands)
33178 {
33179 rtx dstbase = operands[0];
33180 unsigned HOST_WIDE_INT length;
33181 unsigned HOST_WIDE_INT value;
33182 unsigned HOST_WIDE_INT align;
33183
33184 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
33185 return false;
33186
33187 length = UINTVAL (operands[1]);
33188 if (length > 64)
33189 return false;
33190
33191 value = (UINTVAL (operands[2]) & 0xFF);
33192 align = UINTVAL (operands[3]);
33193 if (TARGET_NEON && length >= 8
33194 && current_tune->string_ops_prefer_neon
33195 && arm_block_set_vect (dstbase, length, value, align))
33196 return true;
33197
33198 if (!unaligned_access && (align & 3) != 0)
33199 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
33200
33201 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
33202 }
33203
33204
33205 static bool
33206 arm_macro_fusion_p (void)
33207 {
33208 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
33209 }
33210
33211 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
33212 for MOVW / MOVT macro fusion. */
33213
33214 static bool
33215 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
33216 {
33217 /* We are trying to fuse
33218 movw imm / movt imm
33219 instructions as a group that gets scheduled together. */
33220
33221 rtx set_dest = SET_DEST (curr_set);
33222
33223 if (GET_MODE (set_dest) != SImode)
33224 return false;
33225
33226 /* We are trying to match:
33227 prev (movw) == (set (reg r0) (const_int imm16))
33228 curr (movt) == (set (zero_extract (reg r0)
33229 (const_int 16)
33230 (const_int 16))
33231 (const_int imm16_1))
33232 or
33233 prev (movw) == (set (reg r1)
33234 (high (symbol_ref ("SYM"))))
33235 curr (movt) == (set (reg r0)
33236 (lo_sum (reg r1)
33237 (symbol_ref ("SYM")))) */
33238
33239 if (GET_CODE (set_dest) == ZERO_EXTRACT)
33240 {
33241 if (CONST_INT_P (SET_SRC (curr_set))
33242 && CONST_INT_P (SET_SRC (prev_set))
33243 && REG_P (XEXP (set_dest, 0))
33244 && REG_P (SET_DEST (prev_set))
33245 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
33246 return true;
33247
33248 }
33249 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
33250 && REG_P (SET_DEST (curr_set))
33251 && REG_P (SET_DEST (prev_set))
33252 && GET_CODE (SET_SRC (prev_set)) == HIGH
33253 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
33254 return true;
33255
33256 return false;
33257 }
33258
33259 static bool
33260 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
33261 {
33262 rtx prev_set = single_set (prev);
33263 rtx curr_set = single_set (curr);
33264
33265 if (!prev_set
33266 || !curr_set)
33267 return false;
33268
33269 if (any_condjump_p (curr))
33270 return false;
33271
33272 if (!arm_macro_fusion_p ())
33273 return false;
33274
33275 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
33276 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
33277 return true;
33278
33279 return false;
33280 }
33281
33282 /* Return true iff the instruction fusion described by OP is enabled. */
33283 bool
33284 arm_fusion_enabled_p (tune_params::fuse_ops op)
33285 {
33286 return current_tune->fusible_ops & op;
33287 }
33288
33289 /* Return TRUE if return address signing mechanism is enabled. */
33290 bool
33291 arm_current_function_pac_enabled_p (void)
33292 {
33293 return (aarch_ra_sign_scope == AARCH_FUNCTION_ALL
33294 || (aarch_ra_sign_scope == AARCH_FUNCTION_NON_LEAF
33295 && !crtl->is_leaf));
33296 }
33297
33298 /* Raise an error if the current target arch is not bti compatible. */
33299 void aarch_bti_arch_check (void)
33300 {
33301 if (!arm_arch8m_main)
33302 error ("This architecture does not support branch protection instructions");
33303 }
33304
33305 /* Return TRUE if Branch Target Identification Mechanism is enabled. */
33306 bool
33307 aarch_bti_enabled (void)
33308 {
33309 return aarch_enable_bti != 0;
33310 }
33311
33312 /* Check if INSN is a BTI J insn. */
33313 bool
33314 aarch_bti_j_insn_p (rtx_insn *insn)
33315 {
33316 if (!insn || !INSN_P (insn))
33317 return false;
33318
33319 rtx pat = PATTERN (insn);
33320 return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == VUNSPEC_BTI_NOP;
33321 }
33322
33323 /* Check if X (or any sub-rtx of X) is a PACIASP/PACIBSP instruction. */
33324 bool
33325 aarch_pac_insn_p (rtx x)
33326 {
33327 if (!x || !INSN_P (x))
33328 return false;
33329
33330 rtx pat = PATTERN (x);
33331
33332 if (GET_CODE (pat) == SET)
33333 {
33334 rtx tmp = XEXP (pat, 1);
33335 if (tmp
33336 && ((GET_CODE (tmp) == UNSPEC
33337 && XINT (tmp, 1) == UNSPEC_PAC_NOP)
33338 || (GET_CODE (tmp) == UNSPEC_VOLATILE
33339 && XINT (tmp, 1) == VUNSPEC_PACBTI_NOP)))
33340 return true;
33341 }
33342
33343 return false;
33344 }
33345
33346 /* Target specific mapping for aarch_gen_bti_c and aarch_gen_bti_j.
33347 For Arm, both of these map to a simple BTI instruction. */
33348
33349 rtx
33350 aarch_gen_bti_c (void)
33351 {
33352 return gen_bti_nop ();
33353 }
33354
33355 rtx
33356 aarch_gen_bti_j (void)
33357 {
33358 return gen_bti_nop ();
33359 }
33360
33361 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
33362 scheduled for speculative execution. Reject the long-running division
33363 and square-root instructions. */
33364
33365 static bool
33366 arm_sched_can_speculate_insn (rtx_insn *insn)
33367 {
33368 switch (get_attr_type (insn))
33369 {
33370 case TYPE_SDIV:
33371 case TYPE_UDIV:
33372 case TYPE_FDIVS:
33373 case TYPE_FDIVD:
33374 case TYPE_FSQRTS:
33375 case TYPE_FSQRTD:
33376 case TYPE_NEON_FP_SQRT_S:
33377 case TYPE_NEON_FP_SQRT_D:
33378 case TYPE_NEON_FP_SQRT_S_Q:
33379 case TYPE_NEON_FP_SQRT_D_Q:
33380 case TYPE_NEON_FP_DIV_S:
33381 case TYPE_NEON_FP_DIV_D:
33382 case TYPE_NEON_FP_DIV_S_Q:
33383 case TYPE_NEON_FP_DIV_D_Q:
33384 return false;
33385 default:
33386 return true;
33387 }
33388 }
33389
33390 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33391
33392 static unsigned HOST_WIDE_INT
33393 arm_asan_shadow_offset (void)
33394 {
33395 return HOST_WIDE_INT_1U << 29;
33396 }
33397
33398
33399 /* This is a temporary fix for PR60655. Ideally we need
33400 to handle most of these cases in the generic part but
33401 currently we reject minus (..) (sym_ref). We try to
33402 ameliorate the case with minus (sym_ref1) (sym_ref2)
33403 where they are in the same section. */
33404
33405 static bool
33406 arm_const_not_ok_for_debug_p (rtx p)
33407 {
33408 tree decl_op0 = NULL;
33409 tree decl_op1 = NULL;
33410
33411 if (GET_CODE (p) == UNSPEC)
33412 return true;
33413 if (GET_CODE (p) == MINUS)
33414 {
33415 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
33416 {
33417 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
33418 if (decl_op1
33419 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
33420 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
33421 {
33422 if ((VAR_P (decl_op1)
33423 || TREE_CODE (decl_op1) == CONST_DECL)
33424 && (VAR_P (decl_op0)
33425 || TREE_CODE (decl_op0) == CONST_DECL))
33426 return (get_variable_section (decl_op1, false)
33427 != get_variable_section (decl_op0, false));
33428
33429 if (TREE_CODE (decl_op1) == LABEL_DECL
33430 && TREE_CODE (decl_op0) == LABEL_DECL)
33431 return (DECL_CONTEXT (decl_op1)
33432 != DECL_CONTEXT (decl_op0));
33433 }
33434
33435 return true;
33436 }
33437 }
33438
33439 return false;
33440 }
33441
33442 /* Return TRUE if X is a reference to a value in a constant pool. */
33443 extern bool
33444 arm_is_constant_pool_ref (rtx x)
33445 {
33446 return (MEM_P (x)
33447 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
33448 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
33449 }
33450
33451 /* Remember the last target of arm_set_current_function. */
33452 static GTY(()) tree arm_previous_fndecl;
33453
33454 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
33455
33456 void
33457 save_restore_target_globals (tree new_tree)
33458 {
33459 /* If we have a previous state, use it. */
33460 if (TREE_TARGET_GLOBALS (new_tree))
33461 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33462 else if (new_tree == target_option_default_node)
33463 restore_target_globals (&default_target_globals);
33464 else
33465 {
33466 /* Call target_reinit and save the state for TARGET_GLOBALS. */
33467 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
33468 }
33469
33470 arm_option_params_internal ();
33471 }
33472
33473 /* Invalidate arm_previous_fndecl. */
33474
33475 void
33476 arm_reset_previous_fndecl (void)
33477 {
33478 arm_previous_fndecl = NULL_TREE;
33479 }
33480
33481 /* Establish appropriate back-end context for processing the function
33482 FNDECL. The argument might be NULL to indicate processing at top
33483 level, outside of any function scope. */
33484
33485 static void
33486 arm_set_current_function (tree fndecl)
33487 {
33488 if (!fndecl || fndecl == arm_previous_fndecl)
33489 return;
33490
33491 tree old_tree = (arm_previous_fndecl
33492 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
33493 : NULL_TREE);
33494
33495 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33496
33497 /* If current function has no attributes but previous one did,
33498 use the default node. */
33499 if (! new_tree && old_tree)
33500 new_tree = target_option_default_node;
33501
33502 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
33503 the default have been handled by save_restore_target_globals from
33504 arm_pragma_target_parse. */
33505 if (old_tree == new_tree)
33506 return;
33507
33508 arm_previous_fndecl = fndecl;
33509
33510 /* First set the target options. */
33511 cl_target_option_restore (&global_options, &global_options_set,
33512 TREE_TARGET_OPTION (new_tree));
33513
33514 save_restore_target_globals (new_tree);
33515
33516 arm_override_options_after_change_1 (&global_options, &global_options_set);
33517 }
33518
33519 /* Implement TARGET_OPTION_PRINT. */
33520
33521 static void
33522 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
33523 {
33524 int flags = ptr->x_target_flags;
33525 const char *fpu_name;
33526
33527 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
33528 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
33529
33530 fprintf (file, "%*sselected isa %s\n", indent, "",
33531 TARGET_THUMB2_P (flags) ? "thumb2" :
33532 TARGET_THUMB_P (flags) ? "thumb1" :
33533 "arm");
33534
33535 if (ptr->x_arm_arch_string)
33536 fprintf (file, "%*sselected architecture %s\n", indent, "",
33537 ptr->x_arm_arch_string);
33538
33539 if (ptr->x_arm_cpu_string)
33540 fprintf (file, "%*sselected CPU %s\n", indent, "",
33541 ptr->x_arm_cpu_string);
33542
33543 if (ptr->x_arm_tune_string)
33544 fprintf (file, "%*sselected tune %s\n", indent, "",
33545 ptr->x_arm_tune_string);
33546
33547 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
33548 }
33549
33550 /* Hook to determine if one function can safely inline another. */
33551
33552 static bool
33553 arm_can_inline_p (tree caller, tree callee)
33554 {
33555 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33556 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33557 bool can_inline = true;
33558
33559 struct cl_target_option *caller_opts
33560 = TREE_TARGET_OPTION (caller_tree ? caller_tree
33561 : target_option_default_node);
33562
33563 struct cl_target_option *callee_opts
33564 = TREE_TARGET_OPTION (callee_tree ? callee_tree
33565 : target_option_default_node);
33566
33567 if (callee_opts == caller_opts)
33568 return true;
33569
33570 /* Callee's ISA features should be a subset of the caller's. */
33571 struct arm_build_target caller_target;
33572 struct arm_build_target callee_target;
33573 caller_target.isa = sbitmap_alloc (isa_num_bits);
33574 callee_target.isa = sbitmap_alloc (isa_num_bits);
33575
33576 arm_configure_build_target (&caller_target, caller_opts, false);
33577 arm_configure_build_target (&callee_target, callee_opts, false);
33578 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
33579 can_inline = false;
33580
33581 sbitmap_free (caller_target.isa);
33582 sbitmap_free (callee_target.isa);
33583
33584 /* OK to inline between different modes.
33585 Functions with mode-specific instructions, e.g. using asm,
33586 must be explicitly protected with noinline. */
33587 return can_inline;
33588 }
33589
33590 /* Hook to fix function's alignment affected by target attribute. */
33591
33592 static void
33593 arm_relayout_function (tree fndecl)
33594 {
33595 if (DECL_USER_ALIGN (fndecl))
33596 return;
33597
33598 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
33599
33600 if (!callee_tree)
33601 callee_tree = target_option_default_node;
33602
33603 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
33604 SET_DECL_ALIGN
33605 (fndecl,
33606 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
33607 }
33608
33609 /* Inner function to process attribute((target(...))): take an argument
33610 and set the current options from it. If the argument is a list,
33611 recursively process each element. */
33612
33613 static bool
33614 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
33615 {
33616 if (TREE_CODE (args) == TREE_LIST)
33617 {
33618 bool ret = true;
33619
33620 for (; args; args = TREE_CHAIN (args))
33621 if (TREE_VALUE (args)
33622 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
33623 ret = false;
33624 return ret;
33625 }
33626
33627 else if (TREE_CODE (args) != STRING_CST)
33628 {
33629 error ("attribute %<target%> argument not a string");
33630 return false;
33631 }
33632
33633 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
33634 char *q;
33635
33636 while ((q = strtok (argstr, ",")) != NULL)
33637 {
33638 argstr = NULL;
33639 if (!strcmp (q, "thumb"))
33640 {
33641 opts->x_target_flags |= MASK_THUMB;
33642 if (TARGET_FDPIC && !arm_arch_thumb2)
33643 sorry ("FDPIC mode is not supported in Thumb-1 mode");
33644 }
33645
33646 else if (!strcmp (q, "arm"))
33647 opts->x_target_flags &= ~MASK_THUMB;
33648
33649 else if (!strcmp (q, "general-regs-only"))
33650 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
33651
33652 else if (startswith (q, "fpu="))
33653 {
33654 int fpu_index;
33655 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
33656 &fpu_index, CL_TARGET))
33657 {
33658 error ("invalid fpu for target attribute or pragma %qs", q);
33659 return false;
33660 }
33661 if (fpu_index == TARGET_FPU_auto)
33662 {
33663 /* This doesn't really make sense until we support
33664 general dynamic selection of the architecture and all
33665 sub-features. */
33666 sorry ("auto fpu selection not currently permitted here");
33667 return false;
33668 }
33669 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
33670 }
33671 else if (startswith (q, "arch="))
33672 {
33673 char *arch = q + 5;
33674 const arch_option *arm_selected_arch
33675 = arm_parse_arch_option_name (all_architectures, "arch", arch);
33676
33677 if (!arm_selected_arch)
33678 {
33679 error ("invalid architecture for target attribute or pragma %qs",
33680 q);
33681 return false;
33682 }
33683
33684 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
33685 }
33686 else if (q[0] == '+')
33687 {
33688 opts->x_arm_arch_string
33689 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
33690 }
33691 else
33692 {
33693 error ("unknown target attribute or pragma %qs", q);
33694 return false;
33695 }
33696 }
33697
33698 return true;
33699 }
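
/* Usage sketch (hypothetical functions, shown only to illustrate the
   strings parsed above):

     __attribute__ ((target ("thumb,fpu=neon-vfpv4")))
     int f (int x) { return x + 1; }

     __attribute__ ((target ("arm,arch=armv7-a")))
     int g (int x) { return x - 1; }

   Each comma-separated token is handled by one strtok iteration:
   "thumb"/"arm" toggle MASK_THUMB, "general-regs-only" sets
   MASK_GENERAL_REGS_ONLY, "fpu=" and "arch=" select a named FPU or
   architecture, and a token starting with '+' appends an architecture
   extension to the current arch string.  */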
33700
33701 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
33702
33703 tree
33704 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
33705 struct gcc_options *opts_set)
33706 {
33707 struct cl_target_option cl_opts;
33708
33709 if (!arm_valid_target_attribute_rec (args, opts))
33710 return NULL_TREE;
33711
33712 cl_target_option_save (&cl_opts, opts, opts_set);
33713 arm_configure_build_target (&arm_active_target, &cl_opts, false);
33714 arm_option_check_internal (opts);
33715 /* Do any overrides, such as global options arch=xxx.
33716 We do this since arm_active_target was overridden. */
33717 arm_option_reconfigure_globals ();
33718 arm_options_perform_arch_sanity_checks ();
33719 arm_option_override_internal (opts, opts_set);
33720
33721 return build_target_option_node (opts, opts_set);
33722 }
33723
33724 static void
33725 add_attribute (const char * mode, tree *attributes)
33726 {
33727 size_t len = strlen (mode);
33728 tree value = build_string (len, mode);
33729
33730 TREE_TYPE (value) = build_array_type (char_type_node,
33731 build_index_type (size_int (len)));
33732
33733 *attributes = tree_cons (get_identifier ("target"),
33734 build_tree_list (NULL_TREE, value),
33735 *attributes);
33736 }
33737
33738 /* For testing. Insert thumb or arm modes alternately on functions. */
33739
33740 static void
33741 arm_insert_attributes (tree fndecl, tree * attributes)
33742 {
33743 const char *mode;
33744
33745 if (! TARGET_FLIP_THUMB)
33746 return;
33747
33748 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33749 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33750 return;
33751
33752 /* Nested definitions must inherit mode. */
33753 if (current_function_decl)
33754 {
33755 mode = TARGET_THUMB ? "thumb" : "arm";
33756 add_attribute (mode, attributes);
33757 return;
33758 }
33759
33760 /* If there is already a setting don't change it. */
33761 if (lookup_attribute ("target", *attributes) != NULL)
33762 return;
33763
33764 mode = thumb_flipper ? "thumb" : "arm";
33765 add_attribute (mode, attributes);
33766
33767 thumb_flipper = !thumb_flipper;
33768 }
33769
33770 /* Hook to validate attribute((target("string"))). */
33771
33772 static bool
33773 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33774 tree args, int ARG_UNUSED (flags))
33775 {
33776 bool ret = true;
33777 struct gcc_options func_options, func_options_set;
33778 tree cur_tree, new_optimize;
33779 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33780
33781 /* Get the optimization options of the current function. */
33782 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33783
33784 /* If the function changed the optimization levels as well as setting target
33785 options, start with the optimizations specified. */
33786 if (!func_optimize)
33787 func_optimize = optimization_default_node;
33788
33789 /* Init func_options. */
33790 memset (&func_options, 0, sizeof (func_options));
33791 init_options_struct (&func_options, NULL);
33792 lang_hooks.init_options_struct (&func_options);
33793 memset (&func_options_set, 0, sizeof (func_options_set));
33794
33795 /* Initialize func_options to the defaults. */
33796 cl_optimization_restore (&func_options, &func_options_set,
33797 TREE_OPTIMIZATION (func_optimize));
33798
33799 cl_target_option_restore (&func_options, &func_options_set,
33800 TREE_TARGET_OPTION (target_option_default_node));
33801
33802 /* Set func_options flags with new target mode. */
33803 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33804 &func_options_set);
33805
33806 if (cur_tree == NULL_TREE)
33807 ret = false;
33808
33809 new_optimize = build_optimization_node (&func_options, &func_options_set);
33810
33811 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33812
33813 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33814
33815 return ret;
33816 }
33817
33818 /* Match an ISA feature bitmap to a named FPU. We always use the
33819 first entry that exactly matches the feature set, so that we
33820 effectively canonicalize the FPU name for the assembler. */
33821 static const char*
33822 arm_identify_fpu_from_isa (sbitmap isa)
33823 {
33824 auto_sbitmap fpubits (isa_num_bits);
33825 auto_sbitmap cand_fpubits (isa_num_bits);
33826
33827 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33828
33829 /* If there are no ISA feature bits relating to the FPU, we must be
33830 doing soft-float. */
33831 if (bitmap_empty_p (fpubits))
33832 return "softvfp";
33833
33834 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33835 {
33836 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33837 if (bitmap_equal_p (fpubits, cand_fpubits))
33838 return all_fpus[i].name;
33839 }
33840 /* We must find an entry, or things have gone wrong. */
33841 gcc_unreachable ();
33842 }
33843
33844 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33845 by the function fndecl. */
33846 void
33847 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33848 {
33849 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33850
33851 struct cl_target_option *targ_options;
33852 if (target_parts)
33853 targ_options = TREE_TARGET_OPTION (target_parts);
33854 else
33855 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33856 gcc_assert (targ_options);
33857
33858 arm_print_asm_arch_directives (stream, targ_options);
33859
33860 fprintf (stream, "\t.syntax unified\n");
33861
33862 if (TARGET_THUMB)
33863 {
33864 if (is_called_in_ARM_mode (decl)
33865 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33866 && cfun->is_thunk))
33867 fprintf (stream, "\t.code 32\n");
33868 else if (TARGET_THUMB1)
33869 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33870 else
33871 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33872 }
33873 else
33874 fprintf (stream, "\t.arm\n");
33875
33876 if (TARGET_POKE_FUNCTION_NAME)
33877 arm_poke_function_name (stream, (const char *) name);
33878 }
33879
33880 /* If MEM is in the form of [base+offset], extract the two parts
33881 of the address into BASE and OFFSET, otherwise return false
33882 after clearing BASE and OFFSET. */
33883
33884 static bool
33885 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33886 {
33887 rtx addr;
33888
33889 gcc_assert (MEM_P (mem));
33890
33891 addr = XEXP (mem, 0);
33892
33893 /* Strip off const from addresses like (const (addr)). */
33894 if (GET_CODE (addr) == CONST)
33895 addr = XEXP (addr, 0);
33896
33897 if (REG_P (addr))
33898 {
33899 *base = addr;
33900 *offset = const0_rtx;
33901 return true;
33902 }
33903
33904 if (GET_CODE (addr) == PLUS
33905 && GET_CODE (XEXP (addr, 0)) == REG
33906 && CONST_INT_P (XEXP (addr, 1)))
33907 {
33908 *base = XEXP (addr, 0);
33909 *offset = XEXP (addr, 1);
33910 return true;
33911 }
33912
33913 *base = NULL_RTX;
33914 *offset = NULL_RTX;
33915
33916 return false;
33917 }
33918
33919 /* If INSN is a load or store whose address is in the form [base+offset],
33920 extract the two parts into BASE and OFFSET. IS_LOAD is set
33921 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33922 otherwise return FALSE. */
33923
33924 static bool
33925 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33926 {
33927 rtx x, dest, src;
33928
33929 gcc_assert (INSN_P (insn));
33930 x = PATTERN (insn);
33931 if (GET_CODE (x) != SET)
33932 return false;
33933
33934 src = SET_SRC (x);
33935 dest = SET_DEST (x);
33936 if (REG_P (src) && MEM_P (dest))
33937 {
33938 *is_load = false;
33939 extract_base_offset_in_addr (dest, base, offset);
33940 }
33941 else if (MEM_P (src) && REG_P (dest))
33942 {
33943 *is_load = true;
33944 extract_base_offset_in_addr (src, base, offset);
33945 }
33946 else
33947 return false;
33948
33949 return (*base != NULL_RTX && *offset != NULL_RTX);
33950 }
33951
33952 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33953
33954 Currently we only support fusing ldr and str instructions, so FUSION_PRI
33955 and PRI are only calculated for these instructions. For other instructions,
33956 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
33957 instruction fusion can be supported by returning different priorities.
33958
33959 It's important that irrelevant instructions get the largest FUSION_PRI. */
33960
33961 static void
33962 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33963 int *fusion_pri, int *pri)
33964 {
33965 int tmp, off_val;
33966 bool is_load;
33967 rtx base, offset;
33968
33969 gcc_assert (INSN_P (insn));
33970
33971 tmp = max_pri - 1;
33972 if (!fusion_load_store (insn, &base, &offset, &is_load))
33973 {
33974 *pri = tmp;
33975 *fusion_pri = tmp;
33976 return;
33977 }
33978
33979 /* Load goes first. */
33980 if (is_load)
33981 *fusion_pri = tmp - 1;
33982 else
33983 *fusion_pri = tmp - 2;
33984
33985 tmp /= 2;
33986
33987 /* INSN with smaller base register goes first. */
33988 tmp -= ((REGNO (base) & 0xff) << 20);
33989
33990 /* INSN with smaller offset goes first. */
33991 off_val = (int)(INTVAL (offset));
33992 if (off_val >= 0)
33993 tmp -= (off_val & 0xfffff);
33994 else
33995 tmp += ((- off_val) & 0xfffff);
33996
33997 *pri = tmp;
33998 return;
33999 }
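
/* A minimal, self-contained sketch of the priority arithmetic above, using
   plain integers in place of REGNO/INTVAL (illustrative only; the name
   fusion_priority_sketch and its parameters are not part of the original
   source). */
static void
fusion_priority_sketch (int max_pri, bool is_load, int regno, long offset,
                        int *fusion_pri, int *pri)
{
  int tmp = max_pri - 1;
  /* Loads are grouped ahead of stores for fusion purposes.  */
  *fusion_pri = is_load ? tmp - 1 : tmp - 2;
  tmp /= 2;
  /* Smaller base register goes first, then smaller offset.  */
  tmp -= (regno & 0xff) << 20;
  tmp -= (offset >= 0 ? (int) (offset & 0xfffff)
                      : -(int) ((-offset) & 0xfffff));
  *pri = tmp;
}
/* For example, two loads [r2, #4] and [r2, #8] get the same FUSION_PRI
   (max_pri - 2), while the #4 load gets the larger PRI and is therefore
   scheduled first, keeping the candidate pair adjacent. */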
34000
34001
34002 /* Construct and return a PARALLEL RTX vector with elements numbering the
34003 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
34004 the vector - from the perspective of the architecture. This does not
34005 line up with GCC's perspective on lane numbers, so we end up with
34006 different masks depending on our target endian-ness. The diagram
34007 below may help. We must draw the distinction when building masks
34008 which select one half of the vector. An instruction selecting
34009 architectural low-lanes for a big-endian target must be described using
34010 a mask selecting GCC high-lanes.
34011
34012 Big-Endian Little-Endian
34013
34014 GCC 0 1 2 3 3 2 1 0
34015 | x | x | x | x | | x | x | x | x |
34016 Architecture 3 2 1 0 3 2 1 0
34017
34018 Low Mask: { 2, 3 } { 0, 1 }
34019 High Mask: { 0, 1 } { 2, 3 }
34020 */
34021
34022 rtx
34023 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
34024 {
34025 int nunits = GET_MODE_NUNITS (mode);
34026 rtvec v = rtvec_alloc (nunits / 2);
34027 int high_base = nunits / 2;
34028 int low_base = 0;
34029 int base;
34030 rtx t1;
34031 int i;
34032
34033 if (BYTES_BIG_ENDIAN)
34034 base = high ? low_base : high_base;
34035 else
34036 base = high ? high_base : low_base;
34037
34038 for (i = 0; i < nunits / 2; i++)
34039 RTVEC_ELT (v, i) = GEN_INT (base + i);
34040
34041 t1 = gen_rtx_PARALLEL (mode, v);
34042 return t1;
34043 }
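
/* For example (illustrative), arm_simd_vect_par_cnst_half (V4SImode, true)
   yields (parallel [2 3]) on a little-endian target and (parallel [0 1]) on
   a big-endian target, matching the High Mask row of the diagram above. */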
34044
34045 /* Check OP for validity as a PARALLEL RTX vector with elements
34046 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
34047 from the perspective of the architecture. See the diagram above
34048 arm_simd_vect_par_cnst_half for more details. */
34049
34050 bool
34051 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
34052 bool high)
34053 {
34054 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
34055 HOST_WIDE_INT count_op = XVECLEN (op, 0);
34056 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
34057 int i = 0;
34058
34059 if (!VECTOR_MODE_P (mode))
34060 return false;
34061
34062 if (count_op != count_ideal)
34063 return false;
34064
34065 for (i = 0; i < count_ideal; i++)
34066 {
34067 rtx elt_op = XVECEXP (op, 0, i);
34068 rtx elt_ideal = XVECEXP (ideal, 0, i);
34069
34070 if (!CONST_INT_P (elt_op)
34071 || INTVAL (elt_ideal) != INTVAL (elt_op))
34072 return false;
34073 }
34074 return true;
34075 }
34076
34077 /* Can output mi_thunk for all cases except for non-zero vcall_offset
34078 in Thumb1. */
34079 static bool
34080 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
34081 const_tree)
34082 {
34083 /* For now, we punt and do not handle this for TARGET_THUMB1. */
34084 if (vcall_offset && TARGET_THUMB1)
34085 return false;
34086
34087 /* Otherwise ok. */
34088 return true;
34089 }
34090
34091 /* Generate RTL for a conditional branch with rtx comparison CODE in
34092 mode CC_MODE. The destination of the unlikely conditional branch
34093 is LABEL_REF. */
34094
34095 void
34096 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
34097 rtx label_ref)
34098 {
34099 rtx x;
34100 x = gen_rtx_fmt_ee (code, VOIDmode,
34101 gen_rtx_REG (cc_mode, CC_REGNUM),
34102 const0_rtx);
34103
34104 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
34105 gen_rtx_LABEL_REF (VOIDmode, label_ref),
34106 pc_rtx);
34107 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
34108 }
34109
34110 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
34111
34112 For pure-code sections there is no letter code for this attribute, so
34113 output all the section flags numerically when this is needed. */
34114
34115 static bool
34116 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
34117 {
34118
34119 if (flags & SECTION_ARM_PURECODE)
34120 {
34121 *num = 0x20000000;
34122
34123 if (!(flags & SECTION_DEBUG))
34124 *num |= 0x2;
34125 if (flags & SECTION_EXCLUDE)
34126 *num |= 0x80000000;
34127 if (flags & SECTION_WRITE)
34128 *num |= 0x1;
34129 if (flags & SECTION_CODE)
34130 *num |= 0x4;
34131 if (flags & SECTION_MERGE)
34132 *num |= 0x10;
34133 if (flags & SECTION_STRINGS)
34134 *num |= 0x20;
34135 if (flags & SECTION_TLS)
34136 *num |= 0x400;
34137 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
34138 *num |= 0x200;
34139
34140 return true;
34141 }
34142
34143 return false;
34144 }
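
/* A self-contained sketch of the numeric-flag computation above for a
   typical pure-code executable section (illustrative only; the helper name
   is hypothetical, and the hexadecimal values are the ELF SHF_* bits the
   function emits). */
static unsigned int
purecode_text_section_flags_sketch (void)
{
  unsigned int num = 0x20000000;	/* SHF_ARM_PURECODE */
  num |= 0x2;				/* SHF_ALLOC: not a debug section */
  num |= 0x4;				/* SHF_EXECINSTR: SECTION_CODE is set */
  return num;				/* 0x20000006 */
}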
34145
34146 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
34147
34148 If pure-code is passed as an option, make sure all functions are in
34149 sections that have the SHF_ARM_PURECODE attribute. */
34150
34151 static section *
34152 arm_function_section (tree decl, enum node_frequency freq,
34153 bool startup, bool exit)
34154 {
34155 const char * section_name;
34156 section * sec;
34157
34158 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
34159 return default_function_section (decl, freq, startup, exit);
34160
34161 if (!target_pure_code)
34162 return default_function_section (decl, freq, startup, exit);
34163
34164
34165 section_name = DECL_SECTION_NAME (decl);
34166
34167 /* If a function is not in a named section then it falls under the 'default'
34168 text section, also known as '.text'. We can preserve previous behavior as
34169 the default text section already has the SHF_ARM_PURECODE section
34170 attribute. */
34171 if (!section_name)
34172 {
34173 section *default_sec = default_function_section (decl, freq, startup,
34174 exit);
34175
34176 /* If default_sec is not null, then it must be a special section like for
34177 example .text.startup. We set the pure-code attribute and return the
34178 same section to preserve existing behavior. */
34179 if (default_sec)
34180 default_sec->common.flags |= SECTION_ARM_PURECODE;
34181 return default_sec;
34182 }
34183
34184 /* Otherwise look whether a section has already been created with
34185 'section_name'. */
34186 sec = get_named_section (decl, section_name, 0);
34187 if (!sec)
34188 /* If that is not the case passing NULL as the section's name to
34189 'get_named_section' will create a section with the declaration's
34190 section name. */
34191 sec = get_named_section (decl, NULL, 0);
34192
34193 /* Set the SHF_ARM_PURECODE attribute. */
34194 sec->common.flags |= SECTION_ARM_PURECODE;
34195
34196 return sec;
34197 }
34198
34199 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
34200
34201 If DECL is a function declaration and pure-code is passed as an option
34202 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
34203 section's name and RELOC indicates whether the declaration's initializer may
34204 contain runtime relocations. */
34205
34206 static unsigned int
34207 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
34208 {
34209 unsigned int flags = default_section_type_flags (decl, name, reloc);
34210
34211 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
34212 flags |= SECTION_ARM_PURECODE;
34213
34214 return flags;
34215 }
34216
34217 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
34218
34219 static void
34220 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
34221 rtx op0, rtx op1,
34222 rtx *quot_p, rtx *rem_p)
34223 {
34224 if (mode == SImode)
34225 gcc_assert (!TARGET_IDIV);
34226
34227 scalar_int_mode libval_mode
34228 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
34229
34230 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
34231 libval_mode, op0, mode, op1, mode);
34232
34233 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
34234 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
34235 GET_MODE_SIZE (mode));
34236
34237 gcc_assert (quotient);
34238 gcc_assert (remainder);
34239
34240 *quot_p = quotient;
34241 *rem_p = remainder;
34242 }
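
/* Illustrative note: for SImode operands LIBVAL_MODE is DImode; the AEABI
   divmod helpers return quotient and remainder as a register pair, and the
   two subregs above extract the quotient from byte offset 0 and the
   remainder from byte offset 4 of that DImode value. */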
34243
34244 /* This function checks for the availability of the coprocessor builtin passed
34245 in BUILTIN for the current target. Returns true if it is available and
34246 false otherwise. If a BUILTIN is passed for which this function has not
34247 been implemented, it will cause an internal compiler error. */
34248
34249 bool
34250 arm_coproc_builtin_available (enum unspecv builtin)
34251 {
34252 /* None of these builtins are available in Thumb mode if the target only
34253 supports Thumb-1. */
34254 if (TARGET_THUMB1)
34255 return false;
34256
34257 switch (builtin)
34258 {
34259 case VUNSPEC_CDP:
34260 case VUNSPEC_LDC:
34261 case VUNSPEC_LDCL:
34262 case VUNSPEC_STC:
34263 case VUNSPEC_STCL:
34264 case VUNSPEC_MCR:
34265 case VUNSPEC_MRC:
34266 if (arm_arch4)
34267 return true;
34268 break;
34269 case VUNSPEC_CDP2:
34270 case VUNSPEC_LDC2:
34271 case VUNSPEC_LDC2L:
34272 case VUNSPEC_STC2:
34273 case VUNSPEC_STC2L:
34274 case VUNSPEC_MCR2:
34275 case VUNSPEC_MRC2:
34276 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
34277 ARMv8-{A,M}. */
34278 if (arm_arch5t)
34279 return true;
34280 break;
34281 case VUNSPEC_MCRR:
34282 case VUNSPEC_MRRC:
34283 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
34284 ARMv8-{A,M}. */
34285 if (arm_arch6 || arm_arch5te)
34286 return true;
34287 break;
34288 case VUNSPEC_MCRR2:
34289 case VUNSPEC_MRRC2:
34290 if (arm_arch6)
34291 return true;
34292 break;
34293 default:
34294 gcc_unreachable ();
34295 }
34296 return false;
34297 }
34298
34299 /* This function returns true if OP is a valid memory operand for the ldc and
34300 stc coprocessor instructions and false otherwise. */
34301
34302 bool
34303 arm_coproc_ldc_stc_legitimate_address (rtx op)
34304 {
34305 HOST_WIDE_INT range;
34306 /* Has to be a memory operand. */
34307 if (!MEM_P (op))
34308 return false;
34309
34310 op = XEXP (op, 0);
34311
34312 /* We accept registers. */
34313 if (REG_P (op))
34314 return true;
34315
34316 switch (GET_CODE (op))
34317 {
34318 case PLUS:
34319 {
34320 /* Or registers with an offset. */
34321 if (!REG_P (XEXP (op, 0)))
34322 return false;
34323
34324 op = XEXP (op, 1);
34325
34326 /* The offset must be an immediate though. */
34327 if (!CONST_INT_P (op))
34328 return false;
34329
34330 range = INTVAL (op);
34331
34332 /* Within the range of [-1020,1020]. */
34333 if (!IN_RANGE (range, -1020, 1020))
34334 return false;
34335
34336 /* And a multiple of 4. */
34337 return (range % 4) == 0;
34338 }
34339 case PRE_INC:
34340 case POST_INC:
34341 case PRE_DEC:
34342 case POST_DEC:
34343 return REG_P (XEXP (op, 0));
34344 default:
34345 gcc_unreachable ();
34346 }
34347 return false;
34348 }
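
/* A self-contained sketch of just the immediate-offset check above
   (illustrative only; the helper name is hypothetical): the offset of
   [Rn, #imm] must be a multiple of four in the range [-1020, 1020]. */
static bool
ldc_stc_offset_ok_sketch (long offset)
{
  return offset >= -1020 && offset <= 1020 && (offset % 4) == 0;
}
/* For example, [r0], [r0, #8] and [r0, #-1020] are accepted, while
   [r0, #2] and [r0, #1024] are rejected. */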
34349
34350 /* Return the diagnostic message string if conversion from FROMTYPE to
34351 TOTYPE is not allowed, NULL otherwise. */
34352
34353 static const char *
34354 arm_invalid_conversion (const_tree fromtype, const_tree totype)
34355 {
34356 if (element_mode (fromtype) != element_mode (totype))
34357 {
34358 /* Do not allow conversions to/from BFmode scalar types. */
34359 if (TYPE_MODE (fromtype) == BFmode)
34360 return N_("invalid conversion from type %<bfloat16_t%>");
34361 if (TYPE_MODE (totype) == BFmode)
34362 return N_("invalid conversion to type %<bfloat16_t%>");
34363 }
34364
34365 /* Conversion allowed. */
34366 return NULL;
34367 }
34368
34369 /* Return the diagnostic message string if the unary operation OP is
34370 not permitted on TYPE, NULL otherwise. */
34371
34372 static const char *
34373 arm_invalid_unary_op (int op, const_tree type)
34374 {
34375 /* Reject all single-operand operations on BFmode except for &. */
34376 if (element_mode (type) == BFmode && op != ADDR_EXPR)
34377 return N_("operation not permitted on type %<bfloat16_t%>");
34378
34379 /* Operation allowed. */
34380 return NULL;
34381 }
34382
34383 /* Return the diagnostic message string if the binary operation OP is
34384 not permitted on TYPE1 and TYPE2, NULL otherwise. */
34385
34386 static const char *
34387 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
34388 const_tree type2)
34389 {
34390 /* Reject all 2-operand operations on BFmode. */
34391 if (element_mode (type1) == BFmode
34392 || element_mode (type2) == BFmode)
34393 return N_("operation not permitted on type %<bfloat16_t%>");
34394
34395 /* Operation allowed. */
34396 return NULL;
34397 }
34398
34399 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
34400
34401 In VFPv1, VFP registers could only be accessed in the mode they were
34402 set, so subregs would be invalid there. However, we don't support
34403 VFPv1 at the moment, and the restriction was lifted in VFPv2.
34404
34405 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
34406 VFP registers in little-endian order. We can't describe that accurately to
34407 GCC, so avoid taking subregs of such values.
34408
34409 The only exception is going from a 128-bit to a 64-bit type. In that
34410 case the data layout happens to be consistent for big-endian, so we
34411 explicitly allow that case. */
34412
34413 static bool
34414 arm_can_change_mode_class (machine_mode from, machine_mode to,
34415 reg_class_t rclass)
34416 {
34417 if (TARGET_BIG_END
34418 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
34419 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
34420 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
34421 && reg_classes_intersect_p (VFP_REGS, rclass))
34422 return false;
34423 return true;
34424 }
34425
34426 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
34427 strcpy from constants will be faster. */
34428
34429 static HOST_WIDE_INT
34430 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
34431 {
34432 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
34433 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
34434 return MAX (align, BITS_PER_WORD * factor);
34435 return align;
34436 }
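
/* Illustrative note: when not optimizing for size, a string constant on a
   non-Thumb target tuned for XScale is aligned to at least 64 bits, and to
   at least BITS_PER_WORD (32 bits) otherwise; with -Os the incoming
   alignment is returned unchanged. */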
34437
34438 /* Emit a speculation barrier on target architectures that do not have
34439 DSB/ISB directly. Such systems probably don't need a barrier
34440 themselves, but if the code is ever run on a later architecture, it
34441 might become a problem. */
34442 void
34443 arm_emit_speculation_barrier_function ()
34444 {
34445 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
34446 }
34447
34448 /* Have we recorded an explicit access to the Q bit of APSR? */
34449 bool
34450 arm_q_bit_access (void)
34451 {
34452 if (cfun && cfun->decl)
34453 return lookup_attribute ("acle qbit",
34454 DECL_ATTRIBUTES (cfun->decl));
34455 return true;
34456 }
34457
34458 /* Have we recorded an explicit access to the GE bits of PSTATE? */
34459 bool
34460 arm_ge_bits_access (void)
34461 {
34462 if (cfun && cfun->decl)
34463 return lookup_attribute ("acle gebits",
34464 DECL_ATTRIBUTES (cfun->decl));
34465 return true;
34466 }
34467
34468 /* Return NULL if insn INSN is valid within a low-overhead loop.
34469 Otherwise return a string describing why doloop cannot be applied. */
34470
34471 static const char *
34472 arm_invalid_within_doloop (const rtx_insn *insn)
34473 {
34474 if (!TARGET_HAVE_LOB)
34475 return default_invalid_within_doloop (insn);
34476
34477 if (CALL_P (insn))
34478 return "Function call in the loop.";
34479
34480 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
34481 return "LR is used inside loop.";
34482
34483 return NULL;
34484 }
34485
34486 bool
34487 arm_target_insn_ok_for_lob (rtx insn)
34488 {
34489 basic_block bb = BLOCK_FOR_INSN (insn);
34490 /* Make sure the basic block of the target insn is a simple latch
34491 whose single predecessor and successor is the body of the loop
34492 itself. Only simple loops whose body is a single basic block are
34493 supported for a 'low-overhead loop', which ensures that the LE target
34494 is above the LE instruction itself in the generated code. */
34495
34496 return single_succ_p (bb)
34497 && single_pred_p (bb)
34498 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
34499 && contains_no_active_insn_p (bb);
34500 }
34501
34502 #if CHECKING_P
34503 namespace selftest {
34504
34505 /* Scan the static data tables generated by parsecpu.awk looking for
34506 potential issues with the data. We primarily check for
34507 inconsistencies in the option extensions at present (extensions
34508 that duplicate others but aren't marked as aliases). Furthermore,
34509 for correct canonicalization, later options must never be a subset
34510 of an earlier option. Any extension should also only specify other
34511 feature bits and never an architecture bit. The architecture is inferred
34512 from the declaration of the extension. */
34513 static void
34514 arm_test_cpu_arch_data (void)
34515 {
34516 const arch_option *arch;
34517 const cpu_option *cpu;
34518 auto_sbitmap target_isa (isa_num_bits);
34519 auto_sbitmap isa1 (isa_num_bits);
34520 auto_sbitmap isa2 (isa_num_bits);
34521
34522 for (arch = all_architectures; arch->common.name != NULL; ++arch)
34523 {
34524 const cpu_arch_extension *ext1, *ext2;
34525
34526 if (arch->common.extensions == NULL)
34527 continue;
34528
34529 arm_initialize_isa (target_isa, arch->common.isa_bits);
34530
34531 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
34532 {
34533 if (ext1->alias)
34534 continue;
34535
34536 arm_initialize_isa (isa1, ext1->isa_bits);
34537 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34538 {
34539 if (ext2->alias || ext1->remove != ext2->remove)
34540 continue;
34541
34542 arm_initialize_isa (isa2, ext2->isa_bits);
34543 /* If the option is a subset of the parent option, it doesn't
34544 add anything and so isn't useful. */
34545 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34546
34547 /* If the extension specifies any architectural bits then
34548 disallow it. Extensions should only specify feature bits. */
34549 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34550 }
34551 }
34552 }
34553
34554 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
34555 {
34556 const cpu_arch_extension *ext1, *ext2;
34557
34558 if (cpu->common.extensions == NULL)
34559 continue;
34560
34561 arm_initialize_isa (target_isa, cpu->common.isa_bits);
34562
34563 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
34564 {
34565 if (ext1->alias)
34566 continue;
34567
34568 arm_initialize_isa (isa1, ext1->isa_bits);
34569 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
34570 {
34571 if (ext2->alias || ext1->remove != ext2->remove)
34572 continue;
34573
34574 arm_initialize_isa (isa2, ext2->isa_bits);
34575 /* If the option is a subset of the parent option, it doesn't
34576 add anything and so isn't useful. */
34577 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
34578
34579 /* If the extension specifies any architectural bits then
34580 disallow it. Extensions should only specify feature bits. */
34581 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
34582 }
34583 }
34584 }
34585 }
34586
34587 /* Scan the static data tables generated by parsecpu.awk looking for
34588 potential issues with the data. Here we check for consistency between the
34589 fpu bits; in particular, we check that ISA_ALL_FPU_INTERNAL does not contain
34590 a feature bit that is not defined by any FPU flag. */
34591 static void
34592 arm_test_fpu_data (void)
34593 {
34594 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
34595 auto_sbitmap fpubits (isa_num_bits);
34596 auto_sbitmap tmpset (isa_num_bits);
34597
34598 static const enum isa_feature fpu_bitlist_internal[]
34599 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
34600 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
34601
34602 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
34603 {
34604 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
34605 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
34606 bitmap_clear (isa_all_fpubits_internal);
34607 bitmap_copy (isa_all_fpubits_internal, tmpset);
34608 }
34609
34610 if (!bitmap_empty_p (isa_all_fpubits_internal))
34611 {
34612 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
34613 " group that are not defined by any FPU.\n"
34614 " Check your arm-cpus.in.\n");
34615 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
34616 }
34617 }
34618
34619 static void
34620 arm_run_selftests (void)
34621 {
34622 arm_test_cpu_arch_data ();
34623 arm_test_fpu_data ();
34624 }
34625 } /* Namespace selftest. */
34626
34627 #undef TARGET_RUN_TARGET_SELFTESTS
34628 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
34629 #endif /* CHECKING_P */
34630
34631 /* Implement TARGET_STACK_PROTECT_GUARD. In the case of a
34632 global-variable-based guard, use the default; otherwise
34633 return a null tree. */
34634 static tree
34635 arm_stack_protect_guard (void)
34636 {
34637 if (arm_stack_protector_guard == SSP_GLOBAL)
34638 return default_stack_protect_guard ();
34639
34640 return NULL_TREE;
34641 }
34642
34643 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
34644 Unlike the arm version, we do NOT implement asm flag outputs. */
34645
34646 rtx_insn *
34647 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
34648 vec<machine_mode> & /*input_modes*/,
34649 vec<const char *> &constraints,
34650 vec<rtx> &, vec<rtx> & /*clobbers*/,
34651 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
34652 {
34653 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
34654 if (startswith (constraints[i], "=@cc"))
34655 {
34656 sorry ("%<asm%> flags not supported in thumb1 mode");
34657 break;
34658 }
34659 return NULL;
34660 }
34661
34662 /* Generate code to enable conditional branches in functions over 1 MiB.
34663 Parameters are:
34664 OPERANDS: the operand list of the asm insn (see arm_cond_branch or
34665 arm_cond_branch_reversed).
34666 POS_LABEL: an index into the operands array; operands[POS_LABEL] is
34667 the asm label of the final jump destination.
34668 DEST: a string used to generate the asm label of the intermediate
34669 destination.
34670 BRANCH_FORMAT: a string denoting the intermediate branch format, e.g.
34671 "beq", "bne", etc. */
34672
34673 const char *
34674 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
34675 const char * branch_format)
34676 {
34677 rtx_code_label * tmp_label = gen_label_rtx ();
34678 char label_buf[256];
34679 char buffer[128];
34680 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
34681 CODE_LABEL_NUMBER (tmp_label));
34682 const char *label_ptr = arm_strip_name_encoding (label_buf);
34683 rtx dest_label = operands[pos_label];
34684 operands[pos_label] = tmp_label;
34685
34686 snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
34687 output_asm_insn (buffer, operands);
34688
34689 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
34690 operands[pos_label] = dest_label;
34691 output_asm_insn (buffer, operands);
34692 return "";
34693 }
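
/* Schematically (illustrative; assuming the caller passes in BRANCH_FORMAT
   the condition that branches *around* the far jump, typically the inverse
   of the original condition), the emitted sequence is:

	b<cond>	.L<dest><N>
	b	<original far destination>
   .L<dest><N>:

   so the conditional branch stays within its short range while the
   unconditional b reaches the far label. */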
34694
34695 /* Return the base register class to use for addresses of the given MODE
34696 (i.e. for [Rn], which class Rn must be in; LO_REGS for some MVE modes). */
34697 enum reg_class
34698 arm_mode_base_reg_class (machine_mode mode)
34699 {
34700 if (TARGET_HAVE_MVE
34701 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34702 return LO_REGS;
34703
34704 return MODE_BASE_REG_REG_CLASS (mode);
34705 }
34706
34707 struct gcc_target targetm = TARGET_INITIALIZER;
34708
34709 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
34710
34711 opt_machine_mode
34712 arm_get_mask_mode (machine_mode mode)
34713 {
34714 if (TARGET_HAVE_MVE)
34715 return arm_mode_to_pred_mode (mode);
34716
34717 return default_get_mask_mode (mode);
34718 }
34719
34720 /* Output assembly to read the thread pointer from the appropriate TPIDR
34721 register into DST. If PRED_P is true, also emit the %? that can be used
34722 to output the predication code. */
34723
34724 const char *
34725 arm_output_load_tpidr (rtx dst, bool pred_p)
34726 {
34727 char buf[64];
34728 int tpidr_coproc_num = -1;
34729 switch (target_thread_pointer)
34730 {
34731 case TP_TPIDRURW:
34732 tpidr_coproc_num = 2;
34733 break;
34734 case TP_TPIDRURO:
34735 tpidr_coproc_num = 3;
34736 break;
34737 case TP_TPIDRPRW:
34738 tpidr_coproc_num = 4;
34739 break;
34740 default:
34741 gcc_unreachable ();
34742 }
34743 snprintf (buf, sizeof (buf),
34744 "mrc%s\tp15, 0, %%0, c13, c0, %d\t@ load_tp_hard",
34745 pred_p ? "%?" : "", tpidr_coproc_num);
34746 output_asm_insn (buf, &dst);
34747 return "";
34748 }
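
/* For example (illustrative), when target_thread_pointer is TP_TPIDRURO,
   PRED_P is false and DST is r0, the output is:

	mrc	p15, 0, r0, c13, c0, 3	@ load_tp_hard

   with the final operand being 2 for TPIDRURW and 4 for TPIDRPRW. */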
34749
34750 #include "gt-arm.h"