1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2021 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "opts.h"
63 #include "dumpfile.h"
64 #include "target-globals.h"
65 #include "builtins.h"
66 #include "tm-constrs.h"
67 #include "rtl-iter.h"
68 #include "optabs-libfuncs.h"
69 #include "gimplify.h"
70 #include "gimple.h"
71 #include "selftest.h"
72
73 /* This file should be included last. */
74 #include "target-def.h"
75
76 /* Forward definitions of types. */
77 typedef struct minipool_node Mnode;
78 typedef struct minipool_fixup Mfix;
79
80 /* The last .arch and .fpu assembly strings that we printed. */
81 static std::string arm_last_printed_arch_string;
82 static std::string arm_last_printed_fpu_string;
83
84 void (*arm_lang_output_object_attributes_hook)(void);
85
86 struct four_ints
87 {
88 int i[4];
89 };
90
91 /* Forward function declarations. */
92 static bool arm_const_not_ok_for_debug_p (rtx);
93 static int arm_needs_doubleword_align (machine_mode, const_tree);
94 static int arm_compute_static_chain_stack_bytes (void);
95 static arm_stack_offsets *arm_get_frame_offsets (void);
96 static void arm_compute_frame_layout (void);
97 static void arm_add_gc_roots (void);
98 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
99 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
100 static unsigned bit_count (unsigned long);
101 static unsigned bitmap_popcount (const sbitmap);
102 static int arm_address_register_rtx_p (rtx, int);
103 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
104 static bool is_called_in_ARM_mode (tree);
105 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
106 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
107 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
108 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
109 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
110 inline static int thumb1_index_register_rtx_p (rtx, int);
111 static int thumb_far_jump_used_p (void);
112 static bool thumb_force_lr_save (void);
113 static unsigned arm_size_return_regs (void);
114 static bool arm_assemble_integer (rtx, unsigned int, int);
115 static void arm_print_operand (FILE *, rtx, int);
116 static void arm_print_operand_address (FILE *, machine_mode, rtx);
117 static bool arm_print_operand_punct_valid_p (unsigned char code);
118 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
119 static arm_cc get_arm_condition_code (rtx);
120 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
121 static const char *output_multi_immediate (rtx *, const char *, const char *,
122 int, HOST_WIDE_INT);
123 static const char *shift_op (rtx, HOST_WIDE_INT *);
124 static struct machine_function *arm_init_machine_status (void);
125 static void thumb_exit (FILE *, int);
126 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
127 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
128 static Mnode *add_minipool_forward_ref (Mfix *);
129 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
130 static Mnode *add_minipool_backward_ref (Mfix *);
131 static void assign_minipool_offsets (Mfix *);
132 static void arm_print_value (FILE *, rtx);
133 static void dump_minipool (rtx_insn *);
134 static int arm_barrier_cost (rtx_insn *);
135 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
136 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
137 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
138 machine_mode, rtx);
139 static void arm_reorg (void);
140 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
141 static unsigned long arm_compute_save_reg0_reg12_mask (void);
142 static unsigned long arm_compute_save_core_reg_mask (void);
143 static unsigned long arm_isr_value (tree);
144 static unsigned long arm_compute_func_type (void);
145 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
146 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
148 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
149 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
150 #endif
151 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
152 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
153 static void arm_output_function_epilogue (FILE *);
154 static void arm_output_function_prologue (FILE *);
155 static int arm_comp_type_attributes (const_tree, const_tree);
156 static void arm_set_default_type_attributes (tree);
157 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
158 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
159 static int optimal_immediate_sequence (enum rtx_code code,
160 unsigned HOST_WIDE_INT val,
161 struct four_ints *return_sequence);
162 static int optimal_immediate_sequence_1 (enum rtx_code code,
163 unsigned HOST_WIDE_INT val,
164 struct four_ints *return_sequence,
165 int i);
166 static int arm_get_strip_length (int);
167 static bool arm_function_ok_for_sibcall (tree, tree);
168 static machine_mode arm_promote_function_mode (const_tree,
169 machine_mode, int *,
170 const_tree, int);
171 static bool arm_return_in_memory (const_tree, const_tree);
172 static rtx arm_function_value (const_tree, const_tree, bool);
173 static rtx arm_libcall_value_1 (machine_mode);
174 static rtx arm_libcall_value (machine_mode, const_rtx);
175 static bool arm_function_value_regno_p (const unsigned int);
176 static void arm_internal_label (FILE *, const char *, unsigned long);
177 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
178 tree);
179 static bool arm_have_conditional_execution (void);
180 static bool arm_cannot_force_const_mem (machine_mode, rtx);
181 static bool arm_legitimate_constant_p (machine_mode, rtx);
182 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
183 static int arm_insn_cost (rtx_insn *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
190 static rtx emit_multi_reg_push (unsigned long, unsigned long);
191 static void arm_emit_multi_reg_pop (unsigned long);
192 static int vfp_emit_fstmd (int, int);
193 static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
194 static int arm_arg_partial_bytes (cumulative_args_t,
195 const function_arg_info &);
196 static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
197 static void arm_function_arg_advance (cumulative_args_t,
198 const function_arg_info &);
199 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
200 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
201 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
202 const_tree);
203 static rtx aapcs_libcall_value (machine_mode);
204 static int aapcs_select_return_coproc (const_tree, const_tree);
205
206 #ifdef OBJECT_FORMAT_ELF
207 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
208 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
209 #endif
210 #ifndef ARM_PE
211 static void arm_encode_section_info (tree, rtx, int);
212 #endif
213
214 static void arm_file_end (void);
215 static void arm_file_start (void);
216 static void arm_insert_attributes (tree, tree *);
217
218 static void arm_setup_incoming_varargs (cumulative_args_t,
219 const function_arg_info &, int *, int);
220 static bool arm_pass_by_reference (cumulative_args_t,
221 const function_arg_info &);
222 static bool arm_promote_prototypes (const_tree);
223 static bool arm_default_short_enums (void);
224 static bool arm_align_anon_bitfield (void);
225 static bool arm_return_in_msb (const_tree);
226 static bool arm_must_pass_in_stack (const function_arg_info &);
227 static bool arm_return_in_memory (const_tree, const_tree);
228 #if ARM_UNWIND_INFO
229 static void arm_unwind_emit (FILE *, rtx_insn *);
230 static bool arm_output_ttype (rtx);
231 static void arm_asm_emit_except_personality (rtx);
232 #endif
233 static void arm_asm_init_sections (void);
234 static rtx arm_dwarf_register_span (rtx);
235
236 static tree arm_cxx_guard_type (void);
237 static bool arm_cxx_guard_mask_bit (void);
238 static tree arm_get_cookie_size (tree);
239 static bool arm_cookie_has_size (void);
240 static bool arm_cxx_cdtor_returns_this (void);
241 static bool arm_cxx_key_method_may_be_inline (void);
242 static void arm_cxx_determine_class_data_visibility (tree);
243 static bool arm_cxx_class_data_always_comdat (void);
244 static bool arm_cxx_use_aeabi_atexit (void);
245 static void arm_init_libfuncs (void);
246 static tree arm_build_builtin_va_list (void);
247 static void arm_expand_builtin_va_start (tree, rtx);
248 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
249 static void arm_option_override (void);
250 static void arm_option_restore (struct gcc_options *, struct gcc_options *,
251 struct cl_target_option *);
252 static void arm_override_options_after_change (void);
253 static void arm_option_print (FILE *, int, struct cl_target_option *);
254 static void arm_set_current_function (tree);
255 static bool arm_can_inline_p (tree, tree);
256 static void arm_relayout_function (tree);
257 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
258 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
259 static bool arm_sched_can_speculate_insn (rtx_insn *);
260 static bool arm_macro_fusion_p (void);
261 static bool arm_cannot_copy_insn_p (rtx_insn *);
262 static int arm_issue_rate (void);
263 static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
264 static int arm_first_cycle_multipass_dfa_lookahead (void);
265 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
266 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
267 static bool arm_output_addr_const_extra (FILE *, rtx);
268 static bool arm_allocate_stack_slots_for_args (void);
269 static bool arm_warn_func_return (tree);
270 static tree arm_promoted_type (const_tree t);
271 static bool arm_scalar_mode_supported_p (scalar_mode);
272 static bool arm_frame_pointer_required (void);
273 static bool arm_can_eliminate (const int, const int);
274 static void arm_asm_trampoline_template (FILE *);
275 static void arm_trampoline_init (rtx, tree, rtx);
276 static rtx arm_trampoline_adjust_address (rtx);
277 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
278 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
279 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
280 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
281 static bool arm_array_mode_supported_p (machine_mode,
282 unsigned HOST_WIDE_INT);
283 static machine_mode arm_preferred_simd_mode (scalar_mode);
284 static bool arm_class_likely_spilled_p (reg_class_t);
285 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
286 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
287 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
288 const_tree type,
289 int misalignment,
290 bool is_packed);
291 static void arm_conditional_register_usage (void);
292 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
293 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
294 static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
295 static int arm_default_branch_cost (bool, bool);
296 static int arm_cortex_a5_branch_cost (bool, bool);
297 static int arm_cortex_m_branch_cost (bool, bool);
298 static int arm_cortex_m7_branch_cost (bool, bool);
299
300 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
301 const vec_perm_indices &);
302
303 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
304
305 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
306 tree vectype,
307 int misalign ATTRIBUTE_UNUSED);
308 static unsigned arm_add_stmt_cost (vec_info *vinfo, void *data, int count,
309 enum vect_cost_for_stmt kind,
310 struct _stmt_vec_info *stmt_info,
311 tree vectype, int misalign,
312 enum vect_cost_model_location where);
313
314 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
315 bool op0_preserve_value);
316 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
317
318 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
319 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
320 const_tree);
321 static section *arm_function_section (tree, enum node_frequency, bool, bool);
322 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
323 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
324 int reloc);
325 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
326 static opt_scalar_float_mode arm_floatn_mode (int, bool);
327 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
328 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
329 static bool arm_modes_tieable_p (machine_mode, machine_mode);
330 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
331 static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
332 vec<machine_mode> &,
333 vec<const char *> &, vec<rtx> &,
334 HARD_REG_SET &);
335 \f
336 /* Table of machine attributes. */
337 static const struct attribute_spec arm_attribute_table[] =
338 {
339 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
340 affects_type_identity, handler, exclude } */
341 /* Function calls made to this symbol must be done indirectly, because
342 it may lie outside of the 26 bit addressing range of a normal function
343 call. */
344 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
345 /* Whereas these functions are always known to reside within the 26 bit
346 addressing range. */
347 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
348 /* Specify the procedure call conventions for a function. */
349 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
350 NULL },
351 /* Interrupt Service Routines have special prologue and epilogue requirements. */
352 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
353 NULL },
354 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
355 NULL },
356 { "naked", 0, 0, true, false, false, false,
357 arm_handle_fndecl_attribute, NULL },
358 #ifdef ARM_PE
359 /* ARM/PE has three new attributes:
360 interfacearm - ?
361 dllexport - for exporting a function/variable that will live in a dll
362 dllimport - for importing a function/variable from a dll
363
364 Microsoft allows multiple declspecs in one __declspec, separating
365 them with spaces. We do NOT support this. Instead, use __declspec
366 multiple times.
367 */
368 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
369 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
370 { "interfacearm", 0, 0, true, false, false, false,
371 arm_handle_fndecl_attribute, NULL },
372 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
373 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
374 NULL },
375 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
376 NULL },
377 { "notshared", 0, 0, false, true, false, false,
378 arm_handle_notshared_attribute, NULL },
379 #endif
380 /* ARMv8-M Security Extensions support. */
381 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
382 arm_handle_cmse_nonsecure_entry, NULL },
383 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
384 arm_handle_cmse_nonsecure_call, NULL },
385 { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL },
386 { NULL, 0, 0, false, false, false, false, NULL, NULL }
387 };
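/* Illustrative (not part of the original source): in user code these
   attributes appear as, for example,

     void far_away (void) __attribute__ ((long_call));
     void handler (void) __attribute__ ((interrupt ("IRQ")));

   where far_away and handler are hypothetical declarations; the non-NULL
   handlers in the table above are invoked to validate such uses.  */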
388 \f
389 /* Initialize the GCC target structure. */
390 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
391 #undef TARGET_MERGE_DECL_ATTRIBUTES
392 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
393 #endif
394
395 #undef TARGET_CHECK_BUILTIN_CALL
396 #define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call
397
398 #undef TARGET_LEGITIMIZE_ADDRESS
399 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
400
401 #undef TARGET_ATTRIBUTE_TABLE
402 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
403
404 #undef TARGET_INSERT_ATTRIBUTES
405 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
406
407 #undef TARGET_ASM_FILE_START
408 #define TARGET_ASM_FILE_START arm_file_start
409 #undef TARGET_ASM_FILE_END
410 #define TARGET_ASM_FILE_END arm_file_end
411
412 #undef TARGET_ASM_ALIGNED_SI_OP
413 #define TARGET_ASM_ALIGNED_SI_OP NULL
414 #undef TARGET_ASM_INTEGER
415 #define TARGET_ASM_INTEGER arm_assemble_integer
416
417 #undef TARGET_PRINT_OPERAND
418 #define TARGET_PRINT_OPERAND arm_print_operand
419 #undef TARGET_PRINT_OPERAND_ADDRESS
420 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
421 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
422 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
423
424 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
425 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
426
427 #undef TARGET_ASM_FUNCTION_PROLOGUE
428 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
429
430 #undef TARGET_ASM_FUNCTION_EPILOGUE
431 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
432
433 #undef TARGET_CAN_INLINE_P
434 #define TARGET_CAN_INLINE_P arm_can_inline_p
435
436 #undef TARGET_RELAYOUT_FUNCTION
437 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
438
439 #undef TARGET_OPTION_OVERRIDE
440 #define TARGET_OPTION_OVERRIDE arm_option_override
441
442 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
443 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
444
445 #undef TARGET_OPTION_RESTORE
446 #define TARGET_OPTION_RESTORE arm_option_restore
447
448 #undef TARGET_OPTION_PRINT
449 #define TARGET_OPTION_PRINT arm_option_print
450
451 #undef TARGET_COMP_TYPE_ATTRIBUTES
452 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
453
454 #undef TARGET_SCHED_CAN_SPECULATE_INSN
455 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
456
457 #undef TARGET_SCHED_MACRO_FUSION_P
458 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
459
460 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
461 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
462
463 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
464 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
465
466 #undef TARGET_SCHED_ADJUST_COST
467 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
468
469 #undef TARGET_SET_CURRENT_FUNCTION
470 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
471
472 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
473 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
474
475 #undef TARGET_SCHED_REORDER
476 #define TARGET_SCHED_REORDER arm_sched_reorder
477
478 #undef TARGET_REGISTER_MOVE_COST
479 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
480
481 #undef TARGET_MEMORY_MOVE_COST
482 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
483
484 #undef TARGET_ENCODE_SECTION_INFO
485 #ifdef ARM_PE
486 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
487 #else
488 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
489 #endif
490
491 #undef TARGET_STRIP_NAME_ENCODING
492 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
493
494 #undef TARGET_ASM_INTERNAL_LABEL
495 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
496
497 #undef TARGET_FLOATN_MODE
498 #define TARGET_FLOATN_MODE arm_floatn_mode
499
500 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
501 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
502
503 #undef TARGET_FUNCTION_VALUE
504 #define TARGET_FUNCTION_VALUE arm_function_value
505
506 #undef TARGET_LIBCALL_VALUE
507 #define TARGET_LIBCALL_VALUE arm_libcall_value
508
509 #undef TARGET_FUNCTION_VALUE_REGNO_P
510 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
511
512 #undef TARGET_ASM_OUTPUT_MI_THUNK
513 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
514 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
515 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
516
517 #undef TARGET_RTX_COSTS
518 #define TARGET_RTX_COSTS arm_rtx_costs
519 #undef TARGET_ADDRESS_COST
520 #define TARGET_ADDRESS_COST arm_address_cost
521 #undef TARGET_INSN_COST
522 #define TARGET_INSN_COST arm_insn_cost
523
524 #undef TARGET_SHIFT_TRUNCATION_MASK
525 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
526 #undef TARGET_VECTOR_MODE_SUPPORTED_P
527 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
528 #undef TARGET_ARRAY_MODE_SUPPORTED_P
529 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
530 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
531 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
532 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
533 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
534 arm_autovectorize_vector_modes
535
536 #undef TARGET_MACHINE_DEPENDENT_REORG
537 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
538
539 #undef TARGET_INIT_BUILTINS
540 #define TARGET_INIT_BUILTINS arm_init_builtins
541 #undef TARGET_EXPAND_BUILTIN
542 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
543 #undef TARGET_BUILTIN_DECL
544 #define TARGET_BUILTIN_DECL arm_builtin_decl
545
546 #undef TARGET_INIT_LIBFUNCS
547 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
548
549 #undef TARGET_PROMOTE_FUNCTION_MODE
550 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
551 #undef TARGET_PROMOTE_PROTOTYPES
552 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
553 #undef TARGET_PASS_BY_REFERENCE
554 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
555 #undef TARGET_ARG_PARTIAL_BYTES
556 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
557 #undef TARGET_FUNCTION_ARG
558 #define TARGET_FUNCTION_ARG arm_function_arg
559 #undef TARGET_FUNCTION_ARG_ADVANCE
560 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
561 #undef TARGET_FUNCTION_ARG_PADDING
562 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
563 #undef TARGET_FUNCTION_ARG_BOUNDARY
564 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
565
566 #undef TARGET_SETUP_INCOMING_VARARGS
567 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
568
569 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
570 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
571
572 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
573 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
574 #undef TARGET_TRAMPOLINE_INIT
575 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
576 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
577 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
578
579 #undef TARGET_WARN_FUNC_RETURN
580 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
581
582 #undef TARGET_DEFAULT_SHORT_ENUMS
583 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
584
585 #undef TARGET_ALIGN_ANON_BITFIELD
586 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
587
588 #undef TARGET_NARROW_VOLATILE_BITFIELD
589 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
590
591 #undef TARGET_CXX_GUARD_TYPE
592 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
593
594 #undef TARGET_CXX_GUARD_MASK_BIT
595 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
596
597 #undef TARGET_CXX_GET_COOKIE_SIZE
598 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
599
600 #undef TARGET_CXX_COOKIE_HAS_SIZE
601 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
602
603 #undef TARGET_CXX_CDTOR_RETURNS_THIS
604 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
605
606 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
607 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
608
609 #undef TARGET_CXX_USE_AEABI_ATEXIT
610 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
611
612 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
613 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
614 arm_cxx_determine_class_data_visibility
615
616 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
617 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
618
619 #undef TARGET_RETURN_IN_MSB
620 #define TARGET_RETURN_IN_MSB arm_return_in_msb
621
622 #undef TARGET_RETURN_IN_MEMORY
623 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
624
625 #undef TARGET_MUST_PASS_IN_STACK
626 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
627
628 #if ARM_UNWIND_INFO
629 #undef TARGET_ASM_UNWIND_EMIT
630 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
631
632 /* EABI unwinding tables use a different format for the typeinfo tables. */
633 #undef TARGET_ASM_TTYPE
634 #define TARGET_ASM_TTYPE arm_output_ttype
635
636 #undef TARGET_ARM_EABI_UNWINDER
637 #define TARGET_ARM_EABI_UNWINDER true
638
639 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
640 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
641
642 #endif /* ARM_UNWIND_INFO */
643
644 #undef TARGET_ASM_INIT_SECTIONS
645 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
646
647 #undef TARGET_DWARF_REGISTER_SPAN
648 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
649
650 #undef TARGET_CANNOT_COPY_INSN_P
651 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
652
653 #ifdef HAVE_AS_TLS
654 #undef TARGET_HAVE_TLS
655 #define TARGET_HAVE_TLS true
656 #endif
657
658 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
659 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
660
661 #undef TARGET_LEGITIMATE_CONSTANT_P
662 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
663
664 #undef TARGET_CANNOT_FORCE_CONST_MEM
665 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
666
667 #undef TARGET_MAX_ANCHOR_OFFSET
668 #define TARGET_MAX_ANCHOR_OFFSET 4095
669
670 /* The minimum is set such that the total size of the block
671 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
672 divisible by eight, ensuring natural spacing of anchors. */
673 #undef TARGET_MIN_ANCHOR_OFFSET
674 #define TARGET_MIN_ANCHOR_OFFSET -4088
675
676 #undef TARGET_SCHED_ISSUE_RATE
677 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
678
679 #undef TARGET_SCHED_VARIABLE_ISSUE
680 #define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue
681
682 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
683 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
684 arm_first_cycle_multipass_dfa_lookahead
685
686 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
687 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
688 arm_first_cycle_multipass_dfa_lookahead_guard
689
690 #undef TARGET_MANGLE_TYPE
691 #define TARGET_MANGLE_TYPE arm_mangle_type
692
693 #undef TARGET_INVALID_CONVERSION
694 #define TARGET_INVALID_CONVERSION arm_invalid_conversion
695
696 #undef TARGET_INVALID_UNARY_OP
697 #define TARGET_INVALID_UNARY_OP arm_invalid_unary_op
698
699 #undef TARGET_INVALID_BINARY_OP
700 #define TARGET_INVALID_BINARY_OP arm_invalid_binary_op
701
702 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
703 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
704
705 #undef TARGET_BUILD_BUILTIN_VA_LIST
706 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
707 #undef TARGET_EXPAND_BUILTIN_VA_START
708 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
709 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
710 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
711
712 #ifdef HAVE_AS_TLS
713 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
714 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
715 #endif
716
717 #undef TARGET_LEGITIMATE_ADDRESS_P
718 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
719
720 #undef TARGET_PREFERRED_RELOAD_CLASS
721 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
722
723 #undef TARGET_PROMOTED_TYPE
724 #define TARGET_PROMOTED_TYPE arm_promoted_type
725
726 #undef TARGET_SCALAR_MODE_SUPPORTED_P
727 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
728
729 #undef TARGET_COMPUTE_FRAME_LAYOUT
730 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
731
732 #undef TARGET_FRAME_POINTER_REQUIRED
733 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
734
735 #undef TARGET_CAN_ELIMINATE
736 #define TARGET_CAN_ELIMINATE arm_can_eliminate
737
738 #undef TARGET_CONDITIONAL_REGISTER_USAGE
739 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
740
741 #undef TARGET_CLASS_LIKELY_SPILLED_P
742 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
743
744 #undef TARGET_VECTORIZE_BUILTINS
745 #define TARGET_VECTORIZE_BUILTINS
746
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
749 arm_builtin_vectorized_function
750
751 #undef TARGET_VECTOR_ALIGNMENT
752 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
753
754 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
755 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
756 arm_vector_alignment_reachable
757
758 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
759 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
760 arm_builtin_support_vector_misalignment
761
762 #undef TARGET_PREFERRED_RENAME_CLASS
763 #define TARGET_PREFERRED_RENAME_CLASS \
764 arm_preferred_rename_class
765
766 #undef TARGET_VECTORIZE_VEC_PERM_CONST
767 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
768
769 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
770 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
771 arm_builtin_vectorization_cost
772 #undef TARGET_VECTORIZE_ADD_STMT_COST
773 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
774
775 #undef TARGET_CANONICALIZE_COMPARISON
776 #define TARGET_CANONICALIZE_COMPARISON \
777 arm_canonicalize_comparison
778
779 #undef TARGET_ASAN_SHADOW_OFFSET
780 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
781
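/* The longest IT block Thumb-2 allows is four instructions; with
   -mrestrict-it (arm_restrict_it) only a single conditional instruction
   per IT block is permitted.  */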
782 #undef MAX_INSN_PER_IT_BLOCK
783 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
784
785 #undef TARGET_CAN_USE_DOLOOP_P
786 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
787
788 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
789 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
790
791 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
792 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
793
794 #undef TARGET_SCHED_FUSION_PRIORITY
795 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
796
797 #undef TARGET_ASM_FUNCTION_SECTION
798 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
799
800 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
801 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
802
803 #undef TARGET_SECTION_TYPE_FLAGS
804 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
805
806 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
807 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
808
809 #undef TARGET_C_EXCESS_PRECISION
810 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
811
812 /* Although the architecture reserves bits 0 and 1, only the former is
813 used for ARM/Thumb ISA selection in v7 and earlier versions. */
814 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
815 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
816
817 #undef TARGET_FIXED_CONDITION_CODE_REGS
818 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
819
820 #undef TARGET_HARD_REGNO_NREGS
821 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
822 #undef TARGET_HARD_REGNO_MODE_OK
823 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
824
825 #undef TARGET_MODES_TIEABLE_P
826 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
827
828 #undef TARGET_CAN_CHANGE_MODE_CLASS
829 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
830
831 #undef TARGET_CONSTANT_ALIGNMENT
832 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
833
834 #undef TARGET_INVALID_WITHIN_DOLOOP
835 #define TARGET_INVALID_WITHIN_DOLOOP arm_invalid_within_doloop
836
837 #undef TARGET_MD_ASM_ADJUST
838 #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
839 \f
840 /* Obstack for minipool constant handling. */
841 static struct obstack minipool_obstack;
842 static char * minipool_startobj;
843
844 /* The maximum number of insns skipped which
845 will be conditionalised if possible. */
846 static int max_insns_skipped = 5;
847
848 extern FILE * asm_out_file;
849
850 /* True if we are currently building a constant table. */
851 int making_const_table;
852
853 /* The processor for which instructions should be scheduled. */
854 enum processor_type arm_tune = TARGET_CPU_arm_none;
855
856 /* The current tuning set. */
857 const struct tune_params *current_tune;
858
859 /* Which floating point hardware to schedule for. */
860 int arm_fpu_attr;
861
862 /* Used for Thumb call_via trampolines. */
863 rtx thumb_call_via_label[14];
864 static int thumb_call_reg_needed;
865
866 /* The bits in this mask specify which instruction scheduling options should
867 be used. */
868 unsigned int tune_flags = 0;
869
870 /* The highest ARM architecture version supported by the
871 target. */
872 enum base_architecture arm_base_arch = BASE_ARCH_0;
873
874 /* Active target architecture and tuning. */
875
876 struct arm_build_target arm_active_target;
877
878 /* The following are used in the arm.md file as equivalents to bits
879 in the above two flag variables. */
880
881 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
882 int arm_arch4 = 0;
883
884 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
885 int arm_arch4t = 0;
886
887 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
888 int arm_arch5t = 0;
889
890 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
891 int arm_arch5te = 0;
892
893 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
894 int arm_arch6 = 0;
895
896 /* Nonzero if this chip supports the ARM 6K extensions. */
897 int arm_arch6k = 0;
898
899 /* Nonzero if this chip supports the ARM 6KZ extensions. */
900 int arm_arch6kz = 0;
901
902 /* Nonzero if instructions present in ARMv6-M can be used. */
903 int arm_arch6m = 0;
904
905 /* Nonzero if this chip supports the ARM 7 extensions. */
906 int arm_arch7 = 0;
907
908 /* Nonzero if this chip supports the Large Physical Address Extension. */
909 int arm_arch_lpae = 0;
910
911 /* Nonzero if instructions not present in the 'M' profile can be used. */
912 int arm_arch_notm = 0;
913
914 /* Nonzero if instructions present in ARMv7E-M can be used. */
915 int arm_arch7em = 0;
916
917 /* Nonzero if instructions present in ARMv8 can be used. */
918 int arm_arch8 = 0;
919
920 /* Nonzero if this chip supports the ARMv8.1 extensions. */
921 int arm_arch8_1 = 0;
922
923 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
924 int arm_arch8_2 = 0;
925
926 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
927 int arm_arch8_3 = 0;
928
929 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
930 int arm_arch8_4 = 0;
931 /* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
932 extensions. */
933 int arm_arch8_1m_main = 0;
934
935 /* Nonzero if this chip supports the FP16 instructions extension of ARM
936 Architecture 8.2. */
937 int arm_fp16_inst = 0;
938
939 /* Nonzero if this chip can benefit from load scheduling. */
940 int arm_ld_sched = 0;
941
942 /* Nonzero if this chip is a StrongARM. */
943 int arm_tune_strongarm = 0;
944
945 /* Nonzero if this chip supports Intel Wireless MMX technology. */
946 int arm_arch_iwmmxt = 0;
947
948 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
949 int arm_arch_iwmmxt2 = 0;
950
951 /* Nonzero if this chip is an XScale. */
952 int arm_arch_xscale = 0;
953
954 /* Nonzero if tuning for XScale. */
955 int arm_tune_xscale = 0;
956
957 /* Nonzero if we want to tune for stores that access the write-buffer.
958 This typically means an ARM6 or ARM7 with MMU or MPU. */
959 int arm_tune_wbuf = 0;
960
961 /* Nonzero if tuning for Cortex-A9. */
962 int arm_tune_cortex_a9 = 0;
963
964 /* Nonzero if we should define __THUMB_INTERWORK__ in the
965 preprocessor.
966 XXX This is a bit of a hack, it's intended to help work around
967 problems in GLD which doesn't understand that armv5t code is
968 interworking clean. */
969 int arm_cpp_interwork = 0;
970
971 /* Nonzero if chip supports Thumb 1. */
972 int arm_arch_thumb1;
973
974 /* Nonzero if chip supports Thumb 2. */
975 int arm_arch_thumb2;
976
977 /* Nonzero if chip supports integer division instruction. */
978 int arm_arch_arm_hwdiv;
979 int arm_arch_thumb_hwdiv;
980
981 /* Nonzero if chip disallows volatile memory access in IT block. */
982 int arm_arch_no_volatile_ce;
983
984 /* Nonzero if we shouldn't use literal pools. */
985 bool arm_disable_literal_pool = false;
986
987 /* The register number to be used for the PIC offset register. */
988 unsigned arm_pic_register = INVALID_REGNUM;
989
990 enum arm_pcs arm_pcs_default;
991
992 /* For an explanation of these variables, see final_prescan_insn below. */
993 int arm_ccfsm_state;
994 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
995 enum arm_cond_code arm_current_cc;
996
997 rtx arm_target_insn;
998 int arm_target_label;
999 /* The number of conditionally executed insns, including the current insn. */
1000 int arm_condexec_count = 0;
1001 /* A bitmask specifying the patterns for the IT block.
1002 Zero means do not output an IT block before this insn. */
1003 int arm_condexec_mask = 0;
1004 /* The number of bits used in arm_condexec_mask. */
1005 int arm_condexec_masklen = 0;
1006
1007 /* Nonzero if chip supports the ARMv8 CRC instructions. */
1008 int arm_arch_crc = 0;
1009
1010 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
1011 int arm_arch_dotprod = 0;
1012
1013 /* Nonzero if chip supports the ARMv8-M security extensions. */
1014 int arm_arch_cmse = 0;
1015
1016 /* Nonzero if the core has a very small, high-latency multiply unit. */
1017 int arm_m_profile_small_mul = 0;
1018
1019 /* Nonzero if chip supports the AdvSIMD I8MM instructions. */
1020 int arm_arch_i8mm = 0;
1021
1022 /* Nonzero if chip supports the BFloat16 instructions. */
1023 int arm_arch_bf16 = 0;
1024
1025 /* Nonzero if chip supports the Custom Datapath Extension. */
1026 int arm_arch_cde = 0;
1027 int arm_arch_cde_coproc = 0;
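/* One bit per Custom Datapath Extension coprocessor, indexed by
   coprocessor number (0-7).  */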
1028 const int arm_arch_cde_coproc_bits[] = {
1029 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80
1030 };
1031
1032 /* The condition codes of the ARM, and the inverse function. */
1033 static const char * const arm_condition_codes[] =
1034 {
1035 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
1036 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
1037 };
1038
1039 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1040 int arm_regs_in_sequence[] =
1041 {
1042 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1043 };
1044
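/* Printable names of the floating-point system registers, generated by
   expanding the FP_SYSREGS list with DEF_FP_SYSREG below.  */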
1045 #define DEF_FP_SYSREG(reg) #reg,
1046 const char *fp_sysreg_names[NB_FP_SYSREGS] = {
1047 FP_SYSREGS
1048 };
1049 #undef DEF_FP_SYSREG
1050
1051 #define ARM_LSL_NAME "lsl"
1052 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1053
1054 #define THUMB2_WORK_REGS \
1055 (0xff & ~((1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1056 | (1 << SP_REGNUM) \
1057 | (1 << PC_REGNUM) \
1058 | (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM \
1059 ? (1 << PIC_OFFSET_TABLE_REGNUM) \
1060 : 0)))
1061 \f
1062 /* Initialization code. */
1063
1064 struct cpu_tune
1065 {
1066 enum processor_type scheduler;
1067 unsigned int tune_flags;
1068 const struct tune_params *tune;
1069 };
1070
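/* Initializers for the prefetch tuning parameters: number of prefetch
   slots, L1 cache size and L1 cache line size (-1 where a value does
   not apply).  */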
1071 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1072 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1073 { \
1074 num_slots, \
1075 l1_size, \
1076 l1_line_size \
1077 }
1078
1079 /* arm generic vectorizer costs. */
1080 static const
1081 struct cpu_vec_costs arm_default_vec_cost = {
1082 1, /* scalar_stmt_cost. */
1083 1, /* scalar_load_cost. */
1084 1, /* scalar_store_cost. */
1085 1, /* vec_stmt_cost. */
1086 1, /* vec_to_scalar_cost. */
1087 1, /* scalar_to_vec_cost. */
1088 1, /* vec_align_load_cost. */
1089 1, /* vec_unalign_load_cost. */
1090 1, /* vec_unalign_store_cost. */
1091 1, /* vec_store_cost. */
1092 3, /* cond_taken_branch_cost. */
1093 1, /* cond_not_taken_branch_cost. */
1094 };
1095
1096 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1097 #include "aarch-cost-tables.h"
1098
1099
1100
1101 const struct cpu_cost_table cortexa9_extra_costs =
1102 {
1103 /* ALU */
1104 {
1105 0, /* arith. */
1106 0, /* logical. */
1107 0, /* shift. */
1108 COSTS_N_INSNS (1), /* shift_reg. */
1109 COSTS_N_INSNS (1), /* arith_shift. */
1110 COSTS_N_INSNS (2), /* arith_shift_reg. */
1111 0, /* log_shift. */
1112 COSTS_N_INSNS (1), /* log_shift_reg. */
1113 COSTS_N_INSNS (1), /* extend. */
1114 COSTS_N_INSNS (2), /* extend_arith. */
1115 COSTS_N_INSNS (1), /* bfi. */
1116 COSTS_N_INSNS (1), /* bfx. */
1117 0, /* clz. */
1118 0, /* rev. */
1119 0, /* non_exec. */
1120 true /* non_exec_costs_exec. */
1121 },
1122 {
1123 /* MULT SImode */
1124 {
1125 COSTS_N_INSNS (3), /* simple. */
1126 COSTS_N_INSNS (3), /* flag_setting. */
1127 COSTS_N_INSNS (2), /* extend. */
1128 COSTS_N_INSNS (3), /* add. */
1129 COSTS_N_INSNS (2), /* extend_add. */
1130 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1131 },
1132 /* MULT DImode */
1133 {
1134 0, /* simple (N/A). */
1135 0, /* flag_setting (N/A). */
1136 COSTS_N_INSNS (4), /* extend. */
1137 0, /* add (N/A). */
1138 COSTS_N_INSNS (4), /* extend_add. */
1139 0 /* idiv (N/A). */
1140 }
1141 },
1142 /* LD/ST */
1143 {
1144 COSTS_N_INSNS (2), /* load. */
1145 COSTS_N_INSNS (2), /* load_sign_extend. */
1146 COSTS_N_INSNS (2), /* ldrd. */
1147 COSTS_N_INSNS (2), /* ldm_1st. */
1148 1, /* ldm_regs_per_insn_1st. */
1149 2, /* ldm_regs_per_insn_subsequent. */
1150 COSTS_N_INSNS (5), /* loadf. */
1151 COSTS_N_INSNS (5), /* loadd. */
1152 COSTS_N_INSNS (1), /* load_unaligned. */
1153 COSTS_N_INSNS (2), /* store. */
1154 COSTS_N_INSNS (2), /* strd. */
1155 COSTS_N_INSNS (2), /* stm_1st. */
1156 1, /* stm_regs_per_insn_1st. */
1157 2, /* stm_regs_per_insn_subsequent. */
1158 COSTS_N_INSNS (1), /* storef. */
1159 COSTS_N_INSNS (1), /* stored. */
1160 COSTS_N_INSNS (1), /* store_unaligned. */
1161 COSTS_N_INSNS (1), /* loadv. */
1162 COSTS_N_INSNS (1) /* storev. */
1163 },
1164 {
1165 /* FP SFmode */
1166 {
1167 COSTS_N_INSNS (14), /* div. */
1168 COSTS_N_INSNS (4), /* mult. */
1169 COSTS_N_INSNS (7), /* mult_addsub. */
1170 COSTS_N_INSNS (30), /* fma. */
1171 COSTS_N_INSNS (3), /* addsub. */
1172 COSTS_N_INSNS (1), /* fpconst. */
1173 COSTS_N_INSNS (1), /* neg. */
1174 COSTS_N_INSNS (3), /* compare. */
1175 COSTS_N_INSNS (3), /* widen. */
1176 COSTS_N_INSNS (3), /* narrow. */
1177 COSTS_N_INSNS (3), /* toint. */
1178 COSTS_N_INSNS (3), /* fromint. */
1179 COSTS_N_INSNS (3) /* roundint. */
1180 },
1181 /* FP DFmode */
1182 {
1183 COSTS_N_INSNS (24), /* div. */
1184 COSTS_N_INSNS (5), /* mult. */
1185 COSTS_N_INSNS (8), /* mult_addsub. */
1186 COSTS_N_INSNS (30), /* fma. */
1187 COSTS_N_INSNS (3), /* addsub. */
1188 COSTS_N_INSNS (1), /* fpconst. */
1189 COSTS_N_INSNS (1), /* neg. */
1190 COSTS_N_INSNS (3), /* compare. */
1191 COSTS_N_INSNS (3), /* widen. */
1192 COSTS_N_INSNS (3), /* narrow. */
1193 COSTS_N_INSNS (3), /* toint. */
1194 COSTS_N_INSNS (3), /* fromint. */
1195 COSTS_N_INSNS (3) /* roundint. */
1196 }
1197 },
1198 /* Vector */
1199 {
1200 COSTS_N_INSNS (1), /* alu. */
1201 COSTS_N_INSNS (4) /* mult. */
1202 }
1203 };
1204
1205 const struct cpu_cost_table cortexa8_extra_costs =
1206 {
1207 /* ALU */
1208 {
1209 0, /* arith. */
1210 0, /* logical. */
1211 COSTS_N_INSNS (1), /* shift. */
1212 0, /* shift_reg. */
1213 COSTS_N_INSNS (1), /* arith_shift. */
1214 0, /* arith_shift_reg. */
1215 COSTS_N_INSNS (1), /* log_shift. */
1216 0, /* log_shift_reg. */
1217 0, /* extend. */
1218 0, /* extend_arith. */
1219 0, /* bfi. */
1220 0, /* bfx. */
1221 0, /* clz. */
1222 0, /* rev. */
1223 0, /* non_exec. */
1224 true /* non_exec_costs_exec. */
1225 },
1226 {
1227 /* MULT SImode */
1228 {
1229 COSTS_N_INSNS (1), /* simple. */
1230 COSTS_N_INSNS (1), /* flag_setting. */
1231 COSTS_N_INSNS (1), /* extend. */
1232 COSTS_N_INSNS (1), /* add. */
1233 COSTS_N_INSNS (1), /* extend_add. */
1234 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1235 },
1236 /* MULT DImode */
1237 {
1238 0, /* simple (N/A). */
1239 0, /* flag_setting (N/A). */
1240 COSTS_N_INSNS (2), /* extend. */
1241 0, /* add (N/A). */
1242 COSTS_N_INSNS (2), /* extend_add. */
1243 0 /* idiv (N/A). */
1244 }
1245 },
1246 /* LD/ST */
1247 {
1248 COSTS_N_INSNS (1), /* load. */
1249 COSTS_N_INSNS (1), /* load_sign_extend. */
1250 COSTS_N_INSNS (1), /* ldrd. */
1251 COSTS_N_INSNS (1), /* ldm_1st. */
1252 1, /* ldm_regs_per_insn_1st. */
1253 2, /* ldm_regs_per_insn_subsequent. */
1254 COSTS_N_INSNS (1), /* loadf. */
1255 COSTS_N_INSNS (1), /* loadd. */
1256 COSTS_N_INSNS (1), /* load_unaligned. */
1257 COSTS_N_INSNS (1), /* store. */
1258 COSTS_N_INSNS (1), /* strd. */
1259 COSTS_N_INSNS (1), /* stm_1st. */
1260 1, /* stm_regs_per_insn_1st. */
1261 2, /* stm_regs_per_insn_subsequent. */
1262 COSTS_N_INSNS (1), /* storef. */
1263 COSTS_N_INSNS (1), /* stored. */
1264 COSTS_N_INSNS (1), /* store_unaligned. */
1265 COSTS_N_INSNS (1), /* loadv. */
1266 COSTS_N_INSNS (1) /* storev. */
1267 },
1268 {
1269 /* FP SFmode */
1270 {
1271 COSTS_N_INSNS (36), /* div. */
1272 COSTS_N_INSNS (11), /* mult. */
1273 COSTS_N_INSNS (20), /* mult_addsub. */
1274 COSTS_N_INSNS (30), /* fma. */
1275 COSTS_N_INSNS (9), /* addsub. */
1276 COSTS_N_INSNS (3), /* fpconst. */
1277 COSTS_N_INSNS (3), /* neg. */
1278 COSTS_N_INSNS (6), /* compare. */
1279 COSTS_N_INSNS (4), /* widen. */
1280 COSTS_N_INSNS (4), /* narrow. */
1281 COSTS_N_INSNS (8), /* toint. */
1282 COSTS_N_INSNS (8), /* fromint. */
1283 COSTS_N_INSNS (8) /* roundint. */
1284 },
1285 /* FP DFmode */
1286 {
1287 COSTS_N_INSNS (64), /* div. */
1288 COSTS_N_INSNS (16), /* mult. */
1289 COSTS_N_INSNS (25), /* mult_addsub. */
1290 COSTS_N_INSNS (30), /* fma. */
1291 COSTS_N_INSNS (9), /* addsub. */
1292 COSTS_N_INSNS (3), /* fpconst. */
1293 COSTS_N_INSNS (3), /* neg. */
1294 COSTS_N_INSNS (6), /* compare. */
1295 COSTS_N_INSNS (6), /* widen. */
1296 COSTS_N_INSNS (6), /* narrow. */
1297 COSTS_N_INSNS (8), /* toint. */
1298 COSTS_N_INSNS (8), /* fromint. */
1299 COSTS_N_INSNS (8) /* roundint. */
1300 }
1301 },
1302 /* Vector */
1303 {
1304 COSTS_N_INSNS (1), /* alu. */
1305 COSTS_N_INSNS (4) /* mult. */
1306 }
1307 };
1308
1309 const struct cpu_cost_table cortexa5_extra_costs =
1310 {
1311 /* ALU */
1312 {
1313 0, /* arith. */
1314 0, /* logical. */
1315 COSTS_N_INSNS (1), /* shift. */
1316 COSTS_N_INSNS (1), /* shift_reg. */
1317 COSTS_N_INSNS (1), /* arith_shift. */
1318 COSTS_N_INSNS (1), /* arith_shift_reg. */
1319 COSTS_N_INSNS (1), /* log_shift. */
1320 COSTS_N_INSNS (1), /* log_shift_reg. */
1321 COSTS_N_INSNS (1), /* extend. */
1322 COSTS_N_INSNS (1), /* extend_arith. */
1323 COSTS_N_INSNS (1), /* bfi. */
1324 COSTS_N_INSNS (1), /* bfx. */
1325 COSTS_N_INSNS (1), /* clz. */
1326 COSTS_N_INSNS (1), /* rev. */
1327 0, /* non_exec. */
1328 true /* non_exec_costs_exec. */
1329 },
1330
1331 {
1332 /* MULT SImode */
1333 {
1334 0, /* simple. */
1335 COSTS_N_INSNS (1), /* flag_setting. */
1336 COSTS_N_INSNS (1), /* extend. */
1337 COSTS_N_INSNS (1), /* add. */
1338 COSTS_N_INSNS (1), /* extend_add. */
1339 COSTS_N_INSNS (7) /* idiv. */
1340 },
1341 /* MULT DImode */
1342 {
1343 0, /* simple (N/A). */
1344 0, /* flag_setting (N/A). */
1345 COSTS_N_INSNS (1), /* extend. */
1346 0, /* add. */
1347 COSTS_N_INSNS (2), /* extend_add. */
1348 0 /* idiv (N/A). */
1349 }
1350 },
1351 /* LD/ST */
1352 {
1353 COSTS_N_INSNS (1), /* load. */
1354 COSTS_N_INSNS (1), /* load_sign_extend. */
1355 COSTS_N_INSNS (6), /* ldrd. */
1356 COSTS_N_INSNS (1), /* ldm_1st. */
1357 1, /* ldm_regs_per_insn_1st. */
1358 2, /* ldm_regs_per_insn_subsequent. */
1359 COSTS_N_INSNS (2), /* loadf. */
1360 COSTS_N_INSNS (4), /* loadd. */
1361 COSTS_N_INSNS (1), /* load_unaligned. */
1362 COSTS_N_INSNS (1), /* store. */
1363 COSTS_N_INSNS (3), /* strd. */
1364 COSTS_N_INSNS (1), /* stm_1st. */
1365 1, /* stm_regs_per_insn_1st. */
1366 2, /* stm_regs_per_insn_subsequent. */
1367 COSTS_N_INSNS (2), /* storef. */
1368 COSTS_N_INSNS (2), /* stored. */
1369 COSTS_N_INSNS (1), /* store_unaligned. */
1370 COSTS_N_INSNS (1), /* loadv. */
1371 COSTS_N_INSNS (1) /* storev. */
1372 },
1373 {
1374 /* FP SFmode */
1375 {
1376 COSTS_N_INSNS (15), /* div. */
1377 COSTS_N_INSNS (3), /* mult. */
1378 COSTS_N_INSNS (7), /* mult_addsub. */
1379 COSTS_N_INSNS (7), /* fma. */
1380 COSTS_N_INSNS (3), /* addsub. */
1381 COSTS_N_INSNS (3), /* fpconst. */
1382 COSTS_N_INSNS (3), /* neg. */
1383 COSTS_N_INSNS (3), /* compare. */
1384 COSTS_N_INSNS (3), /* widen. */
1385 COSTS_N_INSNS (3), /* narrow. */
1386 COSTS_N_INSNS (3), /* toint. */
1387 COSTS_N_INSNS (3), /* fromint. */
1388 COSTS_N_INSNS (3) /* roundint. */
1389 },
1390 /* FP DFmode */
1391 {
1392 COSTS_N_INSNS (30), /* div. */
1393 COSTS_N_INSNS (6), /* mult. */
1394 COSTS_N_INSNS (10), /* mult_addsub. */
1395 COSTS_N_INSNS (7), /* fma. */
1396 COSTS_N_INSNS (3), /* addsub. */
1397 COSTS_N_INSNS (3), /* fpconst. */
1398 COSTS_N_INSNS (3), /* neg. */
1399 COSTS_N_INSNS (3), /* compare. */
1400 COSTS_N_INSNS (3), /* widen. */
1401 COSTS_N_INSNS (3), /* narrow. */
1402 COSTS_N_INSNS (3), /* toint. */
1403 COSTS_N_INSNS (3), /* fromint. */
1404 COSTS_N_INSNS (3) /* roundint. */
1405 }
1406 },
1407 /* Vector */
1408 {
1409 COSTS_N_INSNS (1), /* alu. */
1410 COSTS_N_INSNS (4) /* mult. */
1411 }
1412 };
1413
1414
1415 const struct cpu_cost_table cortexa7_extra_costs =
1416 {
1417 /* ALU */
1418 {
1419 0, /* arith. */
1420 0, /* logical. */
1421 COSTS_N_INSNS (1), /* shift. */
1422 COSTS_N_INSNS (1), /* shift_reg. */
1423 COSTS_N_INSNS (1), /* arith_shift. */
1424 COSTS_N_INSNS (1), /* arith_shift_reg. */
1425 COSTS_N_INSNS (1), /* log_shift. */
1426 COSTS_N_INSNS (1), /* log_shift_reg. */
1427 COSTS_N_INSNS (1), /* extend. */
1428 COSTS_N_INSNS (1), /* extend_arith. */
1429 COSTS_N_INSNS (1), /* bfi. */
1430 COSTS_N_INSNS (1), /* bfx. */
1431 COSTS_N_INSNS (1), /* clz. */
1432 COSTS_N_INSNS (1), /* rev. */
1433 0, /* non_exec. */
1434 true /* non_exec_costs_exec. */
1435 },
1436
1437 {
1438 /* MULT SImode */
1439 {
1440 0, /* simple. */
1441 COSTS_N_INSNS (1), /* flag_setting. */
1442 COSTS_N_INSNS (1), /* extend. */
1443 COSTS_N_INSNS (1), /* add. */
1444 COSTS_N_INSNS (1), /* extend_add. */
1445 COSTS_N_INSNS (7) /* idiv. */
1446 },
1447 /* MULT DImode */
1448 {
1449 0, /* simple (N/A). */
1450 0, /* flag_setting (N/A). */
1451 COSTS_N_INSNS (1), /* extend. */
1452 0, /* add. */
1453 COSTS_N_INSNS (2), /* extend_add. */
1454 0 /* idiv (N/A). */
1455 }
1456 },
1457 /* LD/ST */
1458 {
1459 COSTS_N_INSNS (1), /* load. */
1460 COSTS_N_INSNS (1), /* load_sign_extend. */
1461 COSTS_N_INSNS (3), /* ldrd. */
1462 COSTS_N_INSNS (1), /* ldm_1st. */
1463 1, /* ldm_regs_per_insn_1st. */
1464 2, /* ldm_regs_per_insn_subsequent. */
1465 COSTS_N_INSNS (2), /* loadf. */
1466 COSTS_N_INSNS (2), /* loadd. */
1467 COSTS_N_INSNS (1), /* load_unaligned. */
1468 COSTS_N_INSNS (1), /* store. */
1469 COSTS_N_INSNS (3), /* strd. */
1470 COSTS_N_INSNS (1), /* stm_1st. */
1471 1, /* stm_regs_per_insn_1st. */
1472 2, /* stm_regs_per_insn_subsequent. */
1473 COSTS_N_INSNS (2), /* storef. */
1474 COSTS_N_INSNS (2), /* stored. */
1475 COSTS_N_INSNS (1), /* store_unaligned. */
1476 COSTS_N_INSNS (1), /* loadv. */
1477 COSTS_N_INSNS (1) /* storev. */
1478 },
1479 {
1480 /* FP SFmode */
1481 {
1482 COSTS_N_INSNS (15), /* div. */
1483 COSTS_N_INSNS (3), /* mult. */
1484 COSTS_N_INSNS (7), /* mult_addsub. */
1485 COSTS_N_INSNS (7), /* fma. */
1486 COSTS_N_INSNS (3), /* addsub. */
1487 COSTS_N_INSNS (3), /* fpconst. */
1488 COSTS_N_INSNS (3), /* neg. */
1489 COSTS_N_INSNS (3), /* compare. */
1490 COSTS_N_INSNS (3), /* widen. */
1491 COSTS_N_INSNS (3), /* narrow. */
1492 COSTS_N_INSNS (3), /* toint. */
1493 COSTS_N_INSNS (3), /* fromint. */
1494 COSTS_N_INSNS (3) /* roundint. */
1495 },
1496 /* FP DFmode */
1497 {
1498 COSTS_N_INSNS (30), /* div. */
1499 COSTS_N_INSNS (6), /* mult. */
1500 COSTS_N_INSNS (10), /* mult_addsub. */
1501 COSTS_N_INSNS (7), /* fma. */
1502 COSTS_N_INSNS (3), /* addsub. */
1503 COSTS_N_INSNS (3), /* fpconst. */
1504 COSTS_N_INSNS (3), /* neg. */
1505 COSTS_N_INSNS (3), /* compare. */
1506 COSTS_N_INSNS (3), /* widen. */
1507 COSTS_N_INSNS (3), /* narrow. */
1508 COSTS_N_INSNS (3), /* toint. */
1509 COSTS_N_INSNS (3), /* fromint. */
1510 COSTS_N_INSNS (3) /* roundint. */
1511 }
1512 },
1513 /* Vector */
1514 {
1515 COSTS_N_INSNS (1), /* alu. */
1516 COSTS_N_INSNS (4) /* mult. */
1517 }
1518 };
1519
1520 const struct cpu_cost_table cortexa12_extra_costs =
1521 {
1522 /* ALU */
1523 {
1524 0, /* arith. */
1525 0, /* logical. */
1526 0, /* shift. */
1527 COSTS_N_INSNS (1), /* shift_reg. */
1528 COSTS_N_INSNS (1), /* arith_shift. */
1529 COSTS_N_INSNS (1), /* arith_shift_reg. */
1530 COSTS_N_INSNS (1), /* log_shift. */
1531 COSTS_N_INSNS (1), /* log_shift_reg. */
1532 0, /* extend. */
1533 COSTS_N_INSNS (1), /* extend_arith. */
1534 0, /* bfi. */
1535 COSTS_N_INSNS (1), /* bfx. */
1536 COSTS_N_INSNS (1), /* clz. */
1537 COSTS_N_INSNS (1), /* rev. */
1538 0, /* non_exec. */
1539 true /* non_exec_costs_exec. */
1540 },
1541 /* MULT SImode */
1542 {
1543 {
1544 COSTS_N_INSNS (2), /* simple. */
1545 COSTS_N_INSNS (3), /* flag_setting. */
1546 COSTS_N_INSNS (2), /* extend. */
1547 COSTS_N_INSNS (3), /* add. */
1548 COSTS_N_INSNS (2), /* extend_add. */
1549 COSTS_N_INSNS (18) /* idiv. */
1550 },
1551 /* MULT DImode */
1552 {
1553 0, /* simple (N/A). */
1554 0, /* flag_setting (N/A). */
1555 COSTS_N_INSNS (3), /* extend. */
1556 0, /* add (N/A). */
1557 COSTS_N_INSNS (3), /* extend_add. */
1558 0 /* idiv (N/A). */
1559 }
1560 },
1561 /* LD/ST */
1562 {
1563 COSTS_N_INSNS (3), /* load. */
1564 COSTS_N_INSNS (3), /* load_sign_extend. */
1565 COSTS_N_INSNS (3), /* ldrd. */
1566 COSTS_N_INSNS (3), /* ldm_1st. */
1567 1, /* ldm_regs_per_insn_1st. */
1568 2, /* ldm_regs_per_insn_subsequent. */
1569 COSTS_N_INSNS (3), /* loadf. */
1570 COSTS_N_INSNS (3), /* loadd. */
1571 0, /* load_unaligned. */
1572 0, /* store. */
1573 0, /* strd. */
1574 0, /* stm_1st. */
1575 1, /* stm_regs_per_insn_1st. */
1576 2, /* stm_regs_per_insn_subsequent. */
1577 COSTS_N_INSNS (2), /* storef. */
1578 COSTS_N_INSNS (2), /* stored. */
1579 0, /* store_unaligned. */
1580 COSTS_N_INSNS (1), /* loadv. */
1581 COSTS_N_INSNS (1) /* storev. */
1582 },
1583 {
1584 /* FP SFmode */
1585 {
1586 COSTS_N_INSNS (17), /* div. */
1587 COSTS_N_INSNS (4), /* mult. */
1588 COSTS_N_INSNS (8), /* mult_addsub. */
1589 COSTS_N_INSNS (8), /* fma. */
1590 COSTS_N_INSNS (4), /* addsub. */
1591 COSTS_N_INSNS (2), /* fpconst. */
1592 COSTS_N_INSNS (2), /* neg. */
1593 COSTS_N_INSNS (2), /* compare. */
1594 COSTS_N_INSNS (4), /* widen. */
1595 COSTS_N_INSNS (4), /* narrow. */
1596 COSTS_N_INSNS (4), /* toint. */
1597 COSTS_N_INSNS (4), /* fromint. */
1598 COSTS_N_INSNS (4) /* roundint. */
1599 },
1600 /* FP DFmode */
1601 {
1602 COSTS_N_INSNS (31), /* div. */
1603 COSTS_N_INSNS (4), /* mult. */
1604 COSTS_N_INSNS (8), /* mult_addsub. */
1605 COSTS_N_INSNS (8), /* fma. */
1606 COSTS_N_INSNS (4), /* addsub. */
1607 COSTS_N_INSNS (2), /* fpconst. */
1608 COSTS_N_INSNS (2), /* neg. */
1609 COSTS_N_INSNS (2), /* compare. */
1610 COSTS_N_INSNS (4), /* widen. */
1611 COSTS_N_INSNS (4), /* narrow. */
1612 COSTS_N_INSNS (4), /* toint. */
1613 COSTS_N_INSNS (4), /* fromint. */
1614 COSTS_N_INSNS (4) /* roundint. */
1615 }
1616 },
1617 /* Vector */
1618 {
1619 COSTS_N_INSNS (1), /* alu. */
1620 COSTS_N_INSNS (4) /* mult. */
1621 }
1622 };
1623
1624 const struct cpu_cost_table cortexa15_extra_costs =
1625 {
1626 /* ALU */
1627 {
1628 0, /* arith. */
1629 0, /* logical. */
1630 0, /* shift. */
1631 0, /* shift_reg. */
1632 COSTS_N_INSNS (1), /* arith_shift. */
1633 COSTS_N_INSNS (1), /* arith_shift_reg. */
1634 COSTS_N_INSNS (1), /* log_shift. */
1635 COSTS_N_INSNS (1), /* log_shift_reg. */
1636 0, /* extend. */
1637 COSTS_N_INSNS (1), /* extend_arith. */
1638 COSTS_N_INSNS (1), /* bfi. */
1639 0, /* bfx. */
1640 0, /* clz. */
1641 0, /* rev. */
1642 0, /* non_exec. */
1643 true /* non_exec_costs_exec. */
1644 },
1645 /* MULT SImode */
1646 {
1647 {
1648 COSTS_N_INSNS (2), /* simple. */
1649 COSTS_N_INSNS (3), /* flag_setting. */
1650 COSTS_N_INSNS (2), /* extend. */
1651 COSTS_N_INSNS (2), /* add. */
1652 COSTS_N_INSNS (2), /* extend_add. */
1653 COSTS_N_INSNS (18) /* idiv. */
1654 },
1655 /* MULT DImode */
1656 {
1657 0, /* simple (N/A). */
1658 0, /* flag_setting (N/A). */
1659 COSTS_N_INSNS (3), /* extend. */
1660 0, /* add (N/A). */
1661 COSTS_N_INSNS (3), /* extend_add. */
1662 0 /* idiv (N/A). */
1663 }
1664 },
1665 /* LD/ST */
1666 {
1667 COSTS_N_INSNS (3), /* load. */
1668 COSTS_N_INSNS (3), /* load_sign_extend. */
1669 COSTS_N_INSNS (3), /* ldrd. */
1670 COSTS_N_INSNS (4), /* ldm_1st. */
1671 1, /* ldm_regs_per_insn_1st. */
1672 2, /* ldm_regs_per_insn_subsequent. */
1673 COSTS_N_INSNS (4), /* loadf. */
1674 COSTS_N_INSNS (4), /* loadd. */
1675 0, /* load_unaligned. */
1676 0, /* store. */
1677 0, /* strd. */
1678 COSTS_N_INSNS (1), /* stm_1st. */
1679 1, /* stm_regs_per_insn_1st. */
1680 2, /* stm_regs_per_insn_subsequent. */
1681 0, /* storef. */
1682 0, /* stored. */
1683 0, /* store_unaligned. */
1684 COSTS_N_INSNS (1), /* loadv. */
1685 COSTS_N_INSNS (1) /* storev. */
1686 },
1687 {
1688 /* FP SFmode */
1689 {
1690 COSTS_N_INSNS (17), /* div. */
1691 COSTS_N_INSNS (4), /* mult. */
1692 COSTS_N_INSNS (8), /* mult_addsub. */
1693 COSTS_N_INSNS (8), /* fma. */
1694 COSTS_N_INSNS (4), /* addsub. */
1695 COSTS_N_INSNS (2), /* fpconst. */
1696 COSTS_N_INSNS (2), /* neg. */
1697 COSTS_N_INSNS (5), /* compare. */
1698 COSTS_N_INSNS (4), /* widen. */
1699 COSTS_N_INSNS (4), /* narrow. */
1700 COSTS_N_INSNS (4), /* toint. */
1701 COSTS_N_INSNS (4), /* fromint. */
1702 COSTS_N_INSNS (4) /* roundint. */
1703 },
1704 /* FP DFmode */
1705 {
1706 COSTS_N_INSNS (31), /* div. */
1707 COSTS_N_INSNS (4), /* mult. */
1708 COSTS_N_INSNS (8), /* mult_addsub. */
1709 COSTS_N_INSNS (8), /* fma. */
1710 COSTS_N_INSNS (4), /* addsub. */
1711 COSTS_N_INSNS (2), /* fpconst. */
1712 COSTS_N_INSNS (2), /* neg. */
1713 COSTS_N_INSNS (2), /* compare. */
1714 COSTS_N_INSNS (4), /* widen. */
1715 COSTS_N_INSNS (4), /* narrow. */
1716 COSTS_N_INSNS (4), /* toint. */
1717 COSTS_N_INSNS (4), /* fromint. */
1718 COSTS_N_INSNS (4) /* roundint. */
1719 }
1720 },
1721 /* Vector */
1722 {
1723 COSTS_N_INSNS (1), /* alu. */
1724 COSTS_N_INSNS (4) /* mult. */
1725 }
1726 };
1727
1728 const struct cpu_cost_table v7m_extra_costs =
1729 {
1730 /* ALU */
1731 {
1732 0, /* arith. */
1733 0, /* logical. */
1734 0, /* shift. */
1735 0, /* shift_reg. */
1736 0, /* arith_shift. */
1737 COSTS_N_INSNS (1), /* arith_shift_reg. */
1738 0, /* log_shift. */
1739 COSTS_N_INSNS (1), /* log_shift_reg. */
1740 0, /* extend. */
1741 COSTS_N_INSNS (1), /* extend_arith. */
1742 0, /* bfi. */
1743 0, /* bfx. */
1744 0, /* clz. */
1745 0, /* rev. */
1746 COSTS_N_INSNS (1), /* non_exec. */
1747 false /* non_exec_costs_exec. */
1748 },
1749 {
1750 /* MULT SImode */
1751 {
1752 COSTS_N_INSNS (1), /* simple. */
1753 COSTS_N_INSNS (1), /* flag_setting. */
1754 COSTS_N_INSNS (2), /* extend. */
1755 COSTS_N_INSNS (1), /* add. */
1756 COSTS_N_INSNS (3), /* extend_add. */
1757 COSTS_N_INSNS (8) /* idiv. */
1758 },
1759 /* MULT DImode */
1760 {
1761 0, /* simple (N/A). */
1762 0, /* flag_setting (N/A). */
1763 COSTS_N_INSNS (2), /* extend. */
1764 0, /* add (N/A). */
1765 COSTS_N_INSNS (3), /* extend_add. */
1766 0 /* idiv (N/A). */
1767 }
1768 },
1769 /* LD/ST */
1770 {
1771 COSTS_N_INSNS (2), /* load. */
1772 0, /* load_sign_extend. */
1773 COSTS_N_INSNS (3), /* ldrd. */
1774 COSTS_N_INSNS (2), /* ldm_1st. */
1775 1, /* ldm_regs_per_insn_1st. */
1776 1, /* ldm_regs_per_insn_subsequent. */
1777 COSTS_N_INSNS (2), /* loadf. */
1778 COSTS_N_INSNS (3), /* loadd. */
1779 COSTS_N_INSNS (1), /* load_unaligned. */
1780 COSTS_N_INSNS (2), /* store. */
1781 COSTS_N_INSNS (3), /* strd. */
1782 COSTS_N_INSNS (2), /* stm_1st. */
1783 1, /* stm_regs_per_insn_1st. */
1784 1, /* stm_regs_per_insn_subsequent. */
1785 COSTS_N_INSNS (2), /* storef. */
1786 COSTS_N_INSNS (3), /* stored. */
1787 COSTS_N_INSNS (1), /* store_unaligned. */
1788 COSTS_N_INSNS (1), /* loadv. */
1789 COSTS_N_INSNS (1) /* storev. */
1790 },
1791 {
1792 /* FP SFmode */
1793 {
1794 COSTS_N_INSNS (7), /* div. */
1795 COSTS_N_INSNS (2), /* mult. */
1796 COSTS_N_INSNS (5), /* mult_addsub. */
1797 COSTS_N_INSNS (3), /* fma. */
1798 COSTS_N_INSNS (1), /* addsub. */
1799 0, /* fpconst. */
1800 0, /* neg. */
1801 0, /* compare. */
1802 0, /* widen. */
1803 0, /* narrow. */
1804 0, /* toint. */
1805 0, /* fromint. */
1806 0 /* roundint. */
1807 },
1808 /* FP DFmode */
1809 {
1810 COSTS_N_INSNS (15), /* div. */
1811 COSTS_N_INSNS (5), /* mult. */
1812 COSTS_N_INSNS (7), /* mult_addsub. */
1813 COSTS_N_INSNS (7), /* fma. */
1814 COSTS_N_INSNS (3), /* addsub. */
1815 0, /* fpconst. */
1816 0, /* neg. */
1817 0, /* compare. */
1818 0, /* widen. */
1819 0, /* narrow. */
1820 0, /* toint. */
1821 0, /* fromint. */
1822 0 /* roundint. */
1823 }
1824 },
1825 /* Vector */
1826 {
1827 COSTS_N_INSNS (1), /* alu. */
1828 COSTS_N_INSNS (4) /* mult. */
1829 }
1830 };
1831
1832 const struct addr_mode_cost_table generic_addr_mode_costs =
1833 {
1834 /* int. */
1835 {
1836 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1837 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1838 COSTS_N_INSNS (0) /* AMO_WB. */
1839 },
1840 /* float. */
1841 {
1842 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1843 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1844 COSTS_N_INSNS (0) /* AMO_WB. */
1845 },
1846 /* vector. */
1847 {
1848 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1849 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1850 COSTS_N_INSNS (0) /* AMO_WB. */
1851 }
1852 };
1853
1854 const struct tune_params arm_slowmul_tune =
1855 {
1856 &generic_extra_costs, /* Insn extra costs. */
1857 &generic_addr_mode_costs, /* Addressing mode costs. */
1858 NULL, /* Sched adj cost. */
1859 arm_default_branch_cost,
1860 &arm_default_vec_cost,
1861 3, /* Constant limit. */
1862 5, /* Max cond insns. */
1863 8, /* Memset max inline. */
1864 1, /* Issue rate. */
1865 ARM_PREFETCH_NOT_BENEFICIAL,
1866 tune_params::PREF_CONST_POOL_TRUE,
1867 tune_params::PREF_LDRD_FALSE,
1868 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1869 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1870 tune_params::DISPARAGE_FLAGS_NEITHER,
1871 tune_params::PREF_NEON_STRINGOPS_FALSE,
1872 tune_params::FUSE_NOTHING,
1873 tune_params::SCHED_AUTOPREF_OFF
1874 };
1875
1876 const struct tune_params arm_fastmul_tune =
1877 {
1878 &generic_extra_costs, /* Insn extra costs. */
1879 &generic_addr_mode_costs, /* Addressing mode costs. */
1880 NULL, /* Sched adj cost. */
1881 arm_default_branch_cost,
1882 &arm_default_vec_cost,
1883 1, /* Constant limit. */
1884 5, /* Max cond insns. */
1885 8, /* Memset max inline. */
1886 1, /* Issue rate. */
1887 ARM_PREFETCH_NOT_BENEFICIAL,
1888 tune_params::PREF_CONST_POOL_TRUE,
1889 tune_params::PREF_LDRD_FALSE,
1890 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1891 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1892 tune_params::DISPARAGE_FLAGS_NEITHER,
1893 tune_params::PREF_NEON_STRINGOPS_FALSE,
1894 tune_params::FUSE_NOTHING,
1895 tune_params::SCHED_AUTOPREF_OFF
1896 };
1897
1898 /* StrongARM has early execution of branches, so a sequence that is worth
1899 skipping is shorter. Set max_insns_skipped to a lower value. */
1900
1901 const struct tune_params arm_strongarm_tune =
1902 {
1903 &generic_extra_costs, /* Insn extra costs. */
1904 &generic_addr_mode_costs, /* Addressing mode costs. */
1905 NULL, /* Sched adj cost. */
1906 arm_default_branch_cost,
1907 &arm_default_vec_cost,
1908 1, /* Constant limit. */
1909 3, /* Max cond insns. */
1910 8, /* Memset max inline. */
1911 1, /* Issue rate. */
1912 ARM_PREFETCH_NOT_BENEFICIAL,
1913 tune_params::PREF_CONST_POOL_TRUE,
1914 tune_params::PREF_LDRD_FALSE,
1915 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1916 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1917 tune_params::DISPARAGE_FLAGS_NEITHER,
1918 tune_params::PREF_NEON_STRINGOPS_FALSE,
1919 tune_params::FUSE_NOTHING,
1920 tune_params::SCHED_AUTOPREF_OFF
1921 };
1922
1923 const struct tune_params arm_xscale_tune =
1924 {
1925 &generic_extra_costs, /* Insn extra costs. */
1926 &generic_addr_mode_costs, /* Addressing mode costs. */
1927 xscale_sched_adjust_cost,
1928 arm_default_branch_cost,
1929 &arm_default_vec_cost,
1930 2, /* Constant limit. */
1931 3, /* Max cond insns. */
1932 8, /* Memset max inline. */
1933 1, /* Issue rate. */
1934 ARM_PREFETCH_NOT_BENEFICIAL,
1935 tune_params::PREF_CONST_POOL_TRUE,
1936 tune_params::PREF_LDRD_FALSE,
1937 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1938 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1939 tune_params::DISPARAGE_FLAGS_NEITHER,
1940 tune_params::PREF_NEON_STRINGOPS_FALSE,
1941 tune_params::FUSE_NOTHING,
1942 tune_params::SCHED_AUTOPREF_OFF
1943 };
1944
1945 const struct tune_params arm_9e_tune =
1946 {
1947 &generic_extra_costs, /* Insn extra costs. */
1948 &generic_addr_mode_costs, /* Addressing mode costs. */
1949 NULL, /* Sched adj cost. */
1950 arm_default_branch_cost,
1951 &arm_default_vec_cost,
1952 1, /* Constant limit. */
1953 5, /* Max cond insns. */
1954 8, /* Memset max inline. */
1955 1, /* Issue rate. */
1956 ARM_PREFETCH_NOT_BENEFICIAL,
1957 tune_params::PREF_CONST_POOL_TRUE,
1958 tune_params::PREF_LDRD_FALSE,
1959 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1960 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1961 tune_params::DISPARAGE_FLAGS_NEITHER,
1962 tune_params::PREF_NEON_STRINGOPS_FALSE,
1963 tune_params::FUSE_NOTHING,
1964 tune_params::SCHED_AUTOPREF_OFF
1965 };
1966
1967 const struct tune_params arm_marvell_pj4_tune =
1968 {
1969 &generic_extra_costs, /* Insn extra costs. */
1970 &generic_addr_mode_costs, /* Addressing mode costs. */
1971 NULL, /* Sched adj cost. */
1972 arm_default_branch_cost,
1973 &arm_default_vec_cost,
1974 1, /* Constant limit. */
1975 5, /* Max cond insns. */
1976 8, /* Memset max inline. */
1977 2, /* Issue rate. */
1978 ARM_PREFETCH_NOT_BENEFICIAL,
1979 tune_params::PREF_CONST_POOL_TRUE,
1980 tune_params::PREF_LDRD_FALSE,
1981 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1982 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1983 tune_params::DISPARAGE_FLAGS_NEITHER,
1984 tune_params::PREF_NEON_STRINGOPS_FALSE,
1985 tune_params::FUSE_NOTHING,
1986 tune_params::SCHED_AUTOPREF_OFF
1987 };
1988
1989 const struct tune_params arm_v6t2_tune =
1990 {
1991 &generic_extra_costs, /* Insn extra costs. */
1992 &generic_addr_mode_costs, /* Addressing mode costs. */
1993 NULL, /* Sched adj cost. */
1994 arm_default_branch_cost,
1995 &arm_default_vec_cost,
1996 1, /* Constant limit. */
1997 5, /* Max cond insns. */
1998 8, /* Memset max inline. */
1999 1, /* Issue rate. */
2000 ARM_PREFETCH_NOT_BENEFICIAL,
2001 tune_params::PREF_CONST_POOL_FALSE,
2002 tune_params::PREF_LDRD_FALSE,
2003 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2004 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2005 tune_params::DISPARAGE_FLAGS_NEITHER,
2006 tune_params::PREF_NEON_STRINGOPS_FALSE,
2007 tune_params::FUSE_NOTHING,
2008 tune_params::SCHED_AUTOPREF_OFF
2009 };
2010
2011
2012 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
2013 const struct tune_params arm_cortex_tune =
2014 {
2015 &generic_extra_costs,
2016 &generic_addr_mode_costs, /* Addressing mode costs. */
2017 NULL, /* Sched adj cost. */
2018 arm_default_branch_cost,
2019 &arm_default_vec_cost,
2020 1, /* Constant limit. */
2021 5, /* Max cond insns. */
2022 8, /* Memset max inline. */
2023 2, /* Issue rate. */
2024 ARM_PREFETCH_NOT_BENEFICIAL,
2025 tune_params::PREF_CONST_POOL_FALSE,
2026 tune_params::PREF_LDRD_FALSE,
2027 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2028 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2029 tune_params::DISPARAGE_FLAGS_NEITHER,
2030 tune_params::PREF_NEON_STRINGOPS_FALSE,
2031 tune_params::FUSE_NOTHING,
2032 tune_params::SCHED_AUTOPREF_OFF
2033 };
2034
2035 const struct tune_params arm_cortex_a8_tune =
2036 {
2037 &cortexa8_extra_costs,
2038 &generic_addr_mode_costs, /* Addressing mode costs. */
2039 NULL, /* Sched adj cost. */
2040 arm_default_branch_cost,
2041 &arm_default_vec_cost,
2042 1, /* Constant limit. */
2043 5, /* Max cond insns. */
2044 8, /* Memset max inline. */
2045 2, /* Issue rate. */
2046 ARM_PREFETCH_NOT_BENEFICIAL,
2047 tune_params::PREF_CONST_POOL_FALSE,
2048 tune_params::PREF_LDRD_FALSE,
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2050 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2051 tune_params::DISPARAGE_FLAGS_NEITHER,
2052 tune_params::PREF_NEON_STRINGOPS_TRUE,
2053 tune_params::FUSE_NOTHING,
2054 tune_params::SCHED_AUTOPREF_OFF
2055 };
2056
2057 const struct tune_params arm_cortex_a7_tune =
2058 {
2059 &cortexa7_extra_costs,
2060 &generic_addr_mode_costs, /* Addressing mode costs. */
2061 NULL, /* Sched adj cost. */
2062 arm_default_branch_cost,
2063 &arm_default_vec_cost,
2064 1, /* Constant limit. */
2065 5, /* Max cond insns. */
2066 8, /* Memset max inline. */
2067 2, /* Issue rate. */
2068 ARM_PREFETCH_NOT_BENEFICIAL,
2069 tune_params::PREF_CONST_POOL_FALSE,
2070 tune_params::PREF_LDRD_FALSE,
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2072 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2073 tune_params::DISPARAGE_FLAGS_NEITHER,
2074 tune_params::PREF_NEON_STRINGOPS_TRUE,
2075 tune_params::FUSE_NOTHING,
2076 tune_params::SCHED_AUTOPREF_OFF
2077 };
2078
2079 const struct tune_params arm_cortex_a15_tune =
2080 {
2081 &cortexa15_extra_costs,
2082 &generic_addr_mode_costs, /* Addressing mode costs. */
2083 NULL, /* Sched adj cost. */
2084 arm_default_branch_cost,
2085 &arm_default_vec_cost,
2086 1, /* Constant limit. */
2087 2, /* Max cond insns. */
2088 8, /* Memset max inline. */
2089 3, /* Issue rate. */
2090 ARM_PREFETCH_NOT_BENEFICIAL,
2091 tune_params::PREF_CONST_POOL_FALSE,
2092 tune_params::PREF_LDRD_TRUE,
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2094 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2095 tune_params::DISPARAGE_FLAGS_ALL,
2096 tune_params::PREF_NEON_STRINGOPS_TRUE,
2097 tune_params::FUSE_NOTHING,
2098 tune_params::SCHED_AUTOPREF_FULL
2099 };
2100
2101 const struct tune_params arm_cortex_a35_tune =
2102 {
2103 &cortexa53_extra_costs,
2104 &generic_addr_mode_costs, /* Addressing mode costs. */
2105 NULL, /* Sched adj cost. */
2106 arm_default_branch_cost,
2107 &arm_default_vec_cost,
2108 1, /* Constant limit. */
2109 5, /* Max cond insns. */
2110 8, /* Memset max inline. */
2111 1, /* Issue rate. */
2112 ARM_PREFETCH_NOT_BENEFICIAL,
2113 tune_params::PREF_CONST_POOL_FALSE,
2114 tune_params::PREF_LDRD_FALSE,
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2117 tune_params::DISPARAGE_FLAGS_NEITHER,
2118 tune_params::PREF_NEON_STRINGOPS_TRUE,
2119 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2120 tune_params::SCHED_AUTOPREF_OFF
2121 };
2122
2123 const struct tune_params arm_cortex_a53_tune =
2124 {
2125 &cortexa53_extra_costs,
2126 &generic_addr_mode_costs, /* Addressing mode costs. */
2127 NULL, /* Sched adj cost. */
2128 arm_default_branch_cost,
2129 &arm_default_vec_cost,
2130 1, /* Constant limit. */
2131 5, /* Max cond insns. */
2132 8, /* Memset max inline. */
2133 2, /* Issue rate. */
2134 ARM_PREFETCH_NOT_BENEFICIAL,
2135 tune_params::PREF_CONST_POOL_FALSE,
2136 tune_params::PREF_LDRD_FALSE,
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2138 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2139 tune_params::DISPARAGE_FLAGS_NEITHER,
2140 tune_params::PREF_NEON_STRINGOPS_TRUE,
2141 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2142 tune_params::SCHED_AUTOPREF_OFF
2143 };
2144
2145 const struct tune_params arm_cortex_a57_tune =
2146 {
2147 &cortexa57_extra_costs,
2148 &generic_addr_mode_costs, /* Addressing mode costs. */
2149 NULL, /* Sched adj cost. */
2150 arm_default_branch_cost,
2151 &arm_default_vec_cost,
2152 1, /* Constant limit. */
2153 2, /* Max cond insns. */
2154 8, /* Memset max inline. */
2155 3, /* Issue rate. */
2156 ARM_PREFETCH_NOT_BENEFICIAL,
2157 tune_params::PREF_CONST_POOL_FALSE,
2158 tune_params::PREF_LDRD_TRUE,
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2160 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2161 tune_params::DISPARAGE_FLAGS_ALL,
2162 tune_params::PREF_NEON_STRINGOPS_TRUE,
2163 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2164 tune_params::SCHED_AUTOPREF_FULL
2165 };
2166
2167 const struct tune_params arm_exynosm1_tune =
2168 {
2169 &exynosm1_extra_costs,
2170 &generic_addr_mode_costs, /* Addressing mode costs. */
2171 NULL, /* Sched adj cost. */
2172 arm_default_branch_cost,
2173 &arm_default_vec_cost,
2174 1, /* Constant limit. */
2175 2, /* Max cond insns. */
2176 8, /* Memset max inline. */
2177 3, /* Issue rate. */
2178 ARM_PREFETCH_NOT_BENEFICIAL,
2179 tune_params::PREF_CONST_POOL_FALSE,
2180 tune_params::PREF_LDRD_TRUE,
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2182 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2183 tune_params::DISPARAGE_FLAGS_ALL,
2184 tune_params::PREF_NEON_STRINGOPS_TRUE,
2185 tune_params::FUSE_NOTHING,
2186 tune_params::SCHED_AUTOPREF_OFF
2187 };
2188
2189 const struct tune_params arm_xgene1_tune =
2190 {
2191 &xgene1_extra_costs,
2192 &generic_addr_mode_costs, /* Addressing mode costs. */
2193 NULL, /* Sched adj cost. */
2194 arm_default_branch_cost,
2195 &arm_default_vec_cost,
2196 1, /* Constant limit. */
2197 2, /* Max cond insns. */
2198 32, /* Memset max inline. */
2199 4, /* Issue rate. */
2200 ARM_PREFETCH_NOT_BENEFICIAL,
2201 tune_params::PREF_CONST_POOL_FALSE,
2202 tune_params::PREF_LDRD_TRUE,
2203 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2204 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2205 tune_params::DISPARAGE_FLAGS_ALL,
2206 tune_params::PREF_NEON_STRINGOPS_FALSE,
2207 tune_params::FUSE_NOTHING,
2208 tune_params::SCHED_AUTOPREF_OFF
2209 };
2210
2211 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2212 less appealing. Set max_insns_skipped to a low value. */
2213
2214 const struct tune_params arm_cortex_a5_tune =
2215 {
2216 &cortexa5_extra_costs,
2217 &generic_addr_mode_costs, /* Addressing mode costs. */
2218 NULL, /* Sched adj cost. */
2219 arm_cortex_a5_branch_cost,
2220 &arm_default_vec_cost,
2221 1, /* Constant limit. */
2222 1, /* Max cond insns. */
2223 8, /* Memset max inline. */
2224 2, /* Issue rate. */
2225 ARM_PREFETCH_NOT_BENEFICIAL,
2226 tune_params::PREF_CONST_POOL_FALSE,
2227 tune_params::PREF_LDRD_FALSE,
2228 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2229 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2230 tune_params::DISPARAGE_FLAGS_NEITHER,
2231 tune_params::PREF_NEON_STRINGOPS_TRUE,
2232 tune_params::FUSE_NOTHING,
2233 tune_params::SCHED_AUTOPREF_OFF
2234 };
2235
2236 const struct tune_params arm_cortex_a9_tune =
2237 {
2238 &cortexa9_extra_costs,
2239 &generic_addr_mode_costs, /* Addressing mode costs. */
2240 cortex_a9_sched_adjust_cost,
2241 arm_default_branch_cost,
2242 &arm_default_vec_cost,
2243 1, /* Constant limit. */
2244 5, /* Max cond insns. */
2245 8, /* Memset max inline. */
2246 2, /* Issue rate. */
2247 ARM_PREFETCH_BENEFICIAL(4,32,32),
2248 tune_params::PREF_CONST_POOL_FALSE,
2249 tune_params::PREF_LDRD_FALSE,
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2251 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2252 tune_params::DISPARAGE_FLAGS_NEITHER,
2253 tune_params::PREF_NEON_STRINGOPS_FALSE,
2254 tune_params::FUSE_NOTHING,
2255 tune_params::SCHED_AUTOPREF_OFF
2256 };
2257
2258 const struct tune_params arm_cortex_a12_tune =
2259 {
2260 &cortexa12_extra_costs,
2261 &generic_addr_mode_costs, /* Addressing mode costs. */
2262 NULL, /* Sched adj cost. */
2263 arm_default_branch_cost,
2264 &arm_default_vec_cost, /* Vectorizer costs. */
2265 1, /* Constant limit. */
2266 2, /* Max cond insns. */
2267 8, /* Memset max inline. */
2268 2, /* Issue rate. */
2269 ARM_PREFETCH_NOT_BENEFICIAL,
2270 tune_params::PREF_CONST_POOL_FALSE,
2271 tune_params::PREF_LDRD_TRUE,
2272 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2273 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2274 tune_params::DISPARAGE_FLAGS_ALL,
2275 tune_params::PREF_NEON_STRINGOPS_TRUE,
2276 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2277 tune_params::SCHED_AUTOPREF_OFF
2278 };
2279
2280 const struct tune_params arm_cortex_a73_tune =
2281 {
2282 &cortexa57_extra_costs,
2283 &generic_addr_mode_costs, /* Addressing mode costs. */
2284 NULL, /* Sched adj cost. */
2285 arm_default_branch_cost,
2286 &arm_default_vec_cost, /* Vectorizer costs. */
2287 1, /* Constant limit. */
2288 2, /* Max cond insns. */
2289 8, /* Memset max inline. */
2290 2, /* Issue rate. */
2291 ARM_PREFETCH_NOT_BENEFICIAL,
2292 tune_params::PREF_CONST_POOL_FALSE,
2293 tune_params::PREF_LDRD_TRUE,
2294 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2295 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2296 tune_params::DISPARAGE_FLAGS_ALL,
2297 tune_params::PREF_NEON_STRINGOPS_TRUE,
2298 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2299 tune_params::SCHED_AUTOPREF_FULL
2300 };
2301
2302 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT each take a single
2303 cycle to execute, so materialising a 32-bit constant that way costs two cycles.
2304 An LDR from the constant pool likewise takes two cycles to execute, but mildly
2305 increases pipelining opportunity (consecutive loads/stores can be pipelined
2306 together, saving one cycle), and may also improve icache utilisation. Hence
2307 we prefer the constant pool for such processors. */
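/* For instance, materialising two different 32-bit constants costs four
   cycles as MOVW/MOVT pairs, whereas two back-to-back literal-pool LDRs
   can overlap and complete in roughly three cycles on such cores.  */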
2308
2309 const struct tune_params arm_v7m_tune =
2310 {
2311 &v7m_extra_costs,
2312 &generic_addr_mode_costs, /* Addressing mode costs. */
2313 NULL, /* Sched adj cost. */
2314 arm_cortex_m_branch_cost,
2315 &arm_default_vec_cost,
2316 1, /* Constant limit. */
2317 2, /* Max cond insns. */
2318 8, /* Memset max inline. */
2319 1, /* Issue rate. */
2320 ARM_PREFETCH_NOT_BENEFICIAL,
2321 tune_params::PREF_CONST_POOL_TRUE,
2322 tune_params::PREF_LDRD_FALSE,
2323 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2324 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2325 tune_params::DISPARAGE_FLAGS_NEITHER,
2326 tune_params::PREF_NEON_STRINGOPS_FALSE,
2327 tune_params::FUSE_NOTHING,
2328 tune_params::SCHED_AUTOPREF_OFF
2329 };
2330
2331 /* Cortex-M7 tuning. */
2332
2333 const struct tune_params arm_cortex_m7_tune =
2334 {
2335 &v7m_extra_costs,
2336 &generic_addr_mode_costs, /* Addressing mode costs. */
2337 NULL, /* Sched adj cost. */
2338 arm_cortex_m7_branch_cost,
2339 &arm_default_vec_cost,
2340 0, /* Constant limit. */
2341 1, /* Max cond insns. */
2342 8, /* Memset max inline. */
2343 2, /* Issue rate. */
2344 ARM_PREFETCH_NOT_BENEFICIAL,
2345 tune_params::PREF_CONST_POOL_TRUE,
2346 tune_params::PREF_LDRD_FALSE,
2347 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2348 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2349 tune_params::DISPARAGE_FLAGS_NEITHER,
2350 tune_params::PREF_NEON_STRINGOPS_FALSE,
2351 tune_params::FUSE_NOTHING,
2352 tune_params::SCHED_AUTOPREF_OFF
2353 };
2354
2355 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2356 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2357 cortex-m23. */
2358 const struct tune_params arm_v6m_tune =
2359 {
2360 &generic_extra_costs, /* Insn extra costs. */
2361 &generic_addr_mode_costs, /* Addressing mode costs. */
2362 NULL, /* Sched adj cost. */
2363 arm_default_branch_cost,
2364 &arm_default_vec_cost, /* Vectorizer costs. */
2365 1, /* Constant limit. */
2366 5, /* Max cond insns. */
2367 8, /* Memset max inline. */
2368 1, /* Issue rate. */
2369 ARM_PREFETCH_NOT_BENEFICIAL,
2370 tune_params::PREF_CONST_POOL_FALSE,
2371 tune_params::PREF_LDRD_FALSE,
2372 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2373 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2374 tune_params::DISPARAGE_FLAGS_NEITHER,
2375 tune_params::PREF_NEON_STRINGOPS_FALSE,
2376 tune_params::FUSE_NOTHING,
2377 tune_params::SCHED_AUTOPREF_OFF
2378 };
2379
2380 const struct tune_params arm_fa726te_tune =
2381 {
2382 &generic_extra_costs, /* Insn extra costs. */
2383 &generic_addr_mode_costs, /* Addressing mode costs. */
2384 fa726te_sched_adjust_cost,
2385 arm_default_branch_cost,
2386 &arm_default_vec_cost,
2387 1, /* Constant limit. */
2388 5, /* Max cond insns. */
2389 8, /* Memset max inline. */
2390 2, /* Issue rate. */
2391 ARM_PREFETCH_NOT_BENEFICIAL,
2392 tune_params::PREF_CONST_POOL_TRUE,
2393 tune_params::PREF_LDRD_FALSE,
2394 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2395 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2396 tune_params::DISPARAGE_FLAGS_NEITHER,
2397 tune_params::PREF_NEON_STRINGOPS_FALSE,
2398 tune_params::FUSE_NOTHING,
2399 tune_params::SCHED_AUTOPREF_OFF
2400 };
2401
2402 /* Auto-generated CPU, FPU and architecture tables. */
2403 #include "arm-cpu-data.h"
2404
2405 /* The name of the preprocessor macro to define for this architecture. PROFILE
2406 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2407 is thus chosen to be big enough to hold the longest architecture name. */
2408
2409 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2410
2411 /* Supported TLS relocations. */
2412
2413 enum tls_reloc {
2414 TLS_GD32,
2415 TLS_GD32_FDPIC,
2416 TLS_LDM32,
2417 TLS_LDM32_FDPIC,
2418 TLS_LDO32,
2419 TLS_IE32,
2420 TLS_IE32_FDPIC,
2421 TLS_LE32,
2422 TLS_DESCSEQ /* GNU scheme */
2423 };
2424
2425 /* The maximum number of insns to be used when loading a constant. */
2426 inline static int
2427 arm_constant_limit (bool size_p)
2428 {
2429 return size_p ? 1 : current_tune->constant_limit;
2430 }
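/* So when optimizing for size at most one instruction is used per constant,
   while otherwise the limit comes from the active tuning; arm_slowmul_tune
   above, for example, allows up to 3 instructions.  */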
2431
2432 /* Emit an insn that's a simple single-set. Both the operands must be known
2433 to be valid. */
2434 inline static rtx_insn *
2435 emit_set_insn (rtx x, rtx y)
2436 {
2437 return emit_insn (gen_rtx_SET (x, y));
2438 }
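/* For example, emit_set_insn (reg, const0_rtx) emits one insn whose pattern
   is (set reg (const_int 0)); it is the caller's job to check beforehand
   that such a move is actually valid for the target.  */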
2439
2440 /* Return the number of bits set in VALUE. */
2441 static unsigned
2442 bit_count (unsigned long value)
2443 {
2444 unsigned long count = 0;
2445
2446 while (value)
2447 {
2448 count++;
2449 value &= value - 1; /* Clear the least-significant set bit. */
2450 }
2451
2452 return count;
2453 }
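/* The loop clears one set bit per iteration: for value 0b1101 it goes
   0b1101 -> 0b1100 -> 0b1000 -> 0, giving a count of 3.  */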
2454
2455 /* Return the number of bits set in BMAP. */
2456 static unsigned
2457 bitmap_popcount (const sbitmap bmap)
2458 {
2459 unsigned int count = 0;
2460 unsigned int n = 0;
2461 sbitmap_iterator sbi;
2462
2463 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2464 count++;
2465 return count;
2466 }
2467
2468 typedef struct
2469 {
2470 machine_mode mode;
2471 const char *name;
2472 } arm_fixed_mode_set;
2473
2474 /* A small helper for setting fixed-point optab libfuncs. */
2475
2476 static void
2477 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2478 const char *funcname, const char *modename,
2479 int num_suffix)
2480 {
2481 char buffer[50];
2482
2483 if (num_suffix == 0)
2484 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2485 else
2486 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2487
2488 set_optab_libfunc (optable, mode, buffer);
2489 }
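/* For example, when the loop further below registers add_optab for QQmode
   with the suffix 3, the name constructed here is "__gnu_addqq3".  */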
2490
2491 static void
2492 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2493 machine_mode from, const char *funcname,
2494 const char *toname, const char *fromname)
2495 {
2496 char buffer[50];
2497 const char *maybe_suffix_2 = "";
2498
2499 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2500 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2501 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2502 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2503 maybe_suffix_2 = "2";
2504
2505 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2506 maybe_suffix_2);
2507
2508 set_conv_libfunc (optable, to, from, buffer);
2509 }
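/* For example, the loops further below register the QQmode-to-HQmode
   conversion as "__gnu_fractqqhq2" (both are signed fract modes, so the "2"
   suffix applies), whereas SQmode-to-SImode becomes "__gnu_fractsqsi"
   (SImode is not a fixed-point mode, so it does not).  */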
2510
2511 static GTY(()) rtx speculation_barrier_libfunc;
2512
2513 /* Record that we have no arithmetic or comparison libfuncs for
2514 machine mode MODE. */
2515
2516 static void
2517 arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
2518 {
2519 /* Arithmetic. */
2520 set_optab_libfunc (add_optab, mode, NULL);
2521 set_optab_libfunc (sdiv_optab, mode, NULL);
2522 set_optab_libfunc (smul_optab, mode, NULL);
2523 set_optab_libfunc (neg_optab, mode, NULL);
2524 set_optab_libfunc (sub_optab, mode, NULL);
2525
2526 /* Comparisons. */
2527 set_optab_libfunc (eq_optab, mode, NULL);
2528 set_optab_libfunc (ne_optab, mode, NULL);
2529 set_optab_libfunc (lt_optab, mode, NULL);
2530 set_optab_libfunc (le_optab, mode, NULL);
2531 set_optab_libfunc (ge_optab, mode, NULL);
2532 set_optab_libfunc (gt_optab, mode, NULL);
2533 set_optab_libfunc (unord_optab, mode, NULL);
2534 }
2535
2536 /* Set up library functions unique to ARM. */
2537 static void
2538 arm_init_libfuncs (void)
2539 {
2540 machine_mode mode_iter;
2541
2542 /* For Linux, we have access to kernel support for atomic operations. */
2543 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2544 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2545
2546 /* There are no special library functions unless we are using the
2547 ARM BPABI. */
2548 if (!TARGET_BPABI)
2549 return;
2550
2551 /* The functions below are described in Section 4 of the "Run-Time
2552 ABI for the ARM architecture", Version 1.0. */
2553
2554 /* Double-precision floating-point arithmetic. Table 2. */
2555 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2556 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2557 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2558 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2559 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2560
2561 /* Double-precision comparisons. Table 3. */
2562 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2563 set_optab_libfunc (ne_optab, DFmode, NULL);
2564 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2565 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2566 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2567 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2568 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2569
2570 /* Single-precision floating-point arithmetic. Table 4. */
2571 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2572 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2573 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2574 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2575 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2576
2577 /* Single-precision comparisons. Table 5. */
2578 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2579 set_optab_libfunc (ne_optab, SFmode, NULL);
2580 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2581 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2582 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2583 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2584 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2585
2586 /* Floating-point to integer conversions. Table 6. */
2587 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2588 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2589 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2590 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2591 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2592 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2593 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2594 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2595
2596 /* Conversions between floating types. Table 7. */
2597 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2598 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2599
2600 /* Integer to floating-point conversions. Table 8. */
2601 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2602 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2603 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2604 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2605 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2606 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2607 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2608 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2609
2610 /* Long long. Table 9. */
2611 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2612 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2613 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2614 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2615 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2616 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2617 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2618 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2619
2620 /* Integer (32/32->32) division. \S 4.3.1. */
2621 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2622 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2623
2624 /* The divmod functions are designed so that they can be used for
2625 plain division, even though they return both the quotient and the
2626 remainder. The quotient is returned in the usual location (i.e.,
2627 r0 for SImode, {r0, r1} for DImode), just as would be expected
2628 for an ordinary division routine. Because the AAPCS calling
2629 conventions specify that all of { r0, r1, r2, r3 } are
2630 call-clobbered registers, there is no need to tell the compiler
2631 explicitly that those registers are clobbered by these
2632 routines. */
2633 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2634 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
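/* A plain 64-bit division such as "c = a / b" can therefore be expanded as
   a call to __aeabi_ldivmod, taking the quotient from {r0, r1} and simply
   ignoring the remainder that the routine also returns.  */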
2635
2636 /* For SImode division the ABI provides div-without-mod routines,
2637 which are faster. */
2638 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2639 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2640
2641 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2642 divmod libcalls instead. */
2643 set_optab_libfunc (smod_optab, DImode, NULL);
2644 set_optab_libfunc (umod_optab, DImode, NULL);
2645 set_optab_libfunc (smod_optab, SImode, NULL);
2646 set_optab_libfunc (umod_optab, SImode, NULL);
2647
2648 /* Half-precision float operations. The compiler handles all operations
2649 with NULL libfuncs by converting to SFmode. */
2650 switch (arm_fp16_format)
2651 {
2652 case ARM_FP16_FORMAT_IEEE:
2653 case ARM_FP16_FORMAT_ALTERNATIVE:
2654
2655 /* Conversions. */
2656 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2657 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2658 ? "__gnu_f2h_ieee"
2659 : "__gnu_f2h_alternative"));
2660 set_conv_libfunc (sext_optab, SFmode, HFmode,
2661 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2662 ? "__gnu_h2f_ieee"
2663 : "__gnu_h2f_alternative"));
2664
2665 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2666 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2667 ? "__gnu_d2h_ieee"
2668 : "__gnu_d2h_alternative"));
2669
2670 arm_block_arith_comp_libfuncs_for_mode (HFmode);
2671 break;
2672
2673 default:
2674 break;
2675 }
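/* With the HFmode arithmetic and comparison libfuncs blocked above, an
   HFmode operation such as an addition is in effect performed by widening
   the operands to SFmode (__gnu_h2f_ieee or __gnu_h2f_alternative), doing
   the operation in SFmode, and narrowing the result back with
   __gnu_f2h_ieee or __gnu_f2h_alternative.  */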
2676
2677 /* For all possible libcalls in BFmode, record NULL. */
2678 FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
2679 {
2680 set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
2681 set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
2682 set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
2683 set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
2684 }
2685 arm_block_arith_comp_libfuncs_for_mode (BFmode);
2686
2687 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2688 {
2689 const arm_fixed_mode_set fixed_arith_modes[] =
2690 {
2691 { E_QQmode, "qq" },
2692 { E_UQQmode, "uqq" },
2693 { E_HQmode, "hq" },
2694 { E_UHQmode, "uhq" },
2695 { E_SQmode, "sq" },
2696 { E_USQmode, "usq" },
2697 { E_DQmode, "dq" },
2698 { E_UDQmode, "udq" },
2699 { E_TQmode, "tq" },
2700 { E_UTQmode, "utq" },
2701 { E_HAmode, "ha" },
2702 { E_UHAmode, "uha" },
2703 { E_SAmode, "sa" },
2704 { E_USAmode, "usa" },
2705 { E_DAmode, "da" },
2706 { E_UDAmode, "uda" },
2707 { E_TAmode, "ta" },
2708 { E_UTAmode, "uta" }
2709 };
2710 const arm_fixed_mode_set fixed_conv_modes[] =
2711 {
2712 { E_QQmode, "qq" },
2713 { E_UQQmode, "uqq" },
2714 { E_HQmode, "hq" },
2715 { E_UHQmode, "uhq" },
2716 { E_SQmode, "sq" },
2717 { E_USQmode, "usq" },
2718 { E_DQmode, "dq" },
2719 { E_UDQmode, "udq" },
2720 { E_TQmode, "tq" },
2721 { E_UTQmode, "utq" },
2722 { E_HAmode, "ha" },
2723 { E_UHAmode, "uha" },
2724 { E_SAmode, "sa" },
2725 { E_USAmode, "usa" },
2726 { E_DAmode, "da" },
2727 { E_UDAmode, "uda" },
2728 { E_TAmode, "ta" },
2729 { E_UTAmode, "uta" },
2730 { E_QImode, "qi" },
2731 { E_HImode, "hi" },
2732 { E_SImode, "si" },
2733 { E_DImode, "di" },
2734 { E_TImode, "ti" },
2735 { E_SFmode, "sf" },
2736 { E_DFmode, "df" }
2737 };
2738 unsigned int i, j;
2739
2740 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2741 {
2742 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2743 "add", fixed_arith_modes[i].name, 3);
2744 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2745 "ssadd", fixed_arith_modes[i].name, 3);
2746 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2747 "usadd", fixed_arith_modes[i].name, 3);
2748 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2749 "sub", fixed_arith_modes[i].name, 3);
2750 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2751 "sssub", fixed_arith_modes[i].name, 3);
2752 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2753 "ussub", fixed_arith_modes[i].name, 3);
2754 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2755 "mul", fixed_arith_modes[i].name, 3);
2756 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2757 "ssmul", fixed_arith_modes[i].name, 3);
2758 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2759 "usmul", fixed_arith_modes[i].name, 3);
2760 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2761 "div", fixed_arith_modes[i].name, 3);
2762 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2763 "udiv", fixed_arith_modes[i].name, 3);
2764 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2765 "ssdiv", fixed_arith_modes[i].name, 3);
2766 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2767 "usdiv", fixed_arith_modes[i].name, 3);
2768 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2769 "neg", fixed_arith_modes[i].name, 2);
2770 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2771 "ssneg", fixed_arith_modes[i].name, 2);
2772 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2773 "usneg", fixed_arith_modes[i].name, 2);
2774 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2775 "ashl", fixed_arith_modes[i].name, 3);
2776 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2777 "ashr", fixed_arith_modes[i].name, 3);
2778 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2779 "lshr", fixed_arith_modes[i].name, 3);
2780 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2781 "ssashl", fixed_arith_modes[i].name, 3);
2782 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2783 "usashl", fixed_arith_modes[i].name, 3);
2784 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2785 "cmp", fixed_arith_modes[i].name, 2);
2786 }
2787
2788 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2789 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2790 {
2791 if (i == j
2792 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2793 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2794 continue;
2795
2796 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2797 fixed_conv_modes[j].mode, "fract",
2798 fixed_conv_modes[i].name,
2799 fixed_conv_modes[j].name);
2800 arm_set_fixed_conv_libfunc (satfract_optab,
2801 fixed_conv_modes[i].mode,
2802 fixed_conv_modes[j].mode, "satfract",
2803 fixed_conv_modes[i].name,
2804 fixed_conv_modes[j].name);
2805 arm_set_fixed_conv_libfunc (fractuns_optab,
2806 fixed_conv_modes[i].mode,
2807 fixed_conv_modes[j].mode, "fractuns",
2808 fixed_conv_modes[i].name,
2809 fixed_conv_modes[j].name);
2810 arm_set_fixed_conv_libfunc (satfractuns_optab,
2811 fixed_conv_modes[i].mode,
2812 fixed_conv_modes[j].mode, "satfractuns",
2813 fixed_conv_modes[i].name,
2814 fixed_conv_modes[j].name);
2815 }
2816 }
2817
2818 if (TARGET_AAPCS_BASED)
2819 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2820
2821 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2822 }
2823
2824 /* On AAPCS systems, this is the "struct __va_list". */
2825 static GTY(()) tree va_list_type;
2826
2827 /* Return the type to use as __builtin_va_list. */
2828 static tree
2829 arm_build_builtin_va_list (void)
2830 {
2831 tree va_list_name;
2832 tree ap_field;
2833
2834 if (!TARGET_AAPCS_BASED)
2835 return std_build_builtin_va_list ();
2836
2837 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2838 defined as:
2839
2840 struct __va_list
2841 {
2842 void *__ap;
2843 };
2844
2845 The C Library ABI further reinforces this definition in \S
2846 4.1.
2847
2848 We must follow this definition exactly. The structure tag
2849 name is visible in C++ mangled names, and thus forms a part
2850 of the ABI. The field name may be used by people who
2851 #include <stdarg.h>. */
2852 /* Create the type. */
2853 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2854 /* Give it the required name. */
2855 va_list_name = build_decl (BUILTINS_LOCATION,
2856 TYPE_DECL,
2857 get_identifier ("__va_list"),
2858 va_list_type);
2859 DECL_ARTIFICIAL (va_list_name) = 1;
2860 TYPE_NAME (va_list_type) = va_list_name;
2861 TYPE_STUB_DECL (va_list_type) = va_list_name;
2862 /* Create the __ap field. */
2863 ap_field = build_decl (BUILTINS_LOCATION,
2864 FIELD_DECL,
2865 get_identifier ("__ap"),
2866 ptr_type_node);
2867 DECL_ARTIFICIAL (ap_field) = 1;
2868 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2869 TYPE_FIELDS (va_list_type) = ap_field;
2870 /* Compute its layout. */
2871 layout_type (va_list_type);
2872
2873 return va_list_type;
2874 }
2875
2876 /* Return an expression of type "void *" pointing to the next
2877 available argument in a variable-argument list. VALIST is the
2878 user-level va_list object, of type __builtin_va_list. */
2879 static tree
2880 arm_extract_valist_ptr (tree valist)
2881 {
2882 if (TREE_TYPE (valist) == error_mark_node)
2883 return error_mark_node;
2884
2885 /* On an AAPCS target, the pointer is stored within "struct
2886 va_list". */
2887 if (TARGET_AAPCS_BASED)
2888 {
2889 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2890 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2891 valist, ap_field, NULL_TREE);
2892 }
2893
2894 return valist;
2895 }
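/* In other words, for an AAPCS target the tree returned for "va_list ap" is
   effectively the COMPONENT_REF ap.__ap, i.e. the "void *" cursor inside the
   single-field structure built above; for other targets the valist is
   already a plain pointer and is returned unchanged.  */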
2896
2897 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2898 static void
2899 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2900 {
2901 valist = arm_extract_valist_ptr (valist);
2902 std_expand_builtin_va_start (valist, nextarg);
2903 }
2904
2905 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2906 static tree
2907 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2908 gimple_seq *post_p)
2909 {
2910 valist = arm_extract_valist_ptr (valist);
2911 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2912 }
2913
2914 /* Check any incompatible options that the user has specified. */
2915 static void
2916 arm_option_check_internal (struct gcc_options *opts)
2917 {
2918 int flags = opts->x_target_flags;
2919
2920 /* iWMMXt and NEON are incompatible. */
2921 if (TARGET_IWMMXT
2922 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2923 error ("iWMMXt and NEON are incompatible");
2924
2925 /* Make sure that the processor choice does not conflict with any of the
2926 other command line choices. */
2927 if (TARGET_ARM_P (flags)
2928 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2929 error ("target CPU does not support ARM mode");
2930
2931 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2932 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2933 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2934
2935 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2936 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2937
2938 /* If this target is normally configured to use APCS frames, warn if they
2939 are turned off and debugging is turned on. */
2940 if (TARGET_ARM_P (flags)
2941 && write_symbols != NO_DEBUG
2942 && !TARGET_APCS_FRAME
2943 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2944 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2945 "debugging");
2946
2947 /* iWMMXt unsupported under Thumb mode. */
2948 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2949 error ("iWMMXt unsupported under Thumb mode");
2950
2951 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2952 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2953
2954 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2955 {
2956 error ("RTP PIC is incompatible with Thumb");
2957 flag_pic = 0;
2958 }
2959
2960 if (target_pure_code || target_slow_flash_data)
2961 {
2962 const char *flag = (target_pure_code ? "-mpure-code" :
2963 "-mslow-flash-data");
2964 bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;
2965
2966 /* We only support -mslow-flash-data on M-profile targets with
2967 MOVT. */
2968 if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
2969 error ("%s only supports non-pic code on M-profile targets with the "
2970 "MOVT instruction", flag);
2971
2972 /* We only support -mpure-code on M-profile targets. */
2973 if (target_pure_code && common_unsupported_modes)
2974 error ("%s only supports non-pic code on M-profile targets", flag);
2975
2976 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2977 -mword-relocations forbids relocation of MOVT/MOVW. */
2978 if (target_word_relocations)
2979 error ("%s incompatible with %<-mword-relocations%>", flag);
2980 }
2981 }
2982
2983 /* Recompute the global settings depending on target attribute options. */
2984
2985 static void
2986 arm_option_params_internal (void)
2987 {
2988 /* If we are not using the default (ARM mode) section anchor offset
2989 ranges, then set the correct ranges now. */
2990 if (TARGET_THUMB1)
2991 {
2992 /* Thumb-1 LDR instructions cannot have negative offsets.
2993 Permissible positive offset ranges are 5-bit (for byte loads),
2994 6-bit (for halfword loads), or 7-bit (for word loads).
2995 Empirical results suggest a 7-bit anchor range gives the best
2996 overall code size. */
2997 targetm.min_anchor_offset = 0;
2998 targetm.max_anchor_offset = 127;
2999 }
3000 else if (TARGET_THUMB2)
3001 {
3002 /* The minimum is set such that the total size of the block
3003 for a particular anchor is 248 + 1 + 4095 = 4344 bytes, which
3004 is divisible by eight, ensuring natural spacing of anchors. */
3005 targetm.min_anchor_offset = -248;
3006 targetm.max_anchor_offset = 4095;
3007 }
3008 else
3009 {
3010 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
3011 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
3012 }
3013
3014 /* Increase the number of conditional instructions with -Os. */
3015 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
3016
3017 /* For THUMB2, we limit the conditional sequence to one IT block. */
3018 if (TARGET_THUMB2)
3019 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
3020
3021 if (TARGET_THUMB1)
3022 targetm.md_asm_adjust = thumb1_md_asm_adjust;
3023 else
3024 targetm.md_asm_adjust = arm_md_asm_adjust;
3025 }
3026
3027 /* True if -mflip-thumb should next add an attribute for the default
3028 mode, false if it should next add an attribute for the opposite mode. */
3029 static GTY(()) bool thumb_flipper;
3030
3031 /* Options after initial target override. */
3032 static GTY(()) tree init_optimize;
3033
3034 static void
3035 arm_override_options_after_change_1 (struct gcc_options *opts,
3036 struct gcc_options *opts_set)
3037 {
3038 /* -falign-functions without argument: supply one. */
3039 if (opts->x_flag_align_functions && !opts_set->x_str_align_functions)
3040 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
3041 && opts->x_optimize_size ? "2" : "4";
3042 }
3043
3044 /* Implement targetm.override_options_after_change. */
3045
3046 static void
3047 arm_override_options_after_change (void)
3048 {
3049 arm_override_options_after_change_1 (&global_options, &global_options_set);
3050 }
3051
3052 /* Implement TARGET_OPTION_RESTORE. */
3053 static void
3054 arm_option_restore (struct gcc_options */* opts */,
3055 struct gcc_options *opts_set, struct cl_target_option *ptr)
3056 {
3057 arm_configure_build_target (&arm_active_target, ptr, opts_set, false);
3058 }
3059
3060 /* Reset options between modes that the user has specified. */
3061 static void
3062 arm_option_override_internal (struct gcc_options *opts,
3063 struct gcc_options *opts_set)
3064 {
3065 arm_override_options_after_change_1 (opts, opts_set);
3066
3067 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3068 {
3069 /* The default is to enable interworking, so this warning message would
3070 be confusing to users who have just compiled with
3071 eg, -march=armv4. */
3072 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3073 opts->x_target_flags &= ~MASK_INTERWORK;
3074 }
3075
3076 if (TARGET_THUMB_P (opts->x_target_flags)
3077 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3078 {
3079 warning (0, "target CPU does not support THUMB instructions");
3080 opts->x_target_flags &= ~MASK_THUMB;
3081 }
3082
3083 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3084 {
3085 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3086 opts->x_target_flags &= ~MASK_APCS_FRAME;
3087 }
3088
3089 /* Callee super interworking implies thumb interworking. Adding
3090 this to the flags here simplifies the logic elsewhere. */
3091 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3092 opts->x_target_flags |= MASK_INTERWORK;
3093
3094 /* Need to remember initial values so combinations of options like
3095 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3096 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3097
3098 if (! opts_set->x_arm_restrict_it)
3099 opts->x_arm_restrict_it = arm_arch8;
3100
3101 /* ARM execution state and M-profile don't have [restricted] IT. */
3102 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3103 opts->x_arm_restrict_it = 0;
3104
3105 /* Use the IT size from CPU specific tuning unless -mrestrict-it is used. */
3106 if (!opts_set->x_arm_restrict_it
3107 && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
3108 opts->x_arm_restrict_it = 0;
3109
3110 /* Enable -munaligned-access by default for
3111 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3112 i.e. Thumb2 and ARM state only.
3113 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3114 - ARMv8 architecture-based processors.
3115
3116 Disable -munaligned-access by default for
3117 - all pre-ARMv6 architecture-based processors
3118 - ARMv6-M architecture-based processors
3119 - ARMv8-M Baseline processors. */
3120
3121 if (! opts_set->x_unaligned_access)
3122 {
3123 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3124 && arm_arch6 && (arm_arch_notm || arm_arch7));
3125 }
3126 else if (opts->x_unaligned_access == 1
3127 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3128 {
3129 warning (0, "target CPU does not support unaligned accesses");
3130 opts->x_unaligned_access = 0;
3131 }
3132
3133 /* Don't warn since it's on by default in -O2. */
3134 if (TARGET_THUMB1_P (opts->x_target_flags))
3135 opts->x_flag_schedule_insns = 0;
3136 else
3137 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3138
3139 /* Disable shrink-wrap when optimizing function for size, since it tends to
3140 generate additional returns. */
3141 if (optimize_function_for_size_p (cfun)
3142 && TARGET_THUMB2_P (opts->x_target_flags))
3143 opts->x_flag_shrink_wrap = false;
3144 else
3145 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3146
3147 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3148 - epilogue_insns - does not accurately model the corresponding insns
3149 emitted in the asm file. In particular, see the comment in thumb_exit
3150 'Find out how many of the (return) argument registers we can corrupt'.
3151 As a consequence, the epilogue may clobber registers without fipa-ra
3152 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3153 TODO: Accurately model clobbers for epilogue_insns and reenable
3154 fipa-ra. */
3155 if (TARGET_THUMB1_P (opts->x_target_flags))
3156 opts->x_flag_ipa_ra = 0;
3157 else
3158 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3159
3160 /* Thumb2 inline assembly code should always use unified syntax.
3161 This will apply to ARM and Thumb1 eventually. */
3162 if (TARGET_THUMB2_P (opts->x_target_flags))
3163 opts->x_inline_asm_unified = true;
3164
3165 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3166 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3167 #endif
3168 }
3169
3170 static sbitmap isa_all_fpubits_internal;
3171 static sbitmap isa_all_fpbits;
3172 static sbitmap isa_quirkbits;
3173
3174 /* Configure a build target TARGET from the user-specified options OPTS and
3175 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3176 architecture have been specified, but the two are not identical. */
3177 void
3178 arm_configure_build_target (struct arm_build_target *target,
3179 struct cl_target_option *opts,
3180 struct gcc_options *opts_set,
3181 bool warn_compatible)
3182 {
3183 const cpu_option *arm_selected_tune = NULL;
3184 const arch_option *arm_selected_arch = NULL;
3185 const cpu_option *arm_selected_cpu = NULL;
3186 const arm_fpu_desc *arm_selected_fpu = NULL;
3187 const char *tune_opts = NULL;
3188 const char *arch_opts = NULL;
3189 const char *cpu_opts = NULL;
3190
3191 bitmap_clear (target->isa);
3192 target->core_name = NULL;
3193 target->arch_name = NULL;
3194
3195 if (opts_set->x_arm_arch_string)
3196 {
3197 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3198 "-march",
3199 opts->x_arm_arch_string);
3200 arch_opts = strchr (opts->x_arm_arch_string, '+');
3201 }
3202
3203 if (opts_set->x_arm_cpu_string)
3204 {
3205 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3206 opts->x_arm_cpu_string);
3207 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3208 arm_selected_tune = arm_selected_cpu;
3209 /* If taking the tuning from -mcpu, we don't need to rescan the
3210 options for tuning. */
3211 }
3212
3213 if (opts_set->x_arm_tune_string)
3214 {
3215 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3216 opts->x_arm_tune_string);
3217 tune_opts = strchr (opts->x_arm_tune_string, '+');
3218 }
3219
3220 if (arm_selected_arch)
3221 {
3222 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3223 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3224 arch_opts);
3225
3226 if (arm_selected_cpu)
3227 {
3228 auto_sbitmap cpu_isa (isa_num_bits);
3229 auto_sbitmap isa_delta (isa_num_bits);
3230
3231 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3232 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3233 cpu_opts);
3234 bitmap_xor (isa_delta, cpu_isa, target->isa);
3235 /* Ignore any bits that are quirk bits. */
3236 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3237 /* If the user (or the default configuration) has specified a
3238 specific FPU, then ignore any bits that depend on the FPU
3239 configuration. Do similarly if using the soft-float
3240 ABI. */
3241 if (opts->x_arm_fpu_index != TARGET_FPU_auto
3242 || arm_float_abi == ARM_FLOAT_ABI_SOFT)
3243 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);
3244
3245 if (!bitmap_empty_p (isa_delta))
3246 {
3247 if (warn_compatible)
3248 warning (0, "switch %<-mcpu=%s%> conflicts "
3249 "with switch %<-march=%s%>",
3250 opts->x_arm_cpu_string,
3251 opts->x_arm_arch_string);
3252
3253 /* -march wins for code generation.
3254 -mcpu wins for default tuning. */
3255 if (!arm_selected_tune)
3256 arm_selected_tune = arm_selected_cpu;
3257
3258 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3259 target->arch_name = arm_selected_arch->common.name;
3260 }
3261 else
3262 {
3263 /* Architecture and CPU are essentially the same.
3264 Prefer the CPU setting. */
3265 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3266 target->core_name = arm_selected_cpu->common.name;
3267 /* Copy the CPU's capabilities, so that we inherit the
3268 appropriate extensions and quirks. */
3269 bitmap_copy (target->isa, cpu_isa);
3270 }
3271 }
3272 else
3273 {
3274 /* Pick a CPU based on the architecture. */
3275 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3276 target->arch_name = arm_selected_arch->common.name;
3277 /* Note: target->core_name is left unset in this path. */
3278 }
3279 }
3280 else if (arm_selected_cpu)
3281 {
3282 target->core_name = arm_selected_cpu->common.name;
3283 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3284 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3285 cpu_opts);
3286 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3287 }
3288 /* If the user did not specify a processor or architecture, choose
3289 one for them. */
3290 else
3291 {
3292 const cpu_option *sel;
3293 auto_sbitmap sought_isa (isa_num_bits);
3294 bitmap_clear (sought_isa);
3295 auto_sbitmap default_isa (isa_num_bits);
3296
3297 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3298 TARGET_CPU_DEFAULT);
3299 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3300 gcc_assert (arm_selected_cpu->common.name);
3301
3302 /* RWE: All of the selection logic below (to the end of this
3303 'if' clause) looks somewhat suspect. It appears to be mostly
3304 there to support forcing thumb support when the default CPU
3305 does not have thumb (somewhat dubious in terms of what the
3306 user might be expecting). I think it should be removed once
3307 support for the pre-thumb era cores is removed. */
3308 sel = arm_selected_cpu;
3309 arm_initialize_isa (default_isa, sel->common.isa_bits);
3310 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3311 cpu_opts);
3312
3313 /* Now check to see if the user has specified any command line
3314 switches that require certain abilities from the cpu. */
3315
3316 if (TARGET_INTERWORK || TARGET_THUMB)
3317 bitmap_set_bit (sought_isa, isa_bit_thumb);
3318
3319 /* If there are such requirements and the default CPU does not
3320 satisfy them, we need to run over the complete list of
3321 cores looking for one that is satisfactory. */
3322 if (!bitmap_empty_p (sought_isa)
3323 && !bitmap_subset_p (sought_isa, default_isa))
3324 {
3325 auto_sbitmap candidate_isa (isa_num_bits);
3326 /* We're only interested in a CPU with at least the
3327 capabilities of the default CPU and the required
3328 additional features. */
3329 bitmap_ior (default_isa, default_isa, sought_isa);
3330
3331 /* Try to locate a CPU type that supports all of the abilities
3332 of the default CPU, plus the extra abilities requested by
3333 the user. */
3334 for (sel = all_cores; sel->common.name != NULL; sel++)
3335 {
3336 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3337 /* An exact match? */
3338 if (bitmap_equal_p (default_isa, candidate_isa))
3339 break;
3340 }
3341
3342 if (sel->common.name == NULL)
3343 {
3344 unsigned current_bit_count = isa_num_bits;
3345 const cpu_option *best_fit = NULL;
3346
3347 /* Ideally we would like to issue an error message here
3348 saying that it was not possible to find a CPU compatible
3349 with the default CPU, but which also supports the command
3350 line options specified by the programmer, and so they
3351 ought to use the -mcpu=<name> command line option to
3352 override the default CPU type.
3353
3354 If we cannot find a CPU that has exactly the
3355 characteristics of the default CPU and the given
3356 command line options we scan the array again looking
3357 for a best match. The best match must have at least
3358 the capabilities of the perfect match. */
3359 for (sel = all_cores; sel->common.name != NULL; sel++)
3360 {
3361 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3362
3363 if (bitmap_subset_p (default_isa, candidate_isa))
3364 {
3365 unsigned count;
3366
3367 bitmap_and_compl (candidate_isa, candidate_isa,
3368 default_isa);
3369 count = bitmap_popcount (candidate_isa);
3370
3371 if (count < current_bit_count)
3372 {
3373 best_fit = sel;
3374 current_bit_count = count;
3375 }
3376 }
3377
3378 gcc_assert (best_fit);
3379 sel = best_fit;
3380 }
3381 }
3382 arm_selected_cpu = sel;
3383 }
3384
3385 /* Now we know the CPU, we can finally initialize the target
3386 structure. */
3387 target->core_name = arm_selected_cpu->common.name;
3388 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3389 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3390 cpu_opts);
3391 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3392 }
3393
3394 gcc_assert (arm_selected_cpu);
3395 gcc_assert (arm_selected_arch);
3396
3397 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3398 {
3399 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3400 auto_sbitmap fpu_bits (isa_num_bits);
3401
3402 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3403 /* This should clear out ALL bits relating to the FPU/simd
3404 extensions, to avoid potentially invalid combinations later on
3405 that we can't match. At present we only clear out those bits
3406 that can be set by -mfpu. This should be fixed in GCC-12. */
3407 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
3408 bitmap_ior (target->isa, target->isa, fpu_bits);
3409 }
3410
3411 /* There may be implied bits which we still need to enable. These are
3412 non-named features which are needed to complete other sets of features,
3413 but cannot be enabled from arm-cpus.in due to being shared between
3414 multiple fgroups. Each entry in all_implied_fbits is of the form
3415 ante -> cons, meaning that if the feature "ante" is enabled, we should
3416 implicitly enable "cons". */
3417 const struct fbit_implication *impl = all_implied_fbits;
3418 while (impl->ante)
3419 {
3420 if (bitmap_bit_p (target->isa, impl->ante))
3421 bitmap_set_bit (target->isa, impl->cons);
3422 impl++;
3423 }
3424
3425 if (!arm_selected_tune)
3426 arm_selected_tune = arm_selected_cpu;
3427 else /* Validate the features passed to -mtune. */
3428 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3429
3430 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3431
3432 /* Finish initializing the target structure. */
3433 target->arch_pp_name = arm_selected_arch->arch;
3434 target->base_arch = arm_selected_arch->base_arch;
3435 target->profile = arm_selected_arch->profile;
3436
3437 target->tune_flags = tune_data->tune_flags;
3438 target->tune = tune_data->tune;
3439 target->tune_core = tune_data->scheduler;
3440 arm_option_reconfigure_globals ();
3441 }
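/* Editorial example of the precedence rules implemented above (not from
   the original sources): a combination such as -march=armv7-a
   -mcpu=cortex-m3 would be expected to trigger the conflict warning,
   since the M3's ISA bits (Thumb-only, v7-M) still differ from those of
   armv7-a after the quirk and FPU bits have been masked out; -march then
   drives code generation while -mcpu continues to select the default
   tuning.  When the delta is empty, the CPU branch is taken instead and
   its capabilities are inherited wholesale.  */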
3442
3443 /* Fix up any incompatible options that the user has specified. */
3444 static void
3445 arm_option_override (void)
3446 {
3447 static const enum isa_feature fpu_bitlist_internal[]
3448 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3449 /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */
3450 static const enum isa_feature fp_bitlist[]
3451 = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit };
3452 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3453 cl_target_option opts;
3454
3455 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3456 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3457
3458 isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
3459 isa_all_fpbits = sbitmap_alloc (isa_num_bits);
3460 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
3461 arm_initialize_isa (isa_all_fpbits, fp_bitlist);
3462
3463 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3464
3465 if (!global_options_set.x_arm_fpu_index)
3466 {
3467 bool ok;
3468 int fpu_index;
3469
3470 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3471 CL_TARGET);
3472 gcc_assert (ok);
3473 arm_fpu_index = (enum fpu_type) fpu_index;
3474 }
3475
3476 cl_target_option_save (&opts, &global_options, &global_options_set);
3477 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3478 true);
3479
3480 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3481 SUBTARGET_OVERRIDE_OPTIONS;
3482 #endif
3483
3484 /* Initialize boolean versions of the architectural flags, for use
3485 in the arm.md file and for enabling feature flags. */
3486 arm_option_reconfigure_globals ();
3487
3488 arm_tune = arm_active_target.tune_core;
3489 tune_flags = arm_active_target.tune_flags;
3490 current_tune = arm_active_target.tune;
3491
3492 /* TBD: Dwarf info for apcs frame is not handled yet. */
3493 if (TARGET_APCS_FRAME)
3494 flag_shrink_wrap = false;
3495
3496 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3497 {
3498 warning (0, "%<-mapcs-stack-check%> incompatible with "
3499 "%<-mno-apcs-frame%>");
3500 target_flags |= MASK_APCS_FRAME;
3501 }
3502
3503 if (TARGET_POKE_FUNCTION_NAME)
3504 target_flags |= MASK_APCS_FRAME;
3505
3506 if (TARGET_APCS_REENT && flag_pic)
3507 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3508
3509 if (TARGET_APCS_REENT)
3510 warning (0, "APCS reentrant code not supported. Ignored");
3511
3512 /* Set up some tuning parameters. */
3513 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3514 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3515 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3516 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3517 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3518 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3519
3520 /* For arm2/3 there is no need to do any scheduling if we are doing
3521 software floating-point. */
3522 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3523 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3524
3525 /* Override the default structure alignment for AAPCS ABI. */
3526 if (!global_options_set.x_arm_structure_size_boundary)
3527 {
3528 if (TARGET_AAPCS_BASED)
3529 arm_structure_size_boundary = 8;
3530 }
3531 else
3532 {
3533 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3534
3535 if (arm_structure_size_boundary != 8
3536 && arm_structure_size_boundary != 32
3537 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3538 {
3539 if (ARM_DOUBLEWORD_ALIGN)
3540 warning (0,
3541 "structure size boundary can only be set to 8, 32 or 64");
3542 else
3543 warning (0, "structure size boundary can only be set to 8 or 32");
3544 arm_structure_size_boundary
3545 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3546 }
3547 }
3548
3549 if (TARGET_VXWORKS_RTP)
3550 {
3551 if (!global_options_set.x_arm_pic_data_is_text_relative)
3552 arm_pic_data_is_text_relative = 0;
3553 }
3554 else if (flag_pic
3555 && !arm_pic_data_is_text_relative
3556 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3557 /* When text & data segments don't have a fixed displacement, the
3558 intended use is with a single, read only, pic base register.
3559 Unless the user explicitly requested not to do that, set
3560 it. */
3561 target_flags |= MASK_SINGLE_PIC_BASE;
3562
3563 /* If stack checking is disabled, we can use r10 as the PIC register,
3564 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3565 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3566 {
3567 if (TARGET_VXWORKS_RTP)
3568 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3569 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3570 }
3571
3572 if (flag_pic && TARGET_VXWORKS_RTP)
3573 arm_pic_register = 9;
3574
3575 /* If in FDPIC mode then force arm_pic_register to be r9. */
3576 if (TARGET_FDPIC)
3577 {
3578 arm_pic_register = FDPIC_REGNUM;
3579 if (TARGET_THUMB1)
3580 sorry ("FDPIC mode is not supported in Thumb-1 mode");
3581 }
3582
3583 if (arm_pic_register_string != NULL)
3584 {
3585 int pic_register = decode_reg_name (arm_pic_register_string);
3586
3587 if (!flag_pic)
3588 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3589
3590 /* Prevent the user from choosing an obviously stupid PIC register. */
3591 else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
3592 || pic_register == HARD_FRAME_POINTER_REGNUM
3593 || pic_register == STACK_POINTER_REGNUM
3594 || pic_register >= PC_REGNUM
3595 || (TARGET_VXWORKS_RTP
3596 && (unsigned int) pic_register != arm_pic_register))
3597 error ("unable to use %qs for PIC register", arm_pic_register_string);
3598 else
3599 arm_pic_register = pic_register;
3600 }
3601
3602 if (flag_pic)
3603 target_word_relocations = 1;
3604
3605 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3606 if (fix_cm3_ldrd == 2)
3607 {
3608 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3609 fix_cm3_ldrd = 1;
3610 else
3611 fix_cm3_ldrd = 0;
3612 }
3613
3614 /* Hot/Cold partitioning is not currently supported, since we can't
3615 handle literal pool placement in that case. */
3616 if (flag_reorder_blocks_and_partition)
3617 {
3618 inform (input_location,
3619 "%<-freorder-blocks-and-partition%> not supported "
3620 "on this architecture");
3621 flag_reorder_blocks_and_partition = 0;
3622 flag_reorder_blocks = 1;
3623 }
3624
3625 if (flag_pic)
3626 /* Hoisting PIC address calculations more aggressively provides a small,
3627 but measurable, size reduction for PIC code. Therefore, we decrease
3628 the bar for unrestricted expression hoisting to the cost of PIC address
3629 calculation, which is 2 instructions. */
3630 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3631 param_gcse_unrestricted_cost, 2);
3632
3633 /* ARM EABI defaults to strict volatile bitfields. */
3634 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3635 && abi_version_at_least(2))
3636 flag_strict_volatile_bitfields = 1;
3637
3638 /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3639 where we have deemed it beneficial (signified by setting
3640 prefetch.num_slots to 1 or more). */
3641 if (flag_prefetch_loop_arrays < 0
3642 && HAVE_prefetch
3643 && optimize >= 3
3644 && current_tune->prefetch.num_slots > 0)
3645 flag_prefetch_loop_arrays = 1;
3646
3647 /* Set up parameters to be used in prefetching algorithm. Do not
3648 override the defaults unless we are tuning for a core we have
3649 researched values for. */
3650 if (current_tune->prefetch.num_slots > 0)
3651 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3652 param_simultaneous_prefetches,
3653 current_tune->prefetch.num_slots);
3654 if (current_tune->prefetch.l1_cache_line_size >= 0)
3655 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3656 param_l1_cache_line_size,
3657 current_tune->prefetch.l1_cache_line_size);
3658 if (current_tune->prefetch.l1_cache_size >= 0)
3659 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3660 param_l1_cache_size,
3661 current_tune->prefetch.l1_cache_size);
3662
3663 /* Look through ready list and all of queue for instructions
3664 relevant for L2 auto-prefetcher. */
3665 int sched_autopref_queue_depth;
3666
3667 switch (current_tune->sched_autopref)
3668 {
3669 case tune_params::SCHED_AUTOPREF_OFF:
3670 sched_autopref_queue_depth = -1;
3671 break;
3672
3673 case tune_params::SCHED_AUTOPREF_RANK:
3674 sched_autopref_queue_depth = 0;
3675 break;
3676
3677 case tune_params::SCHED_AUTOPREF_FULL:
3678 sched_autopref_queue_depth = max_insn_queue_index + 1;
3679 break;
3680
3681 default:
3682 gcc_unreachable ();
3683 }
3684
3685 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
3686 param_sched_autopref_queue_depth,
3687 sched_autopref_queue_depth);
3688
3689 /* Currently, for slow flash data, we just disable literal pools. We also
3690 disable them for pure-code. */
3691 if (target_slow_flash_data || target_pure_code)
3692 arm_disable_literal_pool = true;
3693
3694 /* Disable scheduling fusion by default if the target is not an ARMv7
3695 processor or does not prefer ldrd/strd. */
3696 if (flag_schedule_fusion == 2
3697 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3698 flag_schedule_fusion = 0;
3699
3700 /* Need to remember initial options before they are overridden. */
3701 init_optimize = build_optimization_node (&global_options,
3702 &global_options_set);
3703
3704 arm_options_perform_arch_sanity_checks ();
3705 arm_option_override_internal (&global_options, &global_options_set);
3706 arm_option_check_internal (&global_options);
3707 arm_option_params_internal ();
3708
3709 /* Create the default target_options structure. */
3710 target_option_default_node = target_option_current_node
3711 = build_target_option_node (&global_options, &global_options_set);
3712
3713 /* Register global variables with the garbage collector. */
3714 arm_add_gc_roots ();
3715
3716 /* Init initial mode for testing. */
3717 thumb_flipper = TARGET_THUMB;
3718 }
3719
3720
3721 /* Reconfigure global status flags from the active_target.isa. */
3722 void
3723 arm_option_reconfigure_globals (void)
3724 {
3725 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3726 arm_base_arch = arm_active_target.base_arch;
3727
3728 /* Initialize boolean versions of the architectural flags, for use
3729 in the arm.md file. */
3730 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3731 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3732 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3733 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3734 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3735 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3736 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3737 arm_arch6m = arm_arch6 && !arm_arch_notm;
3738 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3739 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3740 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3741 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3742 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3743 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3744 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3745 arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
3746 isa_bit_armv8_1m_main);
3747 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3748 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3749 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3750 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3751 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3752 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3753 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3754 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3755 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3756 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3757 arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
3758 arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);
3759
3760 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3761 if (arm_fp16_inst)
3762 {
3763 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3764 error ("selected fp16 options are incompatible");
3765 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3766 }
3767
3768 arm_arch_cde = 0;
3769 arm_arch_cde_coproc = 0;
3770 int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2,
3771 isa_bit_cdecp3, isa_bit_cdecp4, isa_bit_cdecp5,
3772 isa_bit_cdecp6, isa_bit_cdecp7};
3773 for (int i = 0, e = ARRAY_SIZE (cde_bits); i < e; i++)
3774 {
3775 int cde_bit = bitmap_bit_p (arm_active_target.isa, cde_bits[i]);
3776 if (cde_bit)
3777 {
3778 arm_arch_cde |= cde_bit;
3779 arm_arch_cde_coproc |= arm_arch_cde_coproc_bits[i];
3780 }
3781 }
3782
3783 /* And finally, set up some quirks. */
3784 arm_arch_no_volatile_ce
3785 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3786 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3787 isa_bit_quirk_armv6kz);
3788
3789 /* Use the cp15 method if it is available. */
3790 if (target_thread_pointer == TP_AUTO)
3791 {
3792 if (arm_arch6k && !TARGET_THUMB1)
3793 target_thread_pointer = TP_CP15;
3794 else
3795 target_thread_pointer = TP_SOFT;
3796 }
3797 }
3798
3799 /* Perform some validation between the desired architecture and the rest of the
3800 options. */
3801 void
3802 arm_options_perform_arch_sanity_checks (void)
3803 {
3804 /* V5T code we generate is completely interworking capable, so we turn off
3805 TARGET_INTERWORK here to avoid many tests later on. */
3806
3807 /* XXX However, we must pass the right pre-processor defines to CPP
3808 or GLD can get confused. This is a hack. */
3809 if (TARGET_INTERWORK)
3810 arm_cpp_interwork = 1;
3811
3812 if (arm_arch5t)
3813 target_flags &= ~MASK_INTERWORK;
3814
3815 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3816 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3817
3818 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3819 error ("iwmmxt abi requires an iwmmxt capable cpu");
3820
3821 /* BPABI targets use linker tricks to allow interworking on cores
3822 without thumb support. */
3823 if (TARGET_INTERWORK
3824 && !TARGET_BPABI
3825 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3826 {
3827 warning (0, "target CPU does not support interworking" );
3828 target_flags &= ~MASK_INTERWORK;
3829 }
3830
3831 /* If soft-float is specified then don't use FPU. */
3832 if (TARGET_SOFT_FLOAT)
3833 arm_fpu_attr = FPU_NONE;
3834 else
3835 arm_fpu_attr = FPU_VFP;
3836
3837 if (TARGET_AAPCS_BASED)
3838 {
3839 if (TARGET_CALLER_INTERWORKING)
3840 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3841 else
3842 if (TARGET_CALLEE_INTERWORKING)
3843 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3844 }
3845
3846 /* __fp16 support currently assumes the core has ldrh. */
3847 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3848 sorry ("__fp16 and no ldrh");
3849
3850 if (use_cmse && !arm_arch_cmse)
3851 error ("target CPU does not support ARMv8-M Security Extensions");
3852
3853 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
3854 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
3855 if (use_cmse && TARGET_HARD_FLOAT && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3856 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3857
3858
3859 if (TARGET_AAPCS_BASED)
3860 {
3861 if (arm_abi == ARM_ABI_IWMMXT)
3862 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3863 else if (TARGET_HARD_FLOAT_ABI)
3864 {
3865 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3866 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
3867 && !bitmap_bit_p (arm_active_target.isa, isa_bit_mve))
3868 error ("%<-mfloat-abi=hard%>: selected architecture lacks an FPU");
3869 }
3870 else
3871 arm_pcs_default = ARM_PCS_AAPCS;
3872 }
3873 else
3874 {
3875 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3876 sorry ("%<-mfloat-abi=hard%> and VFP");
3877
3878 if (arm_abi == ARM_ABI_APCS)
3879 arm_pcs_default = ARM_PCS_APCS;
3880 else
3881 arm_pcs_default = ARM_PCS_ATPCS;
3882 }
3883 }
3884
3885 /* Test whether a local function descriptor is canonical, i.e.,
3886 whether we can use GOTOFFFUNCDESC to compute the address of the
3887 function. */
3888 static bool
3889 arm_fdpic_local_funcdesc_p (rtx fnx)
3890 {
3891 tree fn;
3892 enum symbol_visibility vis;
3893 bool ret;
3894
3895 if (!TARGET_FDPIC)
3896 return true;
3897
3898 if (! SYMBOL_REF_LOCAL_P (fnx))
3899 return false;
3900
3901 fn = SYMBOL_REF_DECL (fnx);
3902
3903 if (! fn)
3904 return false;
3905
3906 vis = DECL_VISIBILITY (fn);
3907
3908 if (vis == VISIBILITY_PROTECTED)
3909 /* Private function descriptors for protected functions are not
3910 canonical. Temporarily change the visibility to global so that
3911 we can ensure uniqueness of funcdesc pointers. */
3912 DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;
3913
3914 ret = default_binds_local_p_1 (fn, flag_pic);
3915
3916 DECL_VISIBILITY (fn) = vis;
3917
3918 return ret;
3919 }
3920
3921 static void
3922 arm_add_gc_roots (void)
3923 {
3924 gcc_obstack_init(&minipool_obstack);
3925 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3926 }
3927 \f
3928 /* A table of known ARM exception types.
3929 For use with the interrupt function attribute. */
3930
3931 typedef struct
3932 {
3933 const char *const arg;
3934 const unsigned long return_value;
3935 }
3936 isr_attribute_arg;
3937
3938 static const isr_attribute_arg isr_attribute_args [] =
3939 {
3940 { "IRQ", ARM_FT_ISR },
3941 { "irq", ARM_FT_ISR },
3942 { "FIQ", ARM_FT_FIQ },
3943 { "fiq", ARM_FT_FIQ },
3944 { "ABORT", ARM_FT_ISR },
3945 { "abort", ARM_FT_ISR },
3946 { "UNDEF", ARM_FT_EXCEPTION },
3947 { "undef", ARM_FT_EXCEPTION },
3948 { "SWI", ARM_FT_EXCEPTION },
3949 { "swi", ARM_FT_EXCEPTION },
3950 { NULL, ARM_FT_NORMAL }
3951 };
3952
3953 /* Returns the (interrupt) function type of the current
3954 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3955
3956 static unsigned long
3957 arm_isr_value (tree argument)
3958 {
3959 const isr_attribute_arg * ptr;
3960 const char * arg;
3961
3962 if (!arm_arch_notm)
3963 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3964
3965 /* No argument - default to IRQ. */
3966 if (argument == NULL_TREE)
3967 return ARM_FT_ISR;
3968
3969 /* Get the value of the argument. */
3970 if (TREE_VALUE (argument) == NULL_TREE
3971 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3972 return ARM_FT_UNKNOWN;
3973
3974 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3975
3976 /* Check it against the list of known arguments. */
3977 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3978 if (streq (arg, ptr->arg))
3979 return ptr->return_value;
3980
3981 /* An unrecognized interrupt type. */
3982 return ARM_FT_UNKNOWN;
3983 }
3984
3985 /* Computes the type of the current function. */
3986
3987 static unsigned long
3988 arm_compute_func_type (void)
3989 {
3990 unsigned long type = ARM_FT_UNKNOWN;
3991 tree a;
3992 tree attr;
3993
3994 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3995
3996 /* Decide if the current function is volatile. Such functions
3997 never return, and many memory cycles can be saved by not storing
3998 register values that will never be needed again. This optimization
3999 was added to speed up context switching in a kernel application. */
4000 if (optimize > 0
4001 && (TREE_NOTHROW (current_function_decl)
4002 || !(flag_unwind_tables
4003 || (flag_exceptions
4004 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
4005 && TREE_THIS_VOLATILE (current_function_decl))
4006 type |= ARM_FT_VOLATILE;
4007
4008 if (cfun->static_chain_decl != NULL)
4009 type |= ARM_FT_NESTED;
4010
4011 attr = DECL_ATTRIBUTES (current_function_decl);
4012
4013 a = lookup_attribute ("naked", attr);
4014 if (a != NULL_TREE)
4015 type |= ARM_FT_NAKED;
4016
4017 a = lookup_attribute ("isr", attr);
4018 if (a == NULL_TREE)
4019 a = lookup_attribute ("interrupt", attr);
4020
4021 if (a == NULL_TREE)
4022 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
4023 else
4024 type |= arm_isr_value (TREE_VALUE (a));
4025
4026 if (lookup_attribute ("cmse_nonsecure_entry", attr))
4027 type |= ARM_FT_CMSE_ENTRY;
4028
4029 return type;
4030 }
4031
4032 /* Returns the type of the current function. */
4033
4034 unsigned long
4035 arm_current_func_type (void)
4036 {
4037 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
4038 cfun->machine->func_type = arm_compute_func_type ();
4039
4040 return cfun->machine->func_type;
4041 }
4042
4043 bool
4044 arm_allocate_stack_slots_for_args (void)
4045 {
4046 /* Naked functions should not allocate stack slots for arguments. */
4047 return !IS_NAKED (arm_current_func_type ());
4048 }
4049
4050 static bool
4051 arm_warn_func_return (tree decl)
4052 {
4053 /* Naked functions are implemented entirely in assembly, including the
4054 return sequence, so suppress warnings about this. */
4055 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
4056 }
4057
4058 \f
4059 /* Output assembler code for a block containing the constant parts
4060 of a trampoline, leaving space for the variable parts.
4061
4062 On the ARM, (if r8 is the static chain regnum, and remembering that
4063 referencing pc adds an offset of 8) the trampoline looks like:
4064 ldr r8, [pc, #0]
4065 ldr pc, [pc]
4066 .word static chain value
4067 .word function's address
4068 XXX FIXME: When the trampoline returns, r8 will be clobbered.
4069
4070 In FDPIC mode, the trampoline looks like:
4071 .word trampoline address
4072 .word trampoline GOT address
4073 ldr r12, [pc, #8] ; #4 for Arm mode
4074 ldr r9, [pc, #8] ; #4 for Arm mode
4075 ldr pc, [pc, #8] ; #4 for Arm mode
4076 .word static chain value
4077 .word GOT address
4078 .word function's address
4079 */
4080
4081 static void
4082 arm_asm_trampoline_template (FILE *f)
4083 {
4084 fprintf (f, "\t.syntax unified\n");
4085
4086 if (TARGET_FDPIC)
4087 {
4088 /* The first two words are a function descriptor pointing to the
4089 trampoline code just below. */
4090 if (TARGET_ARM)
4091 fprintf (f, "\t.arm\n");
4092 else if (TARGET_THUMB2)
4093 fprintf (f, "\t.thumb\n");
4094 else
4095 /* Only ARM and Thumb-2 are supported. */
4096 gcc_unreachable ();
4097
4098 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4099 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4100 /* Trampoline code which sets both the static chain register and the
4101 PIC register before jumping into the real code. */
4102 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4103 STATIC_CHAIN_REGNUM, PC_REGNUM,
4104 TARGET_THUMB2 ? 8 : 4);
4105 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4106 PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
4107 TARGET_THUMB2 ? 8 : 4);
4108 asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
4109 PC_REGNUM, PC_REGNUM,
4110 TARGET_THUMB2 ? 8 : 4);
4111 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4112 }
4113 else if (TARGET_ARM)
4114 {
4115 fprintf (f, "\t.arm\n");
4116 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
4117 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
4118 }
4119 else if (TARGET_THUMB2)
4120 {
4121 fprintf (f, "\t.thumb\n");
4122 /* The Thumb-2 trampoline is similar to the arm implementation.
4123 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
4124 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
4125 STATIC_CHAIN_REGNUM, PC_REGNUM);
4126 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
4127 }
4128 else
4129 {
4130 ASM_OUTPUT_ALIGN (f, 2);
4131 fprintf (f, "\t.code\t16\n");
4132 fprintf (f, ".Ltrampoline_start:\n");
4133 asm_fprintf (f, "\tpush\t{r0, r1}\n");
4134 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4135 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
4136 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
4137 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
4138 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
4139 }
4140 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4141 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4142 }
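/* Editorial layout sketch, derived from the code above rather than the
   original comments: the ARM and Thumb-2 variants emit 8 bytes of code
   followed by the two .word slots, so arm_trampoline_init below patches
   the static chain at offset 8 and the target address at offset 12; the
   16-bit Thumb variant emits 12 bytes of code, placing those words at
   offsets 12 and 16 instead.  */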
4143
4144 /* Emit RTL insns to initialize the variable parts of a trampoline. */
4145
4146 static void
4147 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4148 {
4149 rtx fnaddr, mem, a_tramp;
4150
4151 emit_block_move (m_tramp, assemble_trampoline_template (),
4152 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
4153
4154 if (TARGET_FDPIC)
4155 {
4156 rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
4157 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
4158 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
4159 /* The function start address is at offset 8, but in Thumb mode
4160 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
4161 below. */
4162 rtx trampoline_code_start
4163 = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);
4164
4165 /* Write initial funcdesc which points to the trampoline. */
4166 mem = adjust_address (m_tramp, SImode, 0);
4167 emit_move_insn (mem, trampoline_code_start);
4168 mem = adjust_address (m_tramp, SImode, 4);
4169 emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
4170 /* Setup static chain. */
4171 mem = adjust_address (m_tramp, SImode, 20);
4172 emit_move_insn (mem, chain_value);
4173 /* GOT + real function entry point. */
4174 mem = adjust_address (m_tramp, SImode, 24);
4175 emit_move_insn (mem, gotaddr);
4176 mem = adjust_address (m_tramp, SImode, 28);
4177 emit_move_insn (mem, fnaddr);
4178 }
4179 else
4180 {
4181 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
4182 emit_move_insn (mem, chain_value);
4183
4184 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
4185 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4186 emit_move_insn (mem, fnaddr);
4187 }
4188
4189 a_tramp = XEXP (m_tramp, 0);
4190 maybe_emit_call_builtin___clear_cache (a_tramp,
4191 plus_constant (ptr_mode,
4192 a_tramp,
4193 TRAMPOLINE_SIZE));
4194 }
4195
4196 /* Thumb trampolines should be entered in thumb mode, so set
4197 the bottom bit of the address. */
4198
4199 static rtx
4200 arm_trampoline_adjust_address (rtx addr)
4201 {
4202 /* For FDPIC don't fix trampoline address since it's a function
4203 descriptor and not a function address. */
4204 if (TARGET_THUMB && !TARGET_FDPIC)
4205 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4206 NULL, 0, OPTAB_LIB_WIDEN);
4207 return addr;
4208 }
4209 \f
4210 /* Return 1 if REG needs to be saved. For interrupt handlers, this
4211 includes call-clobbered registers too. If this is a leaf function
4212 we can just examine the registers used by the RTL, but otherwise we
4213 have to assume that whatever function is called might clobber
4214 anything, and so we have to save all the call-clobbered registers
4215 as well. */
4216 static inline bool reg_needs_saving_p (unsigned reg)
4217 {
4218 unsigned long func_type = arm_current_func_type ();
4219
4220 if (IS_INTERRUPT (func_type))
4221 if (df_regs_ever_live_p (reg)
4222 /* Save call-clobbered core registers. */
4223 || (! crtl->is_leaf && call_used_or_fixed_reg_p (reg) && reg < FIRST_VFP_REGNUM))
4224 return true;
4225 else
4226 return false;
4227 else
4228 if (!df_regs_ever_live_p (reg)
4229 || call_used_or_fixed_reg_p (reg))
4230 return false;
4231 else
4232 return true;
4233 }
4234
4235 /* Return 1 if it is possible to return using a single instruction.
4236 If SIBLING is non-null, this is a test for a return before a sibling
4237 call. SIBLING is the call insn, so we can examine its register usage. */
4238
4239 int
4240 use_return_insn (int iscond, rtx sibling)
4241 {
4242 int regno;
4243 unsigned int func_type;
4244 unsigned long saved_int_regs;
4245 unsigned HOST_WIDE_INT stack_adjust;
4246 arm_stack_offsets *offsets;
4247
4248 /* Never use a return instruction before reload has run. */
4249 if (!reload_completed)
4250 return 0;
4251
4252 func_type = arm_current_func_type ();
4253
4254 /* Naked, volatile and stack alignment functions need special
4255 consideration. */
4256 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4257 return 0;
4258
4259 /* So do interrupt functions that use the frame pointer and Thumb
4260 interrupt functions. */
4261 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4262 return 0;
4263
4264 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4265 && !optimize_function_for_size_p (cfun))
4266 return 0;
4267
4268 offsets = arm_get_frame_offsets ();
4269 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4270
4271 /* As do variadic functions. */
4272 if (crtl->args.pretend_args_size
4273 || cfun->machine->uses_anonymous_args
4274 /* Or if the function calls __builtin_eh_return () */
4275 || crtl->calls_eh_return
4276 /* Or if the function calls alloca */
4277 || cfun->calls_alloca
4278 /* Or if there is a stack adjustment. However, if the stack pointer
4279 is saved on the stack, we can use a pre-incrementing stack load. */
4280 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4281 && stack_adjust == 4))
4282 /* Or if the static chain register was saved above the frame, under the
4283 assumption that the stack pointer isn't saved on the stack. */
4284 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4285 && arm_compute_static_chain_stack_bytes() != 0))
4286 return 0;
4287
4288 saved_int_regs = offsets->saved_regs_mask;
4289
4290 /* Unfortunately, the insn
4291
4292 ldmib sp, {..., sp, ...}
4293
4294 triggers a bug on most SA-110 based devices, such that the stack
4295 pointer won't be correctly restored if the instruction takes a
4296 page fault. We work around this problem by popping r3 along with
4297 the other registers, since that is never slower than executing
4298 another instruction.
4299
4300 We test for !arm_arch5t here, because code for any architecture
4301 less than this could potentially be run on one of the buggy
4302 chips. */
4303 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4304 {
4305 /* Validate that r3 is a call-clobbered register (always true in
4306 the default abi) ... */
4307 if (!call_used_or_fixed_reg_p (3))
4308 return 0;
4309
4310 /* ... that it isn't being used for a return value ... */
4311 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4312 return 0;
4313
4314 /* ... or for a tail-call argument ... */
4315 if (sibling)
4316 {
4317 gcc_assert (CALL_P (sibling));
4318
4319 if (find_regno_fusage (sibling, USE, 3))
4320 return 0;
4321 }
4322
4323 /* ... and that there are no call-saved registers in r0-r2
4324 (always true in the default ABI). */
4325 if (saved_int_regs & 0x7)
4326 return 0;
4327 }
4328
4329 /* Can't be done if interworking with Thumb, and any registers have been
4330 stacked. */
4331 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4332 return 0;
4333
4334 /* On StrongARM, conditional returns are expensive if they aren't
4335 taken and multiple registers have been stacked. */
4336 if (iscond && arm_tune_strongarm)
4337 {
4338 /* Conditional return when just the LR is stored is a simple
4339 conditional-load instruction, that's not expensive. */
4340 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4341 return 0;
4342
4343 if (flag_pic
4344 && arm_pic_register != INVALID_REGNUM
4345 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4346 return 0;
4347 }
4348
4349 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4350 several instructions if anything needs to be popped. Armv8.1-M Mainline
4351 also needs several instructions to save and restore FP context. */
4352 if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
4353 return 0;
4354
4355 /* If there are saved registers but the LR isn't saved, then we need
4356 two instructions for the return. */
4357 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4358 return 0;
4359
4360 /* Can't be done if any of the VFP regs are pushed,
4361 since this also requires an insn. */
4362 if (TARGET_VFP_BASE)
4363 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4364 if (reg_needs_saving_p (regno))
4365 return 0;
4366
4367 if (TARGET_REALLY_IWMMXT)
4368 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4369 if (reg_needs_saving_p (regno))
4370 return 0;
4371
4372 return 1;
4373 }
4374
4375 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4376 shrink-wrapping if possible. This is the case if we need to emit a
4377 prologue, which we can test by looking at the offsets. */
4378 bool
4379 use_simple_return_p (void)
4380 {
4381 arm_stack_offsets *offsets;
4382
4383 /* Note this function can be called before or after reload. */
4384 if (!reload_completed)
4385 arm_compute_frame_layout ();
4386
4387 offsets = arm_get_frame_offsets ();
4388 return offsets->outgoing_args != 0;
4389 }
4390
4391 /* Return TRUE if int I is a valid immediate ARM constant. */
4392
4393 int
4394 const_ok_for_arm (HOST_WIDE_INT i)
4395 {
4396 int lowbit;
4397
4398 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4399 be all zero, or all one. */
4400 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4401 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4402 != ((~(unsigned HOST_WIDE_INT) 0)
4403 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4404 return FALSE;
4405
4406 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4407
4408 /* Fast return for 0 and small values. We must do this for zero, since
4409 the code below can't handle that one case. */
4410 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4411 return TRUE;
4412
4413 /* Get the number of trailing zeros. */
4414 lowbit = ffs((int) i) - 1;
4415
4416 /* Only even shifts are allowed in ARM mode so round down to the
4417 nearest even number. */
4418 if (TARGET_ARM)
4419 lowbit &= ~1;
4420
4421 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4422 return TRUE;
4423
4424 if (TARGET_ARM)
4425 {
4426 /* Allow rotated constants in ARM mode. */
4427 if (lowbit <= 4
4428 && ((i & ~0xc000003f) == 0
4429 || (i & ~0xf000000f) == 0
4430 || (i & ~0xfc000003) == 0))
4431 return TRUE;
4432 }
4433 else if (TARGET_THUMB2)
4434 {
4435 HOST_WIDE_INT v;
4436
4437 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4438 v = i & 0xff;
4439 v |= v << 16;
4440 if (i == v || i == (v | (v << 8)))
4441 return TRUE;
4442
4443 /* Allow repeated pattern 0xXY00XY00. */
4444 v = i & 0xff00;
4445 v |= v << 16;
4446 if (i == v)
4447 return TRUE;
4448 }
4449 else if (TARGET_HAVE_MOVT)
4450 {
4451 /* Thumb-1 Targets with MOVT. */
4452 if (i > 0xffff)
4453 return FALSE;
4454 else
4455 return TRUE;
4456 }
4457
4458 return FALSE;
4459 }
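/* A few illustrative values (editorial, assuming a 32-bit operand):
   0x000000ff and 0x0000ff00 are accepted in both ARM and Thumb-2 state
   (an 8-bit value at an even position); 0xf000000f is accepted only in
   ARM state, where rotations may wrap around the word; 0x00ff00ff fails
   the rotation tests but matches the Thumb-2 replicated pattern
   0x00XY00XY; a value like 0x12345678 is rejected everywhere and must be
   synthesized by arm_gen_constant / arm_split_constant below.  */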
4460
4461 /* Return true if I is a valid constant for the operation CODE. */
4462 int
4463 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4464 {
4465 if (const_ok_for_arm (i))
4466 return 1;
4467
4468 switch (code)
4469 {
4470 case SET:
4471 /* See if we can use movw. */
4472 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4473 return 1;
4474 else
4475 /* Otherwise, try mvn. */
4476 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4477
4478 case PLUS:
4479 /* See if we can use addw or subw. */
4480 if (TARGET_THUMB2
4481 && ((i & 0xfffff000) == 0
4482 || ((-i) & 0xfffff000) == 0))
4483 return 1;
4484 /* Fall through. */
4485 case COMPARE:
4486 case EQ:
4487 case NE:
4488 case GT:
4489 case LE:
4490 case LT:
4491 case GE:
4492 case GEU:
4493 case LTU:
4494 case GTU:
4495 case LEU:
4496 case UNORDERED:
4497 case ORDERED:
4498 case UNEQ:
4499 case UNGE:
4500 case UNLT:
4501 case UNGT:
4502 case UNLE:
4503 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4504
4505 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4506 case XOR:
4507 return 0;
4508
4509 case IOR:
4510 if (TARGET_THUMB2)
4511 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4512 return 0;
4513
4514 case AND:
4515 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4516
4517 default:
4518 gcc_unreachable ();
4519 }
4520 }
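/* Worked example (editorial): AND with 0xffffff00 is not itself a valid
   immediate, but its complement 0x000000ff is, so the AND case above
   succeeds and the operation can later be emitted as a BIC.  Likewise,
   PLUS with a small negative constant such as -0x104 succeeds through
   the ARM_SIGN_EXTEND (-i) check and would typically end up as a SUB.  */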
4521
4522 /* Return true if I is a valid di mode constant for the operation CODE. */
4523 int
4524 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4525 {
4526 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4527 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4528 rtx hi = GEN_INT (hi_val);
4529 rtx lo = GEN_INT (lo_val);
4530
4531 if (TARGET_THUMB1)
4532 return 0;
4533
4534 switch (code)
4535 {
4536 case AND:
4537 case IOR:
4538 case XOR:
4539 return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
4540 || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
4541 case PLUS:
4542 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4543
4544 default:
4545 return 0;
4546 }
4547 }
4548
4549 /* Emit a sequence of insns to handle a large constant.
4550 CODE is the code of the operation required, it can be any of SET, PLUS,
4551 IOR, AND, XOR, MINUS;
4552 MODE is the mode in which the operation is being performed;
4553 VAL is the integer to operate on;
4554 SOURCE is the other operand (a register, or a null-pointer for SET);
4555 SUBTARGETS means it is safe to create scratch registers if that will
4556 either produce a simpler sequence, or we will want to cse the values.
4557 Return value is the number of insns emitted. */
4558
4559 /* ??? Tweak this for thumb2. */
4560 int
4561 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4562 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4563 {
4564 rtx cond;
4565
4566 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4567 cond = COND_EXEC_TEST (PATTERN (insn));
4568 else
4569 cond = NULL_RTX;
4570
4571 if (subtargets || code == SET
4572 || (REG_P (target) && REG_P (source)
4573 && REGNO (target) != REGNO (source)))
4574 {
4575 /* After arm_reorg has been called, we can't fix up expensive
4576 constants by pushing them into memory so we must synthesize
4577 them in-line, regardless of the cost. This is only likely to
4578 be more costly on chips that have load delay slots and we are
4579 compiling without running the scheduler (so no splitting
4580 occurred before the final instruction emission).
4581
4582 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4583 */
4584 if (!cfun->machine->after_arm_reorg
4585 && !cond
4586 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4587 1, 0)
4588 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4589 + (code != SET))))
4590 {
4591 if (code == SET)
4592 {
4593 /* Currently SET is the only monadic value for CODE; all
4594 the rest are dyadic. */
4595 if (TARGET_USE_MOVT)
4596 arm_emit_movpair (target, GEN_INT (val));
4597 else
4598 emit_set_insn (target, GEN_INT (val));
4599
4600 return 1;
4601 }
4602 else
4603 {
4604 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4605
4606 if (TARGET_USE_MOVT)
4607 arm_emit_movpair (temp, GEN_INT (val));
4608 else
4609 emit_set_insn (temp, GEN_INT (val));
4610
4611 /* For MINUS, the constant is the minuend (target = val - source),
4612 since we never have subtraction of a constant. */
4613 if (code == MINUS)
4614 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4615 else
4616 emit_set_insn (target,
4617 gen_rtx_fmt_ee (code, mode, source, temp));
4618 return 2;
4619 }
4620 }
4621 }
4622
4623 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4624 1);
4625 }
4626
4627 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4628 ARM/THUMB2 immediates, and add up to VAL.
4629 The function return value gives the number of insns required. */
4630 static int
4631 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4632 struct four_ints *return_sequence)
4633 {
4634 int best_consecutive_zeros = 0;
4635 int i;
4636 int best_start = 0;
4637 int insns1, insns2;
4638 struct four_ints tmp_sequence;
4639
4640 /* If we aren't targeting ARM, the best place to start is always at
4641 the bottom, otherwise look more closely. */
4642 if (TARGET_ARM)
4643 {
4644 for (i = 0; i < 32; i += 2)
4645 {
4646 int consecutive_zeros = 0;
4647
4648 if (!(val & (3 << i)))
4649 {
4650 while ((i < 32) && !(val & (3 << i)))
4651 {
4652 consecutive_zeros += 2;
4653 i += 2;
4654 }
4655 if (consecutive_zeros > best_consecutive_zeros)
4656 {
4657 best_consecutive_zeros = consecutive_zeros;
4658 best_start = i - consecutive_zeros;
4659 }
4660 i -= 2;
4661 }
4662 }
4663 }
4664
4665 /* So long as it won't require any more insns to do so, it's
4666 desirable to emit a small constant (in bits 0...9) in the last
4667 insn. This way there is more chance that it can be combined with
4668 a later addressing insn to form a pre-indexed load or store
4669 operation. Consider:
4670
4671 *((volatile int *)0xe0000100) = 1;
4672 *((volatile int *)0xe0000110) = 2;
4673
4674 We want this to wind up as:
4675
4676 mov rA, #0xe0000000
4677 mov rB, #1
4678 str rB, [rA, #0x100]
4679 mov rB, #2
4680 str rB, [rA, #0x110]
4681
4682 rather than having to synthesize both large constants from scratch.
4683
4684 Therefore, we calculate how many insns would be required to emit
4685 the constant starting from `best_start', and also starting from
4686 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4687 yield a shorter sequence, we may as well use zero. */
4688 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4689 if (best_start != 0
4690 && ((HOST_WIDE_INT_1U << best_start) < val))
4691 {
4692 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4693 if (insns2 <= insns1)
4694 {
4695 *return_sequence = tmp_sequence;
4696 insns1 = insns2;
4697 }
4698 }
4699
4700 return insns1;
4701 }
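/* Editorial example of the splitting performed here: a SET of 0x00f000ff
   is not encodable as a single immediate in either ARM or Thumb-2 state,
   so optimal_immediate_sequence would be expected to return two chunks,
   e.g. { 0x000000ff, 0x00f00000 }, which the caller typically emits as a
   MOV followed by an ORR.  */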
4702
4703 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4704 static int
4705 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4706 struct four_ints *return_sequence, int i)
4707 {
4708 int remainder = val & 0xffffffff;
4709 int insns = 0;
4710
4711 /* Try and find a way of doing the job in either two or three
4712 instructions.
4713
4714 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4715 location. We start at position I. This may be the MSB, or
4716 optimal_immediate_sequence may have positioned it at the largest block
4717 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4718 wrapping around to the top of the word when we drop off the bottom.
4719 In the worst case this code should produce no more than four insns.
4720
4721 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4722 constants, shifted to any arbitrary location. We should always start
4723 at the MSB. */
4724 do
4725 {
4726 int end;
4727 unsigned int b1, b2, b3, b4;
4728 unsigned HOST_WIDE_INT result;
4729 int loc;
4730
4731 gcc_assert (insns < 4);
4732
4733 if (i <= 0)
4734 i += 32;
4735
4736 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4737 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4738 {
4739 loc = i;
4740 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4741 /* We can use addw/subw for the last 12 bits. */
4742 result = remainder;
4743 else
4744 {
4745 /* Use an 8-bit shifted/rotated immediate. */
4746 end = i - 8;
4747 if (end < 0)
4748 end += 32;
4749 result = remainder & ((0x0ff << end)
4750 | ((i < end) ? (0xff >> (32 - end))
4751 : 0));
4752 i -= 8;
4753 }
4754 }
4755 else
4756 {
4757 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4758 arbitrary shifts. */
4759 i -= TARGET_ARM ? 2 : 1;
4760 continue;
4761 }
4762
4763 /* Next, see if we can do a better job with a thumb2 replicated
4764 constant.
4765
4766 We do it this way around to catch the cases like 0x01F001E0 where
4767 two 8-bit immediates would work, but a replicated constant would
4768 make it worse.
4769
4770 TODO: 16-bit constants that don't clear all the bits, but still win.
4771 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4772 if (TARGET_THUMB2)
4773 {
4774 b1 = (remainder & 0xff000000) >> 24;
4775 b2 = (remainder & 0x00ff0000) >> 16;
4776 b3 = (remainder & 0x0000ff00) >> 8;
4777 b4 = remainder & 0xff;
4778
4779 if (loc > 24)
4780 {
4781 /* The 8-bit immediate already found clears b1 (and maybe b2),
4782 but must leave b3 and b4 alone. */
4783
4784 /* First try to find a 32-bit replicated constant that clears
4785 almost everything. We can assume that we can't do it in one,
4786 or else we wouldn't be here. */
4787 unsigned int tmp = b1 & b2 & b3 & b4;
4788 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4789 + (tmp << 24);
4790 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4791 + (tmp == b3) + (tmp == b4);
4792 if (tmp
4793 && (matching_bytes >= 3
4794 || (matching_bytes == 2
4795 && const_ok_for_op (remainder & ~tmp2, code))))
4796 {
4797 /* At least 3 of the bytes match, and the fourth has at
4798 least as many bits set, or two of the bytes match
4799 and it will only require one more insn to finish. */
4800 result = tmp2;
4801 i = tmp != b1 ? 32
4802 : tmp != b2 ? 24
4803 : tmp != b3 ? 16
4804 : 8;
4805 }
4806
4807 /* Second, try to find a 16-bit replicated constant that can
4808 leave three of the bytes clear. If b2 or b4 is already
4809 zero, then we can. If the 8-bit from above would not
4810 clear b2 anyway, then we still win. */
4811 else if (b1 == b3 && (!b2 || !b4
4812 || (remainder & 0x00ff0000 & ~result)))
4813 {
4814 result = remainder & 0xff00ff00;
4815 i = 24;
4816 }
4817 }
4818 else if (loc > 16)
4819 {
4820 /* The 8-bit immediate already found clears b2 (and maybe b3)
4821 and we don't get here unless b1 is already clear, but it will
4822 leave b4 unchanged. */
4823
4824 /* If we can clear b2 and b4 at once, then we win, since the
4825 8-bits couldn't possibly reach that far. */
4826 if (b2 == b4)
4827 {
4828 result = remainder & 0x00ff00ff;
4829 i = 16;
4830 }
4831 }
4832 }
4833
4834 return_sequence->i[insns++] = result;
4835 remainder &= ~result;
4836
4837 if (code == SET || code == MINUS)
4838 code = PLUS;
4839 }
4840 while (remainder);
4841
4842 return insns;
4843 }
4844
4845 /* Emit an instruction with the indicated PATTERN. If COND is
4846 non-NULL, conditionalize the execution of the instruction on COND
4847 being true. */
4848
4849 static void
4850 emit_constant_insn (rtx cond, rtx pattern)
4851 {
4852 if (cond)
4853 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4854 emit_insn (pattern);
4855 }
4856
4857 /* As above, but extra parameter GENERATE which, if clear, suppresses
4858 RTL generation. */
4859
4860 static int
4861 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4862 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4863 int subtargets, int generate)
4864 {
4865 int can_invert = 0;
4866 int can_negate = 0;
4867 int final_invert = 0;
4868 int i;
4869 int set_sign_bit_copies = 0;
4870 int clear_sign_bit_copies = 0;
4871 int clear_zero_bit_copies = 0;
4872 int set_zero_bit_copies = 0;
4873 int insns = 0, neg_insns, inv_insns;
4874 unsigned HOST_WIDE_INT temp1, temp2;
4875 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4876 struct four_ints *immediates;
4877 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4878
4879 /* Find out which operations are safe for a given CODE. Also do a quick
4880 check for degenerate cases; these can occur when DImode operations
4881 are split. */
4882 switch (code)
4883 {
4884 case SET:
4885 can_invert = 1;
4886 break;
4887
4888 case PLUS:
4889 can_negate = 1;
4890 break;
4891
4892 case IOR:
4893 if (remainder == 0xffffffff)
4894 {
4895 if (generate)
4896 emit_constant_insn (cond,
4897 gen_rtx_SET (target,
4898 GEN_INT (ARM_SIGN_EXTEND (val))));
4899 return 1;
4900 }
4901
4902 if (remainder == 0)
4903 {
4904 if (reload_completed && rtx_equal_p (target, source))
4905 return 0;
4906
4907 if (generate)
4908 emit_constant_insn (cond, gen_rtx_SET (target, source));
4909 return 1;
4910 }
4911 break;
4912
4913 case AND:
4914 if (remainder == 0)
4915 {
4916 if (generate)
4917 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4918 return 1;
4919 }
4920 if (remainder == 0xffffffff)
4921 {
4922 if (reload_completed && rtx_equal_p (target, source))
4923 return 0;
4924 if (generate)
4925 emit_constant_insn (cond, gen_rtx_SET (target, source));
4926 return 1;
4927 }
4928 can_invert = 1;
4929 break;
4930
4931 case XOR:
4932 if (remainder == 0)
4933 {
4934 if (reload_completed && rtx_equal_p (target, source))
4935 return 0;
4936 if (generate)
4937 emit_constant_insn (cond, gen_rtx_SET (target, source));
4938 return 1;
4939 }
4940
4941 if (remainder == 0xffffffff)
4942 {
4943 if (generate)
4944 emit_constant_insn (cond,
4945 gen_rtx_SET (target,
4946 gen_rtx_NOT (mode, source)));
4947 return 1;
4948 }
4949 final_invert = 1;
4950 break;
4951
4952 case MINUS:
4953 /* We treat MINUS as (val - source), since (source - val) is always
4954 passed as (source + (-val)). */
4955 if (remainder == 0)
4956 {
4957 if (generate)
4958 emit_constant_insn (cond,
4959 gen_rtx_SET (target,
4960 gen_rtx_NEG (mode, source)));
4961 return 1;
4962 }
4963 if (const_ok_for_arm (val))
4964 {
4965 if (generate)
4966 emit_constant_insn (cond,
4967 gen_rtx_SET (target,
4968 gen_rtx_MINUS (mode, GEN_INT (val),
4969 source)));
4970 return 1;
4971 }
4972
4973 break;
4974
4975 default:
4976 gcc_unreachable ();
4977 }
4978
4979 /* If we can do it in one insn get out quickly. */
4980 if (const_ok_for_op (val, code))
4981 {
4982 if (generate)
4983 emit_constant_insn (cond,
4984 gen_rtx_SET (target,
4985 (source
4986 ? gen_rtx_fmt_ee (code, mode, source,
4987 GEN_INT (val))
4988 : GEN_INT (val))));
4989 return 1;
4990 }
4991
4992 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4993 insn. */
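  /* For instance, an AND with 0xffff can become a single UXTH, and on
     Thumb-2 an AND with 0x1ff can become a single UBFX of width 9.  */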
4994 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4995 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4996 {
4997 if (generate)
4998 {
4999 if (mode == SImode && i == 16)
5000 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
5001 smaller insn. */
5002 emit_constant_insn (cond,
5003 gen_zero_extendhisi2
5004 (target, gen_lowpart (HImode, source)));
5005 else
5006 /* Extz only supports SImode, but we can coerce the operands
5007 into that mode. */
5008 emit_constant_insn (cond,
5009 gen_extzv_t2 (gen_lowpart (SImode, target),
5010 gen_lowpart (SImode, source),
5011 GEN_INT (i), const0_rtx));
5012 }
5013
5014 return 1;
5015 }
5016
5017 /* Calculate a few attributes that may be useful for specific
5018 optimizations. */
5019 /* Count number of leading zeros. */
5020 for (i = 31; i >= 0; i--)
5021 {
5022 if ((remainder & (1 << i)) == 0)
5023 clear_sign_bit_copies++;
5024 else
5025 break;
5026 }
5027
5028 /* Count number of leading 1's. */
5029 for (i = 31; i >= 0; i--)
5030 {
5031 if ((remainder & (1 << i)) != 0)
5032 set_sign_bit_copies++;
5033 else
5034 break;
5035 }
5036
5037   /* Count number of trailing zeros.  */
5038 for (i = 0; i <= 31; i++)
5039 {
5040 if ((remainder & (1 << i)) == 0)
5041 clear_zero_bit_copies++;
5042 else
5043 break;
5044 }
5045
5046 /* Count number of trailing 1's. */
5047 for (i = 0; i <= 31; i++)
5048 {
5049 if ((remainder & (1 << i)) != 0)
5050 set_zero_bit_copies++;
5051 else
5052 break;
5053 }
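  /* For example, a remainder of 0x0007fff8 gives clear_sign_bit_copies == 13,
     set_sign_bit_copies == 0, clear_zero_bit_copies == 3 and
     set_zero_bit_copies == 0.  */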
5054
5055 switch (code)
5056 {
5057 case SET:
5058 /* See if we can do this by sign_extending a constant that is known
5059 	 to be negative.  This is a good way of doing it, since the shift
5060 may well merge into a subsequent insn. */
5061 if (set_sign_bit_copies > 1)
5062 {
5063 if (const_ok_for_arm
5064 (temp1 = ARM_SIGN_EXTEND (remainder
5065 << (set_sign_bit_copies - 1))))
5066 {
5067 if (generate)
5068 {
5069 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5070 emit_constant_insn (cond,
5071 gen_rtx_SET (new_src, GEN_INT (temp1)));
5072 emit_constant_insn (cond,
5073 gen_ashrsi3 (target, new_src,
5074 GEN_INT (set_sign_bit_copies - 1)));
5075 }
5076 return 2;
5077 }
5078 /* For an inverted constant, we will need to set the low bits,
5079 these will be shifted out of harm's way. */
5080 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
5081 if (const_ok_for_arm (~temp1))
5082 {
5083 if (generate)
5084 {
5085 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5086 emit_constant_insn (cond,
5087 gen_rtx_SET (new_src, GEN_INT (temp1)));
5088 emit_constant_insn (cond,
5089 gen_ashrsi3 (target, new_src,
5090 GEN_INT (set_sign_bit_copies - 1)));
5091 }
5092 return 2;
5093 }
5094 }
5095
5096 /* See if we can calculate the value as the difference between two
5097 valid immediates. */
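      /* E.g. 0x00ffff00 is not itself a valid immediate, but it is the
	 difference of two that are (0x01000000 - 0x00000100), so it can be
	 built with a MOV of the larger value followed by a SUB.  */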
5098 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
5099 {
5100 int topshift = clear_sign_bit_copies & ~1;
5101
5102 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
5103 & (0xff000000 >> topshift));
5104
5105 /* If temp1 is zero, then that means the 9 most significant
5106 bits of remainder were 1 and we've caused it to overflow.
5107 When topshift is 0 we don't need to do anything since we
5108 can borrow from 'bit 32'. */
5109 if (temp1 == 0 && topshift != 0)
5110 temp1 = 0x80000000 >> (topshift - 1);
5111
5112 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
5113
5114 if (const_ok_for_arm (temp2))
5115 {
5116 if (generate)
5117 {
5118 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5119 emit_constant_insn (cond,
5120 gen_rtx_SET (new_src, GEN_INT (temp1)));
5121 emit_constant_insn (cond,
5122 gen_addsi3 (target, new_src,
5123 GEN_INT (-temp2)));
5124 }
5125
5126 return 2;
5127 }
5128 }
5129
5130 /* See if we can generate this by setting the bottom (or the top)
5131 16 bits, and then shifting these into the other half of the
5132 word. We only look for the simplest cases, to do more would cost
5133 too much. Be careful, however, not to generate this when the
5134 alternative would take fewer insns. */
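      /* E.g. 0x1fff1fff is its own bottom half ORred with that half shifted
	 left by 16, so it can be built by constructing 0x1fff and then using
	 one ORR with an LSL #16.  */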
5135 if (val & 0xffff0000)
5136 {
5137 temp1 = remainder & 0xffff0000;
5138 temp2 = remainder & 0x0000ffff;
5139
5140 /* Overlaps outside this range are best done using other methods. */
5141 for (i = 9; i < 24; i++)
5142 {
5143 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
5144 && !const_ok_for_arm (temp2))
5145 {
5146 rtx new_src = (subtargets
5147 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5148 : target);
5149 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
5150 source, subtargets, generate);
5151 source = new_src;
5152 if (generate)
5153 emit_constant_insn
5154 (cond,
5155 gen_rtx_SET
5156 (target,
5157 gen_rtx_IOR (mode,
5158 gen_rtx_ASHIFT (mode, source,
5159 GEN_INT (i)),
5160 source)));
5161 return insns + 1;
5162 }
5163 }
5164
5165 /* Don't duplicate cases already considered. */
5166 for (i = 17; i < 24; i++)
5167 {
5168 if (((temp1 | (temp1 >> i)) == remainder)
5169 && !const_ok_for_arm (temp1))
5170 {
5171 rtx new_src = (subtargets
5172 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
5173 : target);
5174 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
5175 source, subtargets, generate);
5176 source = new_src;
5177 if (generate)
5178 emit_constant_insn
5179 (cond,
5180 gen_rtx_SET (target,
5181 gen_rtx_IOR
5182 (mode,
5183 gen_rtx_LSHIFTRT (mode, source,
5184 GEN_INT (i)),
5185 source)));
5186 return insns + 1;
5187 }
5188 }
5189 }
5190 break;
5191
5192 case IOR:
5193 case XOR:
5194 /* If we have IOR or XOR, and the constant can be loaded in a
5195 single instruction, and we can find a temporary to put it in,
5196 then this can be done in two instructions instead of 3-4. */
5197 if (subtargets
5198 /* TARGET can't be NULL if SUBTARGETS is 0 */
5199 || (reload_completed && !reg_mentioned_p (target, source)))
5200 {
5201 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
5202 {
5203 if (generate)
5204 {
5205 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5206
5207 emit_constant_insn (cond,
5208 gen_rtx_SET (sub, GEN_INT (val)));
5209 emit_constant_insn (cond,
5210 gen_rtx_SET (target,
5211 gen_rtx_fmt_ee (code, mode,
5212 source, sub)));
5213 }
5214 return 2;
5215 }
5216 }
5217
5218 if (code == XOR)
5219 break;
5220
5221 /* Convert.
5222 	 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
5223 and the remainder 0s for e.g. 0xfff00000)
5224 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
5225
5226 This can be done in 2 instructions by using shifts with mov or mvn.
5227 e.g. for
5228 x = x | 0xfff00000;
5229 	 we generate:
5230 mvn r0, r0, asl #12
5231 mvn r0, r0, lsr #12 */
5232 if (set_sign_bit_copies > 8
5233 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5234 {
5235 if (generate)
5236 {
5237 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5238 rtx shift = GEN_INT (set_sign_bit_copies);
5239
5240 emit_constant_insn
5241 (cond,
5242 gen_rtx_SET (sub,
5243 gen_rtx_NOT (mode,
5244 gen_rtx_ASHIFT (mode,
5245 source,
5246 shift))));
5247 emit_constant_insn
5248 (cond,
5249 gen_rtx_SET (target,
5250 gen_rtx_NOT (mode,
5251 gen_rtx_LSHIFTRT (mode, sub,
5252 shift))));
5253 }
5254 return 2;
5255 }
5256
5257 /* Convert
5258 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5259 to
5260 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5261
5262 	 E.g. for r0 = r0 | 0xfff we generate:
5263 mvn r0, r0, lsr #12
5264 mvn r0, r0, asl #12
5265
5266 */
5267 if (set_zero_bit_copies > 8
5268 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5269 {
5270 if (generate)
5271 {
5272 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5273 rtx shift = GEN_INT (set_zero_bit_copies);
5274
5275 emit_constant_insn
5276 (cond,
5277 gen_rtx_SET (sub,
5278 gen_rtx_NOT (mode,
5279 gen_rtx_LSHIFTRT (mode,
5280 source,
5281 shift))));
5282 emit_constant_insn
5283 (cond,
5284 gen_rtx_SET (target,
5285 gen_rtx_NOT (mode,
5286 gen_rtx_ASHIFT (mode, sub,
5287 shift))));
5288 }
5289 return 2;
5290 }
5291
5292 /* This will never be reached for Thumb2 because orn is a valid
5293 	 instruction.  This is for Thumb1 and the 32-bit ARM cases.
5294
5295 x = y | constant (such that ~constant is a valid constant)
5296 Transform this to
5297 x = ~(~y & ~constant).
5298 */
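      /* E.g. y | 0xfffff7ff can be computed as ~(~y & 0x800), where 0x800
	 (the inverted constant) is a valid immediate for the AND.  */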
5299 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5300 {
5301 if (generate)
5302 {
5303 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5304 emit_constant_insn (cond,
5305 gen_rtx_SET (sub,
5306 gen_rtx_NOT (mode, source)));
5307 source = sub;
5308 if (subtargets)
5309 sub = gen_reg_rtx (mode);
5310 emit_constant_insn (cond,
5311 gen_rtx_SET (sub,
5312 gen_rtx_AND (mode, source,
5313 GEN_INT (temp1))));
5314 emit_constant_insn (cond,
5315 gen_rtx_SET (target,
5316 gen_rtx_NOT (mode, sub)));
5317 }
5318 return 3;
5319 }
5320 break;
5321
5322 case AND:
5323 /* See if two shifts will do 2 or more insn's worth of work. */
5324 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5325 {
5326 HOST_WIDE_INT shift_mask = ((0xffffffff
5327 << (32 - clear_sign_bit_copies))
5328 & 0xffffffff);
5329
5330 if ((remainder | shift_mask) != 0xffffffff)
5331 {
5332 HOST_WIDE_INT new_val
5333 = ARM_SIGN_EXTEND (remainder | shift_mask);
5334
5335 if (generate)
5336 {
5337 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5338 insns = arm_gen_constant (AND, SImode, cond, new_val,
5339 new_src, source, subtargets, 1);
5340 source = new_src;
5341 }
5342 else
5343 {
5344 rtx targ = subtargets ? NULL_RTX : target;
5345 insns = arm_gen_constant (AND, mode, cond, new_val,
5346 targ, source, subtargets, 0);
5347 }
5348 }
5349
5350 if (generate)
5351 {
5352 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5353 rtx shift = GEN_INT (clear_sign_bit_copies);
5354
5355 emit_insn (gen_ashlsi3 (new_src, source, shift));
5356 emit_insn (gen_lshrsi3 (target, new_src, shift));
5357 }
5358
5359 return insns + 2;
5360 }
5361
5362 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5363 {
5364 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5365
5366 if ((remainder | shift_mask) != 0xffffffff)
5367 {
5368 HOST_WIDE_INT new_val
5369 = ARM_SIGN_EXTEND (remainder | shift_mask);
5370 if (generate)
5371 {
5372 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5373
5374 insns = arm_gen_constant (AND, mode, cond, new_val,
5375 new_src, source, subtargets, 1);
5376 source = new_src;
5377 }
5378 else
5379 {
5380 rtx targ = subtargets ? NULL_RTX : target;
5381
5382 insns = arm_gen_constant (AND, mode, cond, new_val,
5383 targ, source, subtargets, 0);
5384 }
5385 }
5386
5387 if (generate)
5388 {
5389 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5390 rtx shift = GEN_INT (clear_zero_bit_copies);
5391
5392 emit_insn (gen_lshrsi3 (new_src, source, shift));
5393 emit_insn (gen_ashlsi3 (target, new_src, shift));
5394 }
5395
5396 return insns + 2;
5397 }
5398
5399 break;
5400
5401 default:
5402 break;
5403 }
5404
5405 /* Calculate what the instruction sequences would be if we generated it
5406 normally, negated, or inverted. */
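  /* E.g. an AND with 0xfff0f00f is best done as two BICs, clearing
     0x000f0000 and then 0x00000ff0: the inverted immediate sequence.  */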
5407 if (code == AND)
5408 /* AND cannot be split into multiple insns, so invert and use BIC. */
5409 insns = 99;
5410 else
5411 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5412
5413 if (can_negate)
5414 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5415 &neg_immediates);
5416 else
5417 neg_insns = 99;
5418
5419 if (can_invert || final_invert)
5420 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5421 &inv_immediates);
5422 else
5423 inv_insns = 99;
5424
5425 immediates = &pos_immediates;
5426
5427 /* Is the negated immediate sequence more efficient? */
5428 if (neg_insns < insns && neg_insns <= inv_insns)
5429 {
5430 insns = neg_insns;
5431 immediates = &neg_immediates;
5432 }
5433 else
5434 can_negate = 0;
5435
5436 /* Is the inverted immediate sequence more efficient?
5437 We must allow for an extra NOT instruction for XOR operations, although
5438 there is some chance that the final 'mvn' will get optimized later. */
5439 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5440 {
5441 insns = inv_insns;
5442 immediates = &inv_immediates;
5443 }
5444 else
5445 {
5446 can_invert = 0;
5447 final_invert = 0;
5448 }
5449
5450 /* Now output the chosen sequence as instructions. */
5451 if (generate)
5452 {
5453 for (i = 0; i < insns; i++)
5454 {
5455 rtx new_src, temp1_rtx;
5456
5457 temp1 = immediates->i[i];
5458
5459 if (code == SET || code == MINUS)
5460 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5461 else if ((final_invert || i < (insns - 1)) && subtargets)
5462 new_src = gen_reg_rtx (mode);
5463 else
5464 new_src = target;
5465
5466 if (can_invert)
5467 temp1 = ~temp1;
5468 else if (can_negate)
5469 temp1 = -temp1;
5470
5471 temp1 = trunc_int_for_mode (temp1, mode);
5472 temp1_rtx = GEN_INT (temp1);
5473
5474 if (code == SET)
5475 ;
5476 else if (code == MINUS)
5477 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5478 else
5479 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5480
5481 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5482 source = new_src;
5483
5484 if (code == SET)
5485 {
5486 can_negate = can_invert;
5487 can_invert = 0;
5488 code = PLUS;
5489 }
5490 else if (code == MINUS)
5491 code = PLUS;
5492 }
5493 }
5494
5495 if (final_invert)
5496 {
5497 if (generate)
5498 emit_constant_insn (cond, gen_rtx_SET (target,
5499 gen_rtx_NOT (mode, source)));
5500 insns++;
5501 }
5502
5503 return insns;
5504 }
5505
5506 /* Return TRUE if OP is a constant where both the low and high words are
5507 suitable for RSB/RSC instructions. This is never true for Thumb, since
5508 we do not have RSC in that case. */
5509 static bool
5510 arm_const_double_prefer_rsbs_rsc (rtx op)
5511 {
5512 /* Thumb lacks RSC, so we never prefer that sequence. */
5513 if (TARGET_THUMB || !CONST_INT_P (op))
5514 return false;
5515 HOST_WIDE_INT hi, lo;
5516 lo = UINTVAL (op) & 0xffffffffULL;
5517 hi = UINTVAL (op) >> 32;
5518 return const_ok_for_arm (lo) && const_ok_for_arm (hi);
5519 }
5520
5521 /* Canonicalize a comparison so that we are more likely to recognize it.
5522 This can be done for a few constant compares, where we can make the
5523 immediate value easier to load. */
5524
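/* For instance, (x > 0xfffff) can be canonicalized to (x >= 0x100000),
   since 0x100000 is a valid immediate while 0xfffff is not.  */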
5525 static void
5526 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5527 bool op0_preserve_value)
5528 {
5529 machine_mode mode;
5530 unsigned HOST_WIDE_INT i, maxval;
5531
5532 mode = GET_MODE (*op0);
5533 if (mode == VOIDmode)
5534 mode = GET_MODE (*op1);
5535
5536 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5537
5538 /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc). In
5539 ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be
5540 either reversed or (for constant OP1) adjusted to GE/LT.
5541 Similarly for GTU/LEU in Thumb mode. */
5542 if (mode == DImode)
5543 {
5544
5545 if (*code == GT || *code == LE
5546 || *code == GTU || *code == LEU)
5547 {
5548 /* Missing comparison. First try to use an available
5549 comparison. */
5550 if (CONST_INT_P (*op1))
5551 {
5552 i = INTVAL (*op1);
5553 switch (*code)
5554 {
5555 case GT:
5556 case LE:
5557 if (i != maxval)
5558 {
5559 /* Try to convert to GE/LT, unless that would be more
5560 expensive. */
5561 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5562 && arm_const_double_prefer_rsbs_rsc (*op1))
5563 return;
5564 *op1 = GEN_INT (i + 1);
5565 *code = *code == GT ? GE : LT;
5566 return;
5567 }
5568 break;
5569
5570 case GTU:
5571 case LEU:
5572 if (i != ~((unsigned HOST_WIDE_INT) 0))
5573 {
5574 /* Try to convert to GEU/LTU, unless that would
5575 be more expensive. */
5576 if (!arm_const_double_by_immediates (GEN_INT (i + 1))
5577 && arm_const_double_prefer_rsbs_rsc (*op1))
5578 return;
5579 *op1 = GEN_INT (i + 1);
5580 *code = *code == GTU ? GEU : LTU;
5581 return;
5582 }
5583 break;
5584
5585 default:
5586 gcc_unreachable ();
5587 }
5588 }
5589
5590 if (!op0_preserve_value)
5591 {
5592 std::swap (*op0, *op1);
5593 *code = (int)swap_condition ((enum rtx_code)*code);
5594 }
5595 }
5596 return;
5597 }
5598
5599 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5600 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5601 to facilitate possible combining with a cmp into 'ands'. */
5602 if (mode == SImode
5603 && GET_CODE (*op0) == ZERO_EXTEND
5604 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5605 && GET_MODE (XEXP (*op0, 0)) == QImode
5606 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5607 && subreg_lowpart_p (XEXP (*op0, 0))
5608 && *op1 == const0_rtx)
5609 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5610 GEN_INT (255));
5611
5612 /* Comparisons smaller than DImode. Only adjust comparisons against
5613 an out-of-range constant. */
5614 if (!CONST_INT_P (*op1)
5615 || const_ok_for_arm (INTVAL (*op1))
5616 || const_ok_for_arm (- INTVAL (*op1)))
5617 return;
5618
5619 i = INTVAL (*op1);
5620
5621 switch (*code)
5622 {
5623 case EQ:
5624 case NE:
5625 return;
5626
5627 case GT:
5628 case LE:
5629 if (i != maxval
5630 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5631 {
5632 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5633 *code = *code == GT ? GE : LT;
5634 return;
5635 }
5636 break;
5637
5638 case GE:
5639 case LT:
5640 if (i != ~maxval
5641 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5642 {
5643 *op1 = GEN_INT (i - 1);
5644 *code = *code == GE ? GT : LE;
5645 return;
5646 }
5647 break;
5648
5649 case GTU:
5650 case LEU:
5651 if (i != ~((unsigned HOST_WIDE_INT) 0)
5652 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5653 {
5654 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5655 *code = *code == GTU ? GEU : LTU;
5656 return;
5657 }
5658 break;
5659
5660 case GEU:
5661 case LTU:
5662 if (i != 0
5663 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5664 {
5665 *op1 = GEN_INT (i - 1);
5666 *code = *code == GEU ? GTU : LEU;
5667 return;
5668 }
5669 break;
5670
5671 default:
5672 gcc_unreachable ();
5673 }
5674 }
5675
5676
5677 /* Define how to find the value returned by a function. */
5678
5679 static rtx
5680 arm_function_value(const_tree type, const_tree func,
5681 bool outgoing ATTRIBUTE_UNUSED)
5682 {
5683 machine_mode mode;
5684 int unsignedp ATTRIBUTE_UNUSED;
5685 rtx r ATTRIBUTE_UNUSED;
5686
5687 mode = TYPE_MODE (type);
5688
5689 if (TARGET_AAPCS_BASED)
5690 return aapcs_allocate_return_reg (mode, type, func);
5691
5692 /* Promote integer types. */
5693 if (INTEGRAL_TYPE_P (type))
5694 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5695
5696   /* Promote small structs returned in a register to full-word size
5697 for big-endian AAPCS. */
5698 if (arm_return_in_msb (type))
5699 {
5700 HOST_WIDE_INT size = int_size_in_bytes (type);
5701 if (size % UNITS_PER_WORD != 0)
5702 {
5703 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5704 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5705 }
5706 }
5707
5708 return arm_libcall_value_1 (mode);
5709 }
5710
5711 /* libcall hashtable helpers. */
5712
5713 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5714 {
5715 static inline hashval_t hash (const rtx_def *);
5716 static inline bool equal (const rtx_def *, const rtx_def *);
5717 static inline void remove (rtx_def *);
5718 };
5719
5720 inline bool
5721 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5722 {
5723 return rtx_equal_p (p1, p2);
5724 }
5725
5726 inline hashval_t
5727 libcall_hasher::hash (const rtx_def *p1)
5728 {
5729 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5730 }
5731
5732 typedef hash_table<libcall_hasher> libcall_table_type;
5733
5734 static void
5735 add_libcall (libcall_table_type *htab, rtx libcall)
5736 {
5737 *htab->find_slot (libcall, INSERT) = libcall;
5738 }
5739
5740 static bool
5741 arm_libcall_uses_aapcs_base (const_rtx libcall)
5742 {
5743 static bool init_done = false;
5744 static libcall_table_type *libcall_htab = NULL;
5745
5746 if (!init_done)
5747 {
5748 init_done = true;
5749
5750 libcall_htab = new libcall_table_type (31);
5751 add_libcall (libcall_htab,
5752 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5753 add_libcall (libcall_htab,
5754 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5755 add_libcall (libcall_htab,
5756 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5757 add_libcall (libcall_htab,
5758 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5759
5760 add_libcall (libcall_htab,
5761 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5762 add_libcall (libcall_htab,
5763 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5764 add_libcall (libcall_htab,
5765 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5766 add_libcall (libcall_htab,
5767 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5768
5769 add_libcall (libcall_htab,
5770 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5771 add_libcall (libcall_htab,
5772 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5773 add_libcall (libcall_htab,
5774 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5775 add_libcall (libcall_htab,
5776 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5777 add_libcall (libcall_htab,
5778 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5779 add_libcall (libcall_htab,
5780 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5781 add_libcall (libcall_htab,
5782 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5783 add_libcall (libcall_htab,
5784 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5785 add_libcall (libcall_htab,
5786 convert_optab_libfunc (sfix_optab, SImode, SFmode));
5787 add_libcall (libcall_htab,
5788 convert_optab_libfunc (ufix_optab, SImode, SFmode));
5789
5790 /* Values from double-precision helper functions are returned in core
5791 registers if the selected core only supports single-precision
5792 arithmetic, even if we are using the hard-float ABI. The same is
5793 true for single-precision helpers except in case of MVE, because in
5794 MVE we will be using the hard-float ABI on a CPU which doesn't support
5795 single-precision operations in hardware. In MVE the following check
5796 enables use of emulation for the single-precision arithmetic
5797 operations. */
5798 if (TARGET_HAVE_MVE)
5799 {
5800 add_libcall (libcall_htab, optab_libfunc (add_optab, SFmode));
5801 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, SFmode));
5802 add_libcall (libcall_htab, optab_libfunc (smul_optab, SFmode));
5803 add_libcall (libcall_htab, optab_libfunc (neg_optab, SFmode));
5804 add_libcall (libcall_htab, optab_libfunc (sub_optab, SFmode));
5805 add_libcall (libcall_htab, optab_libfunc (eq_optab, SFmode));
5806 add_libcall (libcall_htab, optab_libfunc (lt_optab, SFmode));
5807 add_libcall (libcall_htab, optab_libfunc (le_optab, SFmode));
5808 add_libcall (libcall_htab, optab_libfunc (ge_optab, SFmode));
5809 add_libcall (libcall_htab, optab_libfunc (gt_optab, SFmode));
5810 add_libcall (libcall_htab, optab_libfunc (unord_optab, SFmode));
5811 }
5812 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5813 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5814 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5815 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5816 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5817 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5818 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5819 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5820 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5821 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5822 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5823 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5824 SFmode));
5825 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5826 DFmode));
5827 add_libcall (libcall_htab,
5828 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5829 }
5830
5831 return libcall && libcall_htab->find (libcall) != NULL;
5832 }
5833
5834 static rtx
5835 arm_libcall_value_1 (machine_mode mode)
5836 {
5837 if (TARGET_AAPCS_BASED)
5838 return aapcs_libcall_value (mode);
5839 else if (TARGET_IWMMXT_ABI
5840 && arm_vector_mode_supported_p (mode))
5841 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5842 else
5843 return gen_rtx_REG (mode, ARG_REGISTER (1));
5844 }
5845
5846 /* Define how to find the value returned by a library function
5847 assuming the value has mode MODE. */
5848
5849 static rtx
5850 arm_libcall_value (machine_mode mode, const_rtx libcall)
5851 {
5852 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5853 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5854 {
5855 /* The following libcalls return their result in integer registers,
5856 even though they return a floating point value. */
5857 if (arm_libcall_uses_aapcs_base (libcall))
5858 return gen_rtx_REG (mode, ARG_REGISTER(1));
5859
5860 }
5861
5862 return arm_libcall_value_1 (mode);
5863 }
5864
5865 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5866
5867 static bool
5868 arm_function_value_regno_p (const unsigned int regno)
5869 {
5870 if (regno == ARG_REGISTER (1)
5871 || (TARGET_32BIT
5872 && TARGET_AAPCS_BASED
5873 && TARGET_HARD_FLOAT
5874 && regno == FIRST_VFP_REGNUM)
5875 || (TARGET_IWMMXT_ABI
5876 && regno == FIRST_IWMMXT_REGNUM))
5877 return true;
5878
5879 return false;
5880 }
5881
5882 /* Determine the amount of memory needed to store the possible return
5883 registers of an untyped call. */
5884 int
5885 arm_apply_result_size (void)
5886 {
5887 int size = 16;
5888
5889 if (TARGET_32BIT)
5890 {
5891 if (TARGET_HARD_FLOAT_ABI)
5892 size += 32;
5893 if (TARGET_IWMMXT_ABI)
5894 size += 8;
5895 }
5896
5897 return size;
5898 }
5899
5900 /* Decide whether TYPE should be returned in memory (true)
5901 or in a register (false). FNTYPE is the type of the function making
5902 the call. */
5903 static bool
5904 arm_return_in_memory (const_tree type, const_tree fntype)
5905 {
5906 HOST_WIDE_INT size;
5907
5908 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5909
5910 if (TARGET_AAPCS_BASED)
5911 {
5912       /* Simple, non-aggregate types (i.e. not including vectors and
5913 complex) are always returned in a register (or registers).
5914 We don't care about which register here, so we can short-cut
5915 some of the detail. */
5916 if (!AGGREGATE_TYPE_P (type)
5917 && TREE_CODE (type) != VECTOR_TYPE
5918 && TREE_CODE (type) != COMPLEX_TYPE)
5919 return false;
5920
5921 /* Any return value that is no larger than one word can be
5922 returned in r0. */
5923 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5924 return false;
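      /* For instance, struct { char c[4]; } fits in one word and comes back
	 in r0, while an 8-byte struct of two ints is larger than a word and,
	 not being a co-processor candidate, ends up in memory.  */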
5925
5926 /* Check any available co-processors to see if they accept the
5927 type as a register candidate (VFP, for example, can return
5928 some aggregates in consecutive registers). These aren't
5929 available if the call is variadic. */
5930 if (aapcs_select_return_coproc (type, fntype) >= 0)
5931 return false;
5932
5933 /* Vector values should be returned using ARM registers, not
5934 memory (unless they're over 16 bytes, which will break since
5935 we only have four call-clobbered registers to play with). */
5936 if (TREE_CODE (type) == VECTOR_TYPE)
5937 return (size < 0 || size > (4 * UNITS_PER_WORD));
5938
5939 /* The rest go in memory. */
5940 return true;
5941 }
5942
5943 if (TREE_CODE (type) == VECTOR_TYPE)
5944 return (size < 0 || size > (4 * UNITS_PER_WORD));
5945
5946 if (!AGGREGATE_TYPE_P (type) &&
5947 (TREE_CODE (type) != VECTOR_TYPE))
5948 /* All simple types are returned in registers. */
5949 return false;
5950
5951 if (arm_abi != ARM_ABI_APCS)
5952 {
5953 /* ATPCS and later return aggregate types in memory only if they are
5954 larger than a word (or are variable size). */
5955 return (size < 0 || size > UNITS_PER_WORD);
5956 }
5957
5958 /* For the arm-wince targets we choose to be compatible with Microsoft's
5959 ARM and Thumb compilers, which always return aggregates in memory. */
5960 #ifndef ARM_WINCE
5961 /* All structures/unions bigger than one word are returned in memory.
5962 Also catch the case where int_size_in_bytes returns -1. In this case
5963 the aggregate is either huge or of variable size, and in either case
5964 we will want to return it via memory and not in a register. */
5965 if (size < 0 || size > UNITS_PER_WORD)
5966 return true;
5967
5968 if (TREE_CODE (type) == RECORD_TYPE)
5969 {
5970 tree field;
5971
5972 /* For a struct the APCS says that we only return in a register
5973 if the type is 'integer like' and every addressable element
5974 has an offset of zero. For practical purposes this means
5975 that the structure can have at most one non bit-field element
5976 and that this element must be the first one in the structure. */
5977
5978 /* Find the first field, ignoring non FIELD_DECL things which will
5979 have been created by C++. */
5980 /* NOTE: This code is deprecated and has not been updated to handle
5981 DECL_FIELD_ABI_IGNORED. */
5982 for (field = TYPE_FIELDS (type);
5983 field && TREE_CODE (field) != FIELD_DECL;
5984 field = DECL_CHAIN (field))
5985 continue;
5986
5987 if (field == NULL)
5988 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5989
5990 /* Check that the first field is valid for returning in a register. */
5991
5992 /* ... Floats are not allowed */
5993 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5994 return true;
5995
5996 /* ... Aggregates that are not themselves valid for returning in
5997 a register are not allowed. */
5998 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5999 return true;
6000
6001 /* Now check the remaining fields, if any. Only bitfields are allowed,
6002 since they are not addressable. */
6003 for (field = DECL_CHAIN (field);
6004 field;
6005 field = DECL_CHAIN (field))
6006 {
6007 if (TREE_CODE (field) != FIELD_DECL)
6008 continue;
6009
6010 if (!DECL_BIT_FIELD_TYPE (field))
6011 return true;
6012 }
6013
6014 return false;
6015 }
6016
6017 if (TREE_CODE (type) == UNION_TYPE)
6018 {
6019 tree field;
6020
6021 /* Unions can be returned in registers if every element is
6022 integral, or can be returned in an integer register. */
6023 for (field = TYPE_FIELDS (type);
6024 field;
6025 field = DECL_CHAIN (field))
6026 {
6027 if (TREE_CODE (field) != FIELD_DECL)
6028 continue;
6029
6030 if (FLOAT_TYPE_P (TREE_TYPE (field)))
6031 return true;
6032
6033 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
6034 return true;
6035 }
6036
6037 return false;
6038 }
6039 #endif /* not ARM_WINCE */
6040
6041 /* Return all other types in memory. */
6042 return true;
6043 }
6044
6045 const struct pcs_attribute_arg
6046 {
6047 const char *arg;
6048 enum arm_pcs value;
6049 } pcs_attribute_args[] =
6050 {
6051 {"aapcs", ARM_PCS_AAPCS},
6052 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
6053 #if 0
6054 /* We could recognize these, but changes would be needed elsewhere
6055 * to implement them. */
6056 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
6057 {"atpcs", ARM_PCS_ATPCS},
6058 {"apcs", ARM_PCS_APCS},
6059 #endif
6060 {NULL, ARM_PCS_UNKNOWN}
6061 };
6062
6063 static enum arm_pcs
6064 arm_pcs_from_attribute (tree attr)
6065 {
6066 const struct pcs_attribute_arg *ptr;
6067 const char *arg;
6068
6069 /* Get the value of the argument. */
6070 if (TREE_VALUE (attr) == NULL_TREE
6071 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
6072 return ARM_PCS_UNKNOWN;
6073
6074 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
6075
6076 /* Check it against the list of known arguments. */
6077 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
6078 if (streq (arg, ptr->arg))
6079 return ptr->value;
6080
6081   /* An unrecognized PCS name.  */
6082 return ARM_PCS_UNKNOWN;
6083 }
6084
6085 /* Get the PCS variant to use for this call. TYPE is the function's type
6086    specification, DECL is the specific declaration.  DECL may be null if
6087 the call could be indirect or if this is a library call. */
6088 static enum arm_pcs
6089 arm_get_pcs_model (const_tree type, const_tree decl)
6090 {
6091 bool user_convention = false;
6092 enum arm_pcs user_pcs = arm_pcs_default;
6093 tree attr;
6094
6095 gcc_assert (type);
6096
6097 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
6098 if (attr)
6099 {
6100 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
6101 user_convention = true;
6102 }
6103
6104 if (TARGET_AAPCS_BASED)
6105 {
6106 /* Detect varargs functions. These always use the base rules
6107 (no argument is ever a candidate for a co-processor
6108 register). */
6109 bool base_rules = stdarg_p (type);
6110
6111 if (user_convention)
6112 {
6113 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
6114 sorry ("non-AAPCS derived PCS variant");
6115 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
6116 error ("variadic functions must use the base AAPCS variant");
6117 }
6118
6119 if (base_rules)
6120 return ARM_PCS_AAPCS;
6121 else if (user_convention)
6122 return user_pcs;
6123 else if (decl && flag_unit_at_a_time)
6124 {
6125 /* Local functions never leak outside this compilation unit,
6126 so we are free to use whatever conventions are
6127 appropriate. */
6128 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
6129 cgraph_node *local_info_node
6130 = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
6131 if (local_info_node && local_info_node->local)
6132 return ARM_PCS_AAPCS_LOCAL;
6133 }
6134 }
6135 else if (user_convention && user_pcs != arm_pcs_default)
6136 sorry ("PCS variant");
6137
6138 /* For everything else we use the target's default. */
6139 return arm_pcs_default;
6140 }
6141
6142
6143 static void
6144 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6145 const_tree fntype ATTRIBUTE_UNUSED,
6146 rtx libcall ATTRIBUTE_UNUSED,
6147 const_tree fndecl ATTRIBUTE_UNUSED)
6148 {
6149 /* Record the unallocated VFP registers. */
6150 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
6151 pcum->aapcs_vfp_reg_alloc = 0;
6152 }
6153
6154 /* Bitmasks that indicate whether earlier versions of GCC would have
6155 taken a different path through the ABI logic. This should result in
6156 a -Wpsabi warning if the earlier path led to a different ABI decision.
6157
6158 WARN_PSABI_EMPTY_CXX17_BASE
6159 Indicates that the type includes an artificial empty C++17 base field
6160 that, prior to GCC 10.1, would prevent the type from being treated as
6161 a HFA or HVA. See PR94711 for details.
6162
6163 WARN_PSABI_NO_UNIQUE_ADDRESS
6164 Indicates that the type includes an empty [[no_unique_address]] field
6165 that, prior to GCC 10.1, would prevent the type from being treated as
6166 a HFA or HVA. */
6167 const unsigned int WARN_PSABI_EMPTY_CXX17_BASE = 1U << 0;
6168 const unsigned int WARN_PSABI_NO_UNIQUE_ADDRESS = 1U << 1;
6169
6170 /* Walk down the type tree of TYPE counting consecutive base elements.
6171 If *MODEP is VOIDmode, then set it to the first valid floating point
6172 type. If a non-floating point type is found, or if a floating point
6173 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6174 otherwise return the count in the sub-tree.
6175
6176 The WARN_PSABI_FLAGS argument allows the caller to check whether this
6177 function has changed its behavior relative to earlier versions of GCC.
6178 Normally the argument should be nonnull and point to a zero-initialized
6179 variable. The function then records whether the ABI decision might
6180 be affected by a known fix to the ABI logic, setting the associated
6181 WARN_PSABI_* bits if so.
6182
6183 When the argument is instead a null pointer, the function tries to
6184 simulate the behavior of GCC before all such ABI fixes were made.
6185 This is useful to check whether the function returns something
6186 different after the ABI fixes. */
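/* For example, struct { float x, y, z; } yields *MODEP == SFmode and a count
   of 3 (a homogeneous floating-point aggregate), whereas a struct mixing a
   float with an int returns -1.  */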
6187 static int
6188 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
6189 unsigned int *warn_psabi_flags)
6190 {
6191 machine_mode mode;
6192 HOST_WIDE_INT size;
6193
6194 switch (TREE_CODE (type))
6195 {
6196 case REAL_TYPE:
6197 mode = TYPE_MODE (type);
6198 if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
6199 return -1;
6200
6201 if (*modep == VOIDmode)
6202 *modep = mode;
6203
6204 if (*modep == mode)
6205 return 1;
6206
6207 break;
6208
6209 case COMPLEX_TYPE:
6210 mode = TYPE_MODE (TREE_TYPE (type));
6211 if (mode != DFmode && mode != SFmode)
6212 return -1;
6213
6214 if (*modep == VOIDmode)
6215 *modep = mode;
6216
6217 if (*modep == mode)
6218 return 2;
6219
6220 break;
6221
6222 case VECTOR_TYPE:
6223 /* Use V2SImode and V4SImode as representatives of all 64-bit
6224 and 128-bit vector types, whether or not those modes are
6225 supported with the present options. */
6226 size = int_size_in_bytes (type);
6227 switch (size)
6228 {
6229 case 8:
6230 mode = V2SImode;
6231 break;
6232 case 16:
6233 mode = V4SImode;
6234 break;
6235 default:
6236 return -1;
6237 }
6238
6239 if (*modep == VOIDmode)
6240 *modep = mode;
6241
6242 /* Vector modes are considered to be opaque: two vectors are
6243 equivalent for the purposes of being homogeneous aggregates
6244 if they are the same size. */
6245 if (*modep == mode)
6246 return 1;
6247
6248 break;
6249
6250 case ARRAY_TYPE:
6251 {
6252 int count;
6253 tree index = TYPE_DOMAIN (type);
6254
6255 /* Can't handle incomplete types nor sizes that are not
6256 fixed. */
6257 if (!COMPLETE_TYPE_P (type)
6258 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6259 return -1;
6260
6261 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep,
6262 warn_psabi_flags);
6263 if (count == -1
6264 || !index
6265 || !TYPE_MAX_VALUE (index)
6266 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6267 || !TYPE_MIN_VALUE (index)
6268 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6269 || count < 0)
6270 return -1;
6271
6272 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6273 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6274
6275 /* There must be no padding. */
6276 if (wi::to_wide (TYPE_SIZE (type))
6277 != count * GET_MODE_BITSIZE (*modep))
6278 return -1;
6279
6280 return count;
6281 }
6282
6283 case RECORD_TYPE:
6284 {
6285 int count = 0;
6286 int sub_count;
6287 tree field;
6288
6289 /* Can't handle incomplete types nor sizes that are not
6290 fixed. */
6291 if (!COMPLETE_TYPE_P (type)
6292 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6293 return -1;
6294
6295 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6296 {
6297 if (TREE_CODE (field) != FIELD_DECL)
6298 continue;
6299
6300 if (DECL_FIELD_ABI_IGNORED (field))
6301 {
6302 /* See whether this is something that earlier versions of
6303 GCC failed to ignore. */
6304 unsigned int flag;
6305 if (lookup_attribute ("no_unique_address",
6306 DECL_ATTRIBUTES (field)))
6307 flag = WARN_PSABI_NO_UNIQUE_ADDRESS;
6308 else if (cxx17_empty_base_field_p (field))
6309 flag = WARN_PSABI_EMPTY_CXX17_BASE;
6310 else
6311 /* No compatibility problem. */
6312 continue;
6313
6314 /* Simulate the old behavior when WARN_PSABI_FLAGS is null. */
6315 if (warn_psabi_flags)
6316 {
6317 *warn_psabi_flags |= flag;
6318 continue;
6319 }
6320 }
6321
6322 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6323 warn_psabi_flags);
6324 if (sub_count < 0)
6325 return -1;
6326 count += sub_count;
6327 }
6328
6329 /* There must be no padding. */
6330 if (wi::to_wide (TYPE_SIZE (type))
6331 != count * GET_MODE_BITSIZE (*modep))
6332 return -1;
6333
6334 return count;
6335 }
6336
6337 case UNION_TYPE:
6338 case QUAL_UNION_TYPE:
6339 {
6340 /* These aren't very interesting except in a degenerate case. */
6341 int count = 0;
6342 int sub_count;
6343 tree field;
6344
6345 /* Can't handle incomplete types nor sizes that are not
6346 fixed. */
6347 if (!COMPLETE_TYPE_P (type)
6348 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6349 return -1;
6350
6351 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6352 {
6353 if (TREE_CODE (field) != FIELD_DECL)
6354 continue;
6355
6356 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep,
6357 warn_psabi_flags);
6358 if (sub_count < 0)
6359 return -1;
6360 count = count > sub_count ? count : sub_count;
6361 }
6362
6363 /* There must be no padding. */
6364 if (wi::to_wide (TYPE_SIZE (type))
6365 != count * GET_MODE_BITSIZE (*modep))
6366 return -1;
6367
6368 return count;
6369 }
6370
6371 default:
6372 break;
6373 }
6374
6375 return -1;
6376 }
6377
6378 /* Return true if PCS_VARIANT should use VFP registers. */
6379 static bool
6380 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6381 {
6382 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6383 {
6384 static bool seen_thumb1_vfp = false;
6385
6386 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6387 {
6388 sorry ("Thumb-1 hard-float VFP ABI");
6389 /* sorry() is not immediately fatal, so only display this once. */
6390 seen_thumb1_vfp = true;
6391 }
6392
6393 return true;
6394 }
6395
6396 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6397 return false;
6398
6399 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6400 (TARGET_VFP_DOUBLE || !is_double));
6401 }
6402
6403 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6404 suitable for passing or returning in VFP registers for the PCS
6405 variant selected. If it is, then *BASE_MODE is updated to contain
6406 a machine mode describing each element of the argument's type and
6407 *COUNT to hold the number of such elements. */
6408 static bool
6409 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6410 machine_mode mode, const_tree type,
6411 machine_mode *base_mode, int *count)
6412 {
6413 machine_mode new_mode = VOIDmode;
6414
6415 /* If we have the type information, prefer that to working things
6416 out from the mode. */
6417 if (type)
6418 {
6419 unsigned int warn_psabi_flags = 0;
6420 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode,
6421 &warn_psabi_flags);
6422 if (ag_count > 0 && ag_count <= 4)
6423 {
6424 static unsigned last_reported_type_uid;
6425 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (type));
6426 int alt;
6427 if (warn_psabi
6428 && warn_psabi_flags
6429 && uid != last_reported_type_uid
6430 && ((alt = aapcs_vfp_sub_candidate (type, &new_mode, NULL))
6431 != ag_count))
6432 {
6433 const char *url
6434 = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
6435 gcc_assert (alt == -1);
6436 last_reported_type_uid = uid;
6437 /* Use TYPE_MAIN_VARIANT to strip any redundant const
6438 qualification. */
6439 if (warn_psabi_flags & WARN_PSABI_NO_UNIQUE_ADDRESS)
6440 inform (input_location, "parameter passing for argument of "
6441 "type %qT with %<[[no_unique_address]]%> members "
6442 "changed %{in GCC 10.1%}",
6443 TYPE_MAIN_VARIANT (type), url);
6444 else if (warn_psabi_flags & WARN_PSABI_EMPTY_CXX17_BASE)
6445 inform (input_location, "parameter passing for argument of "
6446 "type %qT when C++17 is enabled changed to match "
6447 "C++14 %{in GCC 10.1%}",
6448 TYPE_MAIN_VARIANT (type), url);
6449 }
6450 *count = ag_count;
6451 }
6452 else
6453 return false;
6454 }
6455 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6456 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6457 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6458 {
6459 *count = 1;
6460 new_mode = mode;
6461 }
6462 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6463 {
6464 *count = 2;
6465 new_mode = (mode == DCmode ? DFmode : SFmode);
6466 }
6467 else
6468 return false;
6469
6470
6471 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6472 return false;
6473
6474 *base_mode = new_mode;
6475
6476 if (TARGET_GENERAL_REGS_ONLY)
6477 error ("argument of type %qT not permitted with -mgeneral-regs-only",
6478 type);
6479
6480 return true;
6481 }
6482
6483 static bool
6484 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6485 machine_mode mode, const_tree type)
6486 {
6487 int count ATTRIBUTE_UNUSED;
6488 machine_mode ag_mode ATTRIBUTE_UNUSED;
6489
6490 if (!use_vfp_abi (pcs_variant, false))
6491 return false;
6492 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6493 &ag_mode, &count);
6494 }
6495
6496 static bool
6497 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6498 const_tree type)
6499 {
6500 if (!use_vfp_abi (pcum->pcs_variant, false))
6501 return false;
6502
6503 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6504 &pcum->aapcs_vfp_rmode,
6505 &pcum->aapcs_vfp_rcount);
6506 }
6507
6508 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6509 for the behaviour of this function. */
6510
6511 static bool
6512 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6513 const_tree type ATTRIBUTE_UNUSED)
6514 {
6515 int rmode_size
6516 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6517 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6518 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6519 int regno;
6520
6521 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6522 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6523 {
6524 pcum->aapcs_vfp_reg_alloc = mask << regno;
6525 if (mode == BLKmode
6526 || (mode == TImode && ! (TARGET_NEON || TARGET_HAVE_MVE))
6527 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6528 {
6529 int i;
6530 int rcount = pcum->aapcs_vfp_rcount;
6531 int rshift = shift;
6532 machine_mode rmode = pcum->aapcs_vfp_rmode;
6533 rtx par;
6534 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6535 {
6536 /* Avoid using unsupported vector modes. */
6537 if (rmode == V2SImode)
6538 rmode = DImode;
6539 else if (rmode == V4SImode)
6540 {
6541 rmode = DImode;
6542 rcount *= 2;
6543 rshift /= 2;
6544 }
6545 }
6546 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6547 for (i = 0; i < rcount; i++)
6548 {
6549 rtx tmp = gen_rtx_REG (rmode,
6550 FIRST_VFP_REGNUM + regno + i * rshift);
6551 tmp = gen_rtx_EXPR_LIST
6552 (VOIDmode, tmp,
6553 GEN_INT (i * GET_MODE_SIZE (rmode)));
6554 XVECEXP (par, 0, i) = tmp;
6555 }
6556
6557 pcum->aapcs_reg = par;
6558 }
6559 else
6560 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6561 return true;
6562 }
6563 return false;
6564 }
6565
6566 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6567 comment there for the behaviour of this function. */
6568
6569 static rtx
6570 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6571 machine_mode mode,
6572 const_tree type ATTRIBUTE_UNUSED)
6573 {
6574 if (!use_vfp_abi (pcs_variant, false))
6575 return NULL;
6576
6577 if (mode == BLKmode
6578 || (GET_MODE_CLASS (mode) == MODE_INT
6579 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6580 && !(TARGET_NEON || TARGET_HAVE_MVE)))
6581 {
6582 int count;
6583 machine_mode ag_mode;
6584 int i;
6585 rtx par;
6586 int shift;
6587
6588 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6589 &ag_mode, &count);
6590
6591 if (!(TARGET_NEON || TARGET_HAVE_MVE))
6592 {
6593 if (ag_mode == V2SImode)
6594 ag_mode = DImode;
6595 else if (ag_mode == V4SImode)
6596 {
6597 ag_mode = DImode;
6598 count *= 2;
6599 }
6600 }
6601 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6602 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6603 for (i = 0; i < count; i++)
6604 {
6605 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6606 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6607 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6608 XVECEXP (par, 0, i) = tmp;
6609 }
6610
6611 return par;
6612 }
6613
6614 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6615 }
6616
6617 static void
6618 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6619 machine_mode mode ATTRIBUTE_UNUSED,
6620 const_tree type ATTRIBUTE_UNUSED)
6621 {
6622 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6623 pcum->aapcs_vfp_reg_alloc = 0;
6624 return;
6625 }
6626
6627 #define AAPCS_CP(X) \
6628 { \
6629 aapcs_ ## X ## _cum_init, \
6630 aapcs_ ## X ## _is_call_candidate, \
6631 aapcs_ ## X ## _allocate, \
6632 aapcs_ ## X ## _is_return_candidate, \
6633 aapcs_ ## X ## _allocate_return_reg, \
6634 aapcs_ ## X ## _advance \
6635 }
6636
6637 /* Table of co-processors that can be used to pass arguments in
6638    registers.  Ideally no argument should be a candidate for more than
6639 one co-processor table entry, but the table is processed in order
6640 and stops after the first match. If that entry then fails to put
6641 the argument into a co-processor register, the argument will go on
6642 the stack. */
6643 static struct
6644 {
6645 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6646 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6647
6648 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6649 BLKmode) is a candidate for this co-processor's registers; this
6650 function should ignore any position-dependent state in
6651 CUMULATIVE_ARGS and only use call-type dependent information. */
6652 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6653
6654 /* Return true if the argument does get a co-processor register; it
6655 should set aapcs_reg to an RTX of the register allocated as is
6656 required for a return from FUNCTION_ARG. */
6657 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6658
6659 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6660 be returned in this co-processor's registers. */
6661 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6662
6663 /* Allocate and return an RTX element to hold the return type of a call. This
6664 routine must not fail and will only be called if is_return_candidate
6665 returned true with the same parameters. */
6666 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6667
6668 /* Finish processing this argument and prepare to start processing
6669 the next one. */
6670 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6671 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6672 {
6673 AAPCS_CP(vfp)
6674 };
6675
6676 #undef AAPCS_CP
6677
6678 static int
6679 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6680 const_tree type)
6681 {
6682 int i;
6683
6684 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6685 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6686 return i;
6687
6688 return -1;
6689 }
6690
6691 static int
6692 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6693 {
6694 /* We aren't passed a decl, so we can't check that a call is local.
6695 However, it isn't clear that that would be a win anyway, since it
6696 might limit some tail-calling opportunities. */
6697 enum arm_pcs pcs_variant;
6698
6699 if (fntype)
6700 {
6701 const_tree fndecl = NULL_TREE;
6702
6703 if (TREE_CODE (fntype) == FUNCTION_DECL)
6704 {
6705 fndecl = fntype;
6706 fntype = TREE_TYPE (fntype);
6707 }
6708
6709 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6710 }
6711 else
6712 pcs_variant = arm_pcs_default;
6713
6714 if (pcs_variant != ARM_PCS_AAPCS)
6715 {
6716 int i;
6717
6718 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6719 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6720 TYPE_MODE (type),
6721 type))
6722 return i;
6723 }
6724 return -1;
6725 }
6726
6727 static rtx
6728 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6729 const_tree fntype)
6730 {
6731 /* We aren't passed a decl, so we can't check that a call is local.
6732 However, it isn't clear that that would be a win anyway, since it
6733 might limit some tail-calling opportunities. */
6734 enum arm_pcs pcs_variant;
6735 int unsignedp ATTRIBUTE_UNUSED;
6736
6737 if (fntype)
6738 {
6739 const_tree fndecl = NULL_TREE;
6740
6741 if (TREE_CODE (fntype) == FUNCTION_DECL)
6742 {
6743 fndecl = fntype;
6744 fntype = TREE_TYPE (fntype);
6745 }
6746
6747 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6748 }
6749 else
6750 pcs_variant = arm_pcs_default;
6751
6752 /* Promote integer types. */
6753 if (type && INTEGRAL_TYPE_P (type))
6754 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6755
6756 if (pcs_variant != ARM_PCS_AAPCS)
6757 {
6758 int i;
6759
6760 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6761 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6762 type))
6763 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6764 mode, type);
6765 }
6766
6767   /* Promote small structs returned in a register to full-word size
6768 for big-endian AAPCS. */
6769 if (type && arm_return_in_msb (type))
6770 {
6771 HOST_WIDE_INT size = int_size_in_bytes (type);
6772 if (size % UNITS_PER_WORD != 0)
6773 {
6774 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6775 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6776 }
6777 }
6778
6779 return gen_rtx_REG (mode, R0_REGNUM);
6780 }
6781
6782 static rtx
6783 aapcs_libcall_value (machine_mode mode)
6784 {
6785 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6786 && GET_MODE_SIZE (mode) <= 4)
6787 mode = SImode;
6788
6789 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6790 }
6791
6792 /* Lay out a function argument using the AAPCS rules. The rule
6793 numbers referred to here are those in the AAPCS. */
6794 static void
6795 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6796 const_tree type, bool named)
6797 {
6798 int nregs, nregs2;
6799 int ncrn;
6800
6801 /* We only need to do this once per argument. */
6802 if (pcum->aapcs_arg_processed)
6803 return;
6804
6805 pcum->aapcs_arg_processed = true;
6806
6807 /* Special case: if named is false then we are handling an incoming
6808 anonymous argument which is on the stack. */
6809 if (!named)
6810 return;
6811
6812 /* Is this a potential co-processor register candidate? */
6813 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6814 {
6815 int slot = aapcs_select_call_coproc (pcum, mode, type);
6816 pcum->aapcs_cprc_slot = slot;
6817
6818 /* We don't have to apply any of the rules from part B of the
6819 preparation phase, these are handled elsewhere in the
6820 compiler. */
6821
6822 if (slot >= 0)
6823 {
6824 /* A Co-processor register candidate goes either in its own
6825 class of registers or on the stack. */
6826 if (!pcum->aapcs_cprc_failed[slot])
6827 {
6828 /* C1.cp - Try to allocate the argument to co-processor
6829 registers. */
6830 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6831 return;
6832
6833 /* C2.cp - Put the argument on the stack and note that we
6834 can't assign any more candidates in this slot. We also
6835 need to note that we have allocated stack space, so that
6836 we won't later try to split a non-cprc candidate between
6837 core registers and the stack. */
6838 pcum->aapcs_cprc_failed[slot] = true;
6839 pcum->can_split = false;
6840 }
6841
6842 /* We didn't get a register, so this argument goes on the
6843 stack. */
6844 gcc_assert (pcum->can_split == false);
6845 return;
6846 }
6847 }
6848
6849 /* C3 - For double-word aligned arguments, round the NCRN up to the
6850 next even number. */
6851 ncrn = pcum->aapcs_ncrn;
6852 if (ncrn & 1)
6853 {
6854 int res = arm_needs_doubleword_align (mode, type);
6855 /* Only warn during RTL expansion of call stmts, otherwise we would
6856 warn e.g. during gimplification even on functions that will be
6857 always inlined, and we'd warn multiple times. Don't warn when
6858 called in expand_function_start either, as we warn instead in
6859 arm_function_arg_boundary in that case. */
6860 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6861 inform (input_location, "parameter passing for argument of type "
6862 "%qT changed in GCC 7.1", type);
6863 else if (res > 0)
6864 ncrn++;
6865 }
6866
6867 nregs = ARM_NUM_REGS2(mode, type);
6868
6869 /* Sigh, this test should really assert that nregs > 0, but a GCC
6870 extension allows empty structs and then gives them zero size; it
6871 then allows such a structure to be passed by value. For some of
6872 the code below we have to pretend that such an argument has
6873 non-zero size so that we 'locate' it correctly either in
6874 registers or on the stack. */
6875 gcc_assert (nregs >= 0);
6876
6877 nregs2 = nregs ? nregs : 1;
6878
6879 /* C4 - Argument fits entirely in core registers. */
6880 if (ncrn + nregs2 <= NUM_ARG_REGS)
6881 {
6882 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6883 pcum->aapcs_next_ncrn = ncrn + nregs;
6884 return;
6885 }
6886
6887 /* C5 - Some core registers left and there are no arguments already
6888 on the stack: split this argument between the remaining core
6889 registers and the stack. */
6890 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6891 {
6892 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6893 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6894 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6895 return;
6896 }
6897
6898 /* C6 - NCRN is set to 4. */
6899 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6900
6901 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6902 return;
6903 }
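
/* Editorial illustration, not part of the original file: a hypothetical
   prototype showing how the AAPCS rules above lay out arguments in the core
   registers.  'a' needs doubleword alignment, so rule C3 rounds the NCRN up
   to an even number (here it is already 0) and rule C4 assigns it to r0/r1;
   'b' and 'c' then take r2 and r3; 'd' no longer fits and is passed on the
   stack (rules C6-C8).  */
extern void ex_aapcs_layout (long long a, int b, int c, int d);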
6904
6905 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6906 for a call to a function whose data type is FNTYPE.
6907 For a library call, FNTYPE is NULL. */
6908 void
6909 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6910 rtx libname,
6911 tree fndecl ATTRIBUTE_UNUSED)
6912 {
6913 /* Long call handling. */
6914 if (fntype)
6915 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6916 else
6917 pcum->pcs_variant = arm_pcs_default;
6918
6919 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6920 {
6921 if (arm_libcall_uses_aapcs_base (libname))
6922 pcum->pcs_variant = ARM_PCS_AAPCS;
6923
6924 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6925 pcum->aapcs_reg = NULL_RTX;
6926 pcum->aapcs_partial = 0;
6927 pcum->aapcs_arg_processed = false;
6928 pcum->aapcs_cprc_slot = -1;
6929 pcum->can_split = true;
6930
6931 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6932 {
6933 int i;
6934
6935 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6936 {
6937 pcum->aapcs_cprc_failed[i] = false;
6938 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6939 }
6940 }
6941 return;
6942 }
6943
6944 /* Legacy ABIs */
6945
6946 /* On the ARM, the offset starts at 0. */
6947 pcum->nregs = 0;
6948 pcum->iwmmxt_nregs = 0;
6949 pcum->can_split = true;
6950
6951 /* Varargs vectors are treated the same as long long.
6952 named_count avoids having to change the way arm handles 'named'. */
6953 pcum->named_count = 0;
6954 pcum->nargs = 0;
6955
6956 if (TARGET_REALLY_IWMMXT && fntype)
6957 {
6958 tree fn_arg;
6959
6960 for (fn_arg = TYPE_ARG_TYPES (fntype);
6961 fn_arg;
6962 fn_arg = TREE_CHAIN (fn_arg))
6963 pcum->named_count += 1;
6964
6965 if (! pcum->named_count)
6966 pcum->named_count = INT_MAX;
6967 }
6968 }
6969
6970 /* Return 2 if double word alignment is required for argument passing,
6971 but wasn't required before the fix for PR88469.
6972 Return 1 if double word alignment is required for argument passing.
6973 Return -1 if double word alignment used to be required for argument
6974 passing before the PR77728 ABI fix, but is not required anymore.
6975 Return 0 if double word alignment is not required and wasn't required
6976 before either. */
6977 static int
6978 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6979 {
6980 if (!type)
6981 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6982
6983 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6984 if (!AGGREGATE_TYPE_P (type))
6985 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6986
6987 /* Array types: Use member alignment of element type. */
6988 if (TREE_CODE (type) == ARRAY_TYPE)
6989 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6990
6991 int ret = 0;
6992 int ret2 = 0;
6993 /* Record/aggregate types: Use greatest member alignment of any member.
6994
6995 Note that we explicitly consider zero-sized fields here, even though
6996 they don't map to AAPCS machine types. For example, in:
6997
6998 struct __attribute__((aligned(8))) empty {};
6999
7000 struct s {
7001 [[no_unique_address]] empty e;
7002 int x;
7003 };
7004
7005 "s" contains only one Fundamental Data Type (the int field)
7006 but gains 8-byte alignment and size thanks to "e". */
7007 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7008 if (DECL_ALIGN (field) > PARM_BOUNDARY)
7009 {
7010 if (TREE_CODE (field) == FIELD_DECL)
7011 return 1;
7012 else
7013 /* Before the PR77728 fix, we also incorrectly considered
7014 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
7015 Make sure we can warn about that with -Wpsabi. */
7016 ret = -1;
7017 }
7018 else if (TREE_CODE (field) == FIELD_DECL
7019 && DECL_BIT_FIELD_TYPE (field)
7020 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
7021 ret2 = 1;
7022
7023 if (ret2)
7024 return 2;
7025
7026 return ret;
7027 }
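
/* Editorial illustration, not part of the original file: hypothetical
   parameter types and the value arm_needs_doubleword_align is expected to
   return for them on an AAPCS target (PARM_BOUNDARY == 32).  */
struct ex_dword_aligned { long long l; int i; };  /* 8-byte aligned member: 1.  */
struct ex_word_aligned { int a; int b; };         /* 4-byte alignment only: 0.  */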
7028
7029
7030 /* Determine where to put an argument to a function.
7031 Value is zero to push the argument on the stack,
7032 or a hard register in which to store the argument.
7033
7034 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7035 the preceding args and about the function being called.
7036 ARG is a description of the argument.
7037
7038 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
7039 other arguments are passed on the stack. If (NAMED == 0) (which happens
7040 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
7041 defined), say it is passed on the stack (function_prologue will
7042 indeed make it pass on the stack if necessary). */
7043
7044 static rtx
7045 arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
7046 {
7047 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7048 int nregs;
7049
7050 /* Handle the special case quickly. Pick an arbitrary value for op2 of
7051 a call insn (op3 of a call_value insn). */
7052 if (arg.end_marker_p ())
7053 return const0_rtx;
7054
7055 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7056 {
7057 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7058 return pcum->aapcs_reg;
7059 }
7060
7061 /* Varargs vectors are treated the same as long long.
7062 named_count avoids having to change the way arm handles 'named'. */
7063 if (TARGET_IWMMXT_ABI
7064 && arm_vector_mode_supported_p (arg.mode)
7065 && pcum->named_count > pcum->nargs + 1)
7066 {
7067 if (pcum->iwmmxt_nregs <= 9)
7068 return gen_rtx_REG (arg.mode,
7069 pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
7070 else
7071 {
7072 pcum->can_split = false;
7073 return NULL_RTX;
7074 }
7075 }
7076
7077 /* Put doubleword aligned quantities in even register pairs. */
7078 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
7079 {
7080 int res = arm_needs_doubleword_align (arg.mode, arg.type);
7081 if (res < 0 && warn_psabi)
7082 inform (input_location, "parameter passing for argument of type "
7083 "%qT changed in GCC 7.1", arg.type);
7084 else if (res > 0)
7085 {
7086 pcum->nregs++;
7087 if (res > 1 && warn_psabi)
7088 inform (input_location, "parameter passing for argument of type "
7089 "%qT changed in GCC 9.1", arg.type);
7090 }
7091 }
7092
7093 /* Only allow splitting an arg between regs and memory if all preceding
7094 args were allocated to regs. For args passed by reference we only count
7095 the reference pointer. */
7096 if (pcum->can_split)
7097 nregs = 1;
7098 else
7099 nregs = ARM_NUM_REGS2 (arg.mode, arg.type);
7100
7101 if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
7102 return NULL_RTX;
7103
7104 return gen_rtx_REG (arg.mode, pcum->nregs);
7105 }
7106
7107 static unsigned int
7108 arm_function_arg_boundary (machine_mode mode, const_tree type)
7109 {
7110 if (!ARM_DOUBLEWORD_ALIGN)
7111 return PARM_BOUNDARY;
7112
7113 int res = arm_needs_doubleword_align (mode, type);
7114 if (res < 0 && warn_psabi)
7115 inform (input_location, "parameter passing for argument of type %qT "
7116 "changed in GCC 7.1", type);
7117 if (res > 1 && warn_psabi)
7118 inform (input_location, "parameter passing for argument of type "
7119 "%qT changed in GCC 9.1", type);
7120
7121 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
7122 }
7123
7124 static int
7125 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
7126 {
7127 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7128 int nregs = pcum->nregs;
7129
7130 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7131 {
7132 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7133 return pcum->aapcs_partial;
7134 }
7135
7136 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
7137 return 0;
7138
7139 if (NUM_ARG_REGS > nregs
7140 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
7141 && pcum->can_split)
7142 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
7143
7144 return 0;
7145 }
7146
7147 /* Update the data in PCUM to advance over argument ARG. */
7148
7149 static void
7150 arm_function_arg_advance (cumulative_args_t pcum_v,
7151 const function_arg_info &arg)
7152 {
7153 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
7154
7155 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
7156 {
7157 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
7158
7159 if (pcum->aapcs_cprc_slot >= 0)
7160 {
7161 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
7162 arg.type);
7163 pcum->aapcs_cprc_slot = -1;
7164 }
7165
7166 /* Generic stuff. */
7167 pcum->aapcs_arg_processed = false;
7168 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
7169 pcum->aapcs_reg = NULL_RTX;
7170 pcum->aapcs_partial = 0;
7171 }
7172 else
7173 {
7174 pcum->nargs += 1;
7175 if (arm_vector_mode_supported_p (arg.mode)
7176 && pcum->named_count > pcum->nargs
7177 && TARGET_IWMMXT_ABI)
7178 pcum->iwmmxt_nregs += 1;
7179 else
7180 pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
7181 }
7182 }
7183
7184 /* Variable sized types are passed by reference. This is a GCC
7185 extension to the ARM ABI. */
7186
7187 static bool
7188 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
7189 {
7190 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
7191 }
7192 \f
7193 /* Encode the current state of the #pragma [no_]long_calls. */
7194 typedef enum
7195 {
7196 OFF, /* No #pragma [no_]long_calls is in effect. */
7197 LONG, /* #pragma long_calls is in effect. */
7198 SHORT /* #pragma no_long_calls is in effect. */
7199 } arm_pragma_enum;
7200
7201 static arm_pragma_enum arm_pragma_long_calls = OFF;
7202
7203 void
7204 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7205 {
7206 arm_pragma_long_calls = LONG;
7207 }
7208
7209 void
7210 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7211 {
7212 arm_pragma_long_calls = SHORT;
7213 }
7214
7215 void
7216 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
7217 {
7218 arm_pragma_long_calls = OFF;
7219 }
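
/* Editorial illustration, not part of the original file: source-level forms
   that drive the pragma state tracked above (function names are
   hypothetical).  Declarations seen inside the pragma scopes pick up the
   long_call/short_call default from arm_set_default_type_attributes below.  */
#pragma long_calls
extern void ex_far_helper (void);     /* gets __attribute__ ((long_call))  */
#pragma no_long_calls
extern void ex_near_helper (void);    /* gets __attribute__ ((short_call)) */
#pragma long_calls_off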
7220 \f
7221 /* Handle an attribute requiring a FUNCTION_DECL;
7222 arguments as in struct attribute_spec.handler. */
7223 static tree
7224 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
7225 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7226 {
7227 if (TREE_CODE (*node) != FUNCTION_DECL)
7228 {
7229 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7230 name);
7231 *no_add_attrs = true;
7232 }
7233
7234 return NULL_TREE;
7235 }
7236
7237 /* Handle an "interrupt" or "isr" attribute;
7238 arguments as in struct attribute_spec.handler. */
7239 static tree
7240 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
7241 bool *no_add_attrs)
7242 {
7243 if (DECL_P (*node))
7244 {
7245 if (TREE_CODE (*node) != FUNCTION_DECL)
7246 {
7247 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7248 name);
7249 *no_add_attrs = true;
7250 }
7251 else if (TARGET_VFP_BASE)
7252 {
7253 warning (OPT_Wattributes, "FP registers might be clobbered despite %qE "
7254 "attribute: compile with %<-mgeneral-regs-only%>", name);
7255 }
7256 /* FIXME: the argument if any is checked for type attributes;
7257 should it be checked for decl ones? */
7258 }
7259 else
7260 {
7261 if (TREE_CODE (*node) == FUNCTION_TYPE
7262 || TREE_CODE (*node) == METHOD_TYPE)
7263 {
7264 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
7265 {
7266 warning (OPT_Wattributes, "%qE attribute ignored",
7267 name);
7268 *no_add_attrs = true;
7269 }
7270 }
7271 else if (TREE_CODE (*node) == POINTER_TYPE
7272 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
7273 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
7274 && arm_isr_value (args) != ARM_FT_UNKNOWN)
7275 {
7276 *node = build_variant_type_copy (*node);
7277 TREE_TYPE (*node) = build_type_attribute_variant
7278 (TREE_TYPE (*node),
7279 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
7280 *no_add_attrs = true;
7281 }
7282 else
7283 {
7284 /* Possibly pass this attribute on from the type to a decl. */
7285 if (flags & ((int) ATTR_FLAG_DECL_NEXT
7286 | (int) ATTR_FLAG_FUNCTION_NEXT
7287 | (int) ATTR_FLAG_ARRAY_NEXT))
7288 {
7289 *no_add_attrs = true;
7290 return tree_cons (name, args, NULL_TREE);
7291 }
7292 else
7293 {
7294 warning (OPT_Wattributes, "%qE attribute ignored",
7295 name);
7296 }
7297 }
7298 }
7299
7300 return NULL_TREE;
7301 }
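
/* Editorial illustration, not part of the original file: hypothetical uses of
   the attribute handled above, on a function declaration and on a function
   type.  "IRQ" is one of the argument values accepted by arm_isr_value.  */
extern void ex_irq_handler (void) __attribute__ ((interrupt ("IRQ")));
typedef void __attribute__ ((isr)) ex_isr_handler_t (void);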
7302
7303 /* Handle a "pcs" attribute; arguments as in struct
7304 attribute_spec.handler. */
7305 static tree
7306 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
7307 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7308 {
7309 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
7310 {
7311 warning (OPT_Wattributes, "%qE attribute ignored", name);
7312 *no_add_attrs = true;
7313 }
7314 return NULL_TREE;
7315 }
7316
7317 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
7318 /* Handle the "notshared" attribute. This attribute is another way of
7319 requesting hidden visibility. ARM's compiler supports
7320 "__declspec(notshared)"; we support the same thing via an
7321 attribute. */
7322
7323 static tree
7324 arm_handle_notshared_attribute (tree *node,
7325 tree name ATTRIBUTE_UNUSED,
7326 tree args ATTRIBUTE_UNUSED,
7327 int flags ATTRIBUTE_UNUSED,
7328 bool *no_add_attrs)
7329 {
7330 tree decl = TYPE_NAME (*node);
7331
7332 if (decl)
7333 {
7334 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
7335 DECL_VISIBILITY_SPECIFIED (decl) = 1;
7336 *no_add_attrs = false;
7337 }
7338 return NULL_TREE;
7339 }
7340 #endif
7341
7342 /* This function returns true if a function with declaration FNDECL and type
7343 FNTYPE uses the stack to pass arguments or return variables and false
7344 otherwise. This is used for functions with the attributes
7345 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
7346 diagnostic messages if the stack is used. NAME is the name of the attribute
7347 used. */
7348
7349 static bool
7350 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
7351 {
7352 function_args_iterator args_iter;
7353 CUMULATIVE_ARGS args_so_far_v;
7354 cumulative_args_t args_so_far;
7355 bool first_param = true;
7356 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
7357
7358 /* Error out if any argument is passed on the stack. */
7359 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
7360 args_so_far = pack_cumulative_args (&args_so_far_v);
7361 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
7362 {
7363 rtx arg_rtx;
7364
7365 prev_arg_type = arg_type;
7366 if (VOID_TYPE_P (arg_type))
7367 continue;
7368
7369 function_arg_info arg (arg_type, /*named=*/true);
7370 if (!first_param)
7371 /* ??? We should advance after processing the argument and pass
7372 the argument we're advancing past. */
7373 arm_function_arg_advance (args_so_far, arg);
7374 arg_rtx = arm_function_arg (args_so_far, arg);
7375 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7376 {
7377 error ("%qE attribute not available to functions with arguments "
7378 "passed on the stack", name);
7379 return true;
7380 }
7381 first_param = false;
7382 }
7383
7384 /* Error out for variadic functions since we cannot control how many
7385 arguments will be passed and thus the stack could be used. stdarg_p () is not
7386 used for this check, to avoid traversing the arguments twice. */
7387 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7388 {
7389 error ("%qE attribute not available to functions with variable number "
7390 "of arguments", name);
7391 return true;
7392 }
7393
7394 /* Error out if return value is passed on the stack. */
7395 ret_type = TREE_TYPE (fntype);
7396 if (arm_return_in_memory (ret_type, fntype))
7397 {
7398 error ("%qE attribute not available to functions that return value on "
7399 "the stack", name);
7400 return true;
7401 }
7402 return false;
7403 }
7404
7405 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7406 function will check whether the attribute is allowed here and will add the
7407 attribute to the function declaration tree or otherwise issue a warning. */
7408
7409 static tree
7410 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7411 tree /* args */,
7412 int /* flags */,
7413 bool *no_add_attrs)
7414 {
7415 tree fndecl;
7416
7417 if (!use_cmse)
7418 {
7419 *no_add_attrs = true;
7420 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7421 "option.", name);
7422 return NULL_TREE;
7423 }
7424
7425 /* Ignore attribute for function types. */
7426 if (TREE_CODE (*node) != FUNCTION_DECL)
7427 {
7428 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7429 name);
7430 *no_add_attrs = true;
7431 return NULL_TREE;
7432 }
7433
7434 fndecl = *node;
7435
7436 /* Warn for static linkage functions. */
7437 if (!TREE_PUBLIC (fndecl))
7438 {
7439 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7440 "with static linkage", name);
7441 *no_add_attrs = true;
7442 return NULL_TREE;
7443 }
7444
7445 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7446 TREE_TYPE (fndecl));
7447 return NULL_TREE;
7448 }
7449
7450
7451 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7452 function will check whether the attribute is allowed here and will add the
7453 attribute to the function type tree or otherwise issue a diagnostic. The
7454 reason we check this at declaration time is to only allow the use of the
7455 attribute with declarations of function pointers and not function
7456 declarations. This function checks NODE is of the expected type and issues
7457 diagnostics otherwise using NAME. If it is not of the expected type
7458 *NO_ADD_ATTRS will be set to true. */
7459
7460 static tree
7461 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7462 tree /* args */,
7463 int /* flags */,
7464 bool *no_add_attrs)
7465 {
7466 tree decl = NULL_TREE, fntype = NULL_TREE;
7467 tree type;
7468
7469 if (!use_cmse)
7470 {
7471 *no_add_attrs = true;
7472 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7473 "option.", name);
7474 return NULL_TREE;
7475 }
7476
7477 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7478 {
7479 decl = *node;
7480 fntype = TREE_TYPE (decl);
7481 }
7482
7483 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7484 fntype = TREE_TYPE (fntype);
7485
7486 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7487 {
7488 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7489 "function pointer", name);
7490 *no_add_attrs = true;
7491 return NULL_TREE;
7492 }
7493
7494 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7495
7496 if (*no_add_attrs)
7497 return NULL_TREE;
7498
7499 /* Prevent trees being shared among function types with and without
7500 cmse_nonsecure_call attribute. */
7501 type = TREE_TYPE (decl);
7502
7503 type = build_distinct_type_copy (type);
7504 TREE_TYPE (decl) = type;
7505 fntype = type;
7506
7507 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7508 {
7509 type = fntype;
7510 fntype = TREE_TYPE (fntype);
7511 fntype = build_distinct_type_copy (fntype);
7512 TREE_TYPE (type) = fntype;
7513 }
7514
7515 /* Construct a type attribute and add it to the function type. */
7516 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7517 TYPE_ATTRIBUTES (fntype));
7518 TYPE_ATTRIBUTES (fntype) = attrs;
7519 return NULL_TREE;
7520 }
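
/* Editorial illustration, not part of the original file: hypothetical
   declarations accepted by the two CMSE attribute handlers above when
   compiling with -mcmse.  The entry attribute goes on a function
   declaration; the call attribute goes on the base type of a function
   pointer, as enforced above.  */
extern int ex_secure_gateway (int) __attribute__ ((cmse_nonsecure_entry));
typedef void __attribute__ ((cmse_nonsecure_call)) ex_ns_func_t (int);
extern ex_ns_func_t *ex_ns_callback;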
7521
7522 /* Return 0 if the attributes for two types are incompatible, 1 if they
7523 are compatible, and 2 if they are nearly compatible (which causes a
7524 warning to be generated). */
7525 static int
7526 arm_comp_type_attributes (const_tree type1, const_tree type2)
7527 {
7528 int l1, l2, s1, s2;
7529
7530 tree attrs1 = lookup_attribute ("Advanced SIMD type",
7531 TYPE_ATTRIBUTES (type1));
7532 tree attrs2 = lookup_attribute ("Advanced SIMD type",
7533 TYPE_ATTRIBUTES (type2));
7534 if (bool (attrs1) != bool (attrs2))
7535 return 0;
7536 if (attrs1 && !attribute_value_equal (attrs1, attrs2))
7537 return 0;
7538
7539 /* Check for mismatch of non-default calling convention. */
7540 if (TREE_CODE (type1) != FUNCTION_TYPE)
7541 return 1;
7542
7543 /* Check for mismatched call attributes. */
7544 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7545 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7546 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7547 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7548
7549 /* Only bother to check if an attribute is defined. */
7550 if (l1 | l2 | s1 | s2)
7551 {
7552 /* If one type has an attribute, the other must have the same attribute. */
7553 if ((l1 != l2) || (s1 != s2))
7554 return 0;
7555
7556 /* Disallow mixed attributes. */
7557 if ((l1 & s2) || (l2 & s1))
7558 return 0;
7559 }
7560
7561 /* Check for mismatched ISR attribute. */
7562 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7563 if (! l1)
7564 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7565 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7566 if (! l2)
7567 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7568 if (l1 != l2)
7569 return 0;
7570
7571 l1 = lookup_attribute ("cmse_nonsecure_call",
7572 TYPE_ATTRIBUTES (type1)) != NULL;
7573 l2 = lookup_attribute ("cmse_nonsecure_call",
7574 TYPE_ATTRIBUTES (type2)) != NULL;
7575
7576 if (l1 != l2)
7577 return 0;
7578
7579 return 1;
7580 }
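
/* Editorial illustration, not part of the original file: two hypothetical
   function types that the check above reports as incompatible, because only
   one of them carries the "long_call" attribute.  */
typedef void ex_plain_fn_t (void);
typedef void __attribute__ ((long_call)) ex_long_fn_t (void);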
7581
7582 /* Assigns default attributes to newly defined type. This is used to
7583 set short_call/long_call attributes for function types of
7584 functions defined inside corresponding #pragma scopes. */
7585 static void
7586 arm_set_default_type_attributes (tree type)
7587 {
7588 /* Add __attribute__ ((long_call)) to all functions, when
7589 inside #pragma long_calls or __attribute__ ((short_call)),
7590 when inside #pragma no_long_calls. */
7591 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7592 {
7593 tree type_attr_list, attr_name;
7594 type_attr_list = TYPE_ATTRIBUTES (type);
7595
7596 if (arm_pragma_long_calls == LONG)
7597 attr_name = get_identifier ("long_call");
7598 else if (arm_pragma_long_calls == SHORT)
7599 attr_name = get_identifier ("short_call");
7600 else
7601 return;
7602
7603 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7604 TYPE_ATTRIBUTES (type) = type_attr_list;
7605 }
7606 }
7607 \f
7608 /* Return true if DECL is known to be linked into section SECTION. */
7609
7610 static bool
7611 arm_function_in_section_p (tree decl, section *section)
7612 {
7613 /* We can only be certain about the prevailing symbol definition. */
7614 if (!decl_binds_to_current_def_p (decl))
7615 return false;
7616
7617 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7618 if (!DECL_SECTION_NAME (decl))
7619 {
7620 /* Make sure that we will not create a unique section for DECL. */
7621 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7622 return false;
7623 }
7624
7625 return function_section (decl) == section;
7626 }
7627
7628 /* Return nonzero if a 32-bit "long_call" should be generated for
7629 a call from the current function to DECL. We generate a long_call
7630 if the function:
7631
7632 a. has an __attribute__ ((long_call))
7633 or b. is within the scope of a #pragma long_calls
7634 or c. the -mlong-calls command line switch has been specified
7635
7636 However we do not generate a long call if the function:
7637
7638 d. has an __attribute__ ((short_call))
7639 or e. is inside the scope of a #pragma no_long_calls
7640 or f. is defined in the same section as the current function. */
7641
7642 bool
7643 arm_is_long_call_p (tree decl)
7644 {
7645 tree attrs;
7646
7647 if (!decl)
7648 return TARGET_LONG_CALLS;
7649
7650 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7651 if (lookup_attribute ("short_call", attrs))
7652 return false;
7653
7654 /* For "f", be conservative, and only cater for cases in which the
7655 whole of the current function is placed in the same section. */
7656 if (!flag_reorder_blocks_and_partition
7657 && TREE_CODE (decl) == FUNCTION_DECL
7658 && arm_function_in_section_p (decl, current_function_section ()))
7659 return false;
7660
7661 if (lookup_attribute ("long_call", attrs))
7662 return true;
7663
7664 return TARGET_LONG_CALLS;
7665 }
7666
7667 /* Return nonzero if it is ok to make a tail-call to DECL. */
7668 static bool
7669 arm_function_ok_for_sibcall (tree decl, tree exp)
7670 {
7671 unsigned long func_type;
7672
7673 if (cfun->machine->sibcall_blocked)
7674 return false;
7675
7676 if (TARGET_FDPIC)
7677 {
7678 /* In FDPIC, never tailcall something for which we have no decl:
7679 the target function could be in a different module, requiring
7680 a different FDPIC register value. */
7681 if (decl == NULL)
7682 return false;
7683 }
7684
7685 /* Never tailcall something if we are generating code for Thumb-1. */
7686 if (TARGET_THUMB1)
7687 return false;
7688
7689 /* The PIC register is live on entry to VxWorks PLT entries, so we
7690 must make the call before restoring the PIC register. */
7691 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7692 return false;
7693
7694 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7695 may be used both as target of the call and base register for restoring
7696 the VFP registers. */
7697 if (TARGET_APCS_FRAME && TARGET_ARM
7698 && TARGET_HARD_FLOAT
7699 && decl && arm_is_long_call_p (decl))
7700 return false;
7701
7702 /* If we are interworking and the function is not declared static
7703 then we can't tail-call it unless we know that it exists in this
7704 compilation unit (since it might be a Thumb routine). */
7705 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7706 && !TREE_ASM_WRITTEN (decl))
7707 return false;
7708
7709 func_type = arm_current_func_type ();
7710 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7711 if (IS_INTERRUPT (func_type))
7712 return false;
7713
7714 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7715 generated for entry functions themselves. */
7716 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7717 return false;
7718
7719 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7720 this would complicate matters for later code generation. */
7721 if (TREE_CODE (exp) == CALL_EXPR)
7722 {
7723 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7724 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7725 return false;
7726 }
7727
7728 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7729 {
7730 /* Check that the return value locations are the same. For
7731 example that we aren't returning a value from the sibling in
7732 a VFP register but then need to transfer it to a core
7733 register. */
7734 rtx a, b;
7735 tree decl_or_type = decl;
7736
7737 /* If it is an indirect function pointer, get the function type. */
7738 if (!decl)
7739 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7740
7741 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7742 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7743 cfun->decl, false);
7744 if (!rtx_equal_p (a, b))
7745 return false;
7746 }
7747
7748 /* Never tailcall if function may be called with a misaligned SP. */
7749 if (IS_STACKALIGN (func_type))
7750 return false;
7751
7752 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7753 references should become a NOP. Don't convert such calls into
7754 sibling calls. */
7755 if (TARGET_AAPCS_BASED
7756 && arm_abi == ARM_ABI_AAPCS
7757 && decl
7758 && DECL_WEAK (decl))
7759 return false;
7760
7761 /* We cannot do a tailcall for an indirect call by descriptor if all the
7762 argument registers are used because the only register left to load the
7763 address is IP and it will already contain the static chain. */
7764 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7765 {
7766 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7767 CUMULATIVE_ARGS cum;
7768 cumulative_args_t cum_v;
7769
7770 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7771 cum_v = pack_cumulative_args (&cum);
7772
7773 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7774 {
7775 tree type = TREE_VALUE (t);
7776 if (!VOID_TYPE_P (type))
7777 {
7778 function_arg_info arg (type, /*named=*/true);
7779 arm_function_arg_advance (cum_v, arg);
7780 }
7781 }
7782
7783 function_arg_info arg (integer_type_node, /*named=*/true);
7784 if (!arm_function_arg (cum_v, arg))
7785 return false;
7786 }
7787
7788 /* Everything else is ok. */
7789 return true;
7790 }
7791
7792 \f
7793 /* Addressing mode support functions. */
7794
7795 /* Return nonzero if X is a legitimate immediate operand when compiling
7796 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7797 int
7798 legitimate_pic_operand_p (rtx x)
7799 {
7800 if (SYMBOL_REF_P (x)
7801 || (GET_CODE (x) == CONST
7802 && GET_CODE (XEXP (x, 0)) == PLUS
7803 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7804 return 0;
7805
7806 return 1;
7807 }
7808
7809 /* Record that the current function needs a PIC register. If PIC_REG is null,
7810 a new pseudo is allocated as the PIC register, otherwise PIC_REG is used. In
7811 both cases cfun->machine->pic_reg is initialized if we have not already done
7812 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
7813 the PIC register is reloaded at the current position in the instruction stream
7814 regardless of whether it was loaded before. Otherwise, it is only loaded
7815 if that has not already been done (crtl->uses_pic_offset_table is null). Note
7816 that a nonnull PIC_REG is only supported if COMPUTE_NOW is true and a null
7817 PIC_REG is only supported if COMPUTE_NOW is false. */
7818
7819 static void
7820 require_pic_register (rtx pic_reg, bool compute_now)
7821 {
7822 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7823
7824 /* A lot of the logic here is made obscure by the fact that this
7825 routine gets called as part of the rtx cost estimation process.
7826 We don't want those calls to affect any assumptions about the real
7827 function; and further, we can't call entry_of_function() until we
7828 start the real expansion process. */
7829 if (!crtl->uses_pic_offset_table || compute_now)
7830 {
7831 gcc_assert (can_create_pseudo_p ()
7832 || (pic_reg != NULL_RTX
7833 && REG_P (pic_reg)
7834 && GET_MODE (pic_reg) == Pmode));
7835 if (arm_pic_register != INVALID_REGNUM
7836 && !compute_now
7837 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7838 {
7839 if (!cfun->machine->pic_reg)
7840 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7841
7842 /* Play games to avoid marking the function as needing pic
7843 if we are being called as part of the cost-estimation
7844 process. */
7845 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7846 crtl->uses_pic_offset_table = 1;
7847 }
7848 else
7849 {
7850 rtx_insn *seq, *insn;
7851
7852 if (pic_reg == NULL_RTX)
7853 pic_reg = gen_reg_rtx (Pmode);
7854 if (!cfun->machine->pic_reg)
7855 cfun->machine->pic_reg = pic_reg;
7856
7857 /* Play games to avoid marking the function as needing pic
7858 if we are being called as part of the cost-estimation
7859 process. */
7860 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7861 {
7862 crtl->uses_pic_offset_table = 1;
7863 start_sequence ();
7864
7865 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7866 && arm_pic_register > LAST_LO_REGNUM
7867 && !compute_now)
7868 emit_move_insn (cfun->machine->pic_reg,
7869 gen_rtx_REG (Pmode, arm_pic_register));
7870 else
7871 arm_load_pic_register (0UL, pic_reg);
7872
7873 seq = get_insns ();
7874 end_sequence ();
7875
7876 for (insn = seq; insn; insn = NEXT_INSN (insn))
7877 if (INSN_P (insn))
7878 INSN_LOCATION (insn) = prologue_location;
7879
7880 /* We can be called during expansion of PHI nodes, where
7881 we can't yet emit instructions directly in the final
7882 insn stream. Queue the insns on the entry edge, they will
7883 be committed after everything else is expanded. */
7884 if (currently_expanding_to_rtl)
7885 insert_insn_on_edge (seq,
7886 single_succ_edge
7887 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7888 else
7889 emit_insn (seq);
7890 }
7891 }
7892 }
7893 }
7894
7895 /* Generate insns to calculate the address of ORIG in pic mode. */
7896 static rtx_insn *
7897 calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
7898 {
7899 rtx pat;
7900 rtx mem;
7901
7902 pat = gen_calculate_pic_address (reg, pic_reg, orig);
7903
7904 /* Make the MEM as close to a constant as possible. */
7905 mem = SET_SRC (pat);
7906 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7907 MEM_READONLY_P (mem) = 1;
7908 MEM_NOTRAP_P (mem) = 1;
7909
7910 return emit_insn (pat);
7911 }
7912
7913 /* Legitimize a PIC load of ORIG into REG. If REG is NULL, a new pseudo is
7914 created to hold the result of the load. If not NULL, PIC_REG indicates
7915 which register to use as the PIC register, otherwise it is decided by the
7916 register allocator. COMPUTE_NOW forces the PIC register to be loaded at the
7917 current location in the instruction stream, regardless of whether it was
7918 loaded previously. Note that a nonnull PIC_REG is only supported if
7919 COMPUTE_NOW is true and a null PIC_REG only if COMPUTE_NOW is false.
7920
7921 Returns the register REG into which the PIC load is performed. */
7922
7923 rtx
7924 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7925 bool compute_now)
7926 {
7927 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7928
7929 if (SYMBOL_REF_P (orig)
7930 || LABEL_REF_P (orig))
7931 {
7932 if (reg == 0)
7933 {
7934 gcc_assert (can_create_pseudo_p ());
7935 reg = gen_reg_rtx (Pmode);
7936 }
7937
7938 /* VxWorks does not impose a fixed gap between segments; the run-time
7939 gap can be different from the object-file gap. We therefore can't
7940 use GOTOFF unless we are absolutely sure that the symbol is in the
7941 same segment as the GOT. Unfortunately, the flexibility of linker
7942 scripts means that we can't be sure of that in general, so assume
7943 that GOTOFF is never valid on VxWorks. */
7944 /* References to weak symbols cannot be resolved locally: they
7945 may be overridden by a non-weak definition at link time. */
7946 rtx_insn *insn;
7947 if ((LABEL_REF_P (orig)
7948 || (SYMBOL_REF_P (orig)
7949 && SYMBOL_REF_LOCAL_P (orig)
7950 && (SYMBOL_REF_DECL (orig)
7951 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
7952 && (!SYMBOL_REF_FUNCTION_P (orig)
7953 || arm_fdpic_local_funcdesc_p (orig))))
7954 && NEED_GOT_RELOC
7955 && arm_pic_data_is_text_relative)
7956 insn = arm_pic_static_addr (orig, reg);
7957 else
7958 {
7959 /* If this function doesn't have a pic register, create one now. */
7960 require_pic_register (pic_reg, compute_now);
7961
7962 if (pic_reg == NULL_RTX)
7963 pic_reg = cfun->machine->pic_reg;
7964
7965 insn = calculate_pic_address_constant (reg, pic_reg, orig);
7966 }
7967
7968 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7969 by loop. */
7970 set_unique_reg_note (insn, REG_EQUAL, orig);
7971
7972 return reg;
7973 }
7974 else if (GET_CODE (orig) == CONST)
7975 {
7976 rtx base, offset;
7977
7978 if (GET_CODE (XEXP (orig, 0)) == PLUS
7979 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7980 return orig;
7981
7982 /* Handle the case where we have: const (UNSPEC_TLS). */
7983 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7984 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7985 return orig;
7986
7987 /* Handle the case where we have:
7988 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7989 CONST_INT. */
7990 if (GET_CODE (XEXP (orig, 0)) == PLUS
7991 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7992 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7993 {
7994 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7995 return orig;
7996 }
7997
7998 if (reg == 0)
7999 {
8000 gcc_assert (can_create_pseudo_p ());
8001 reg = gen_reg_rtx (Pmode);
8002 }
8003
8004 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
8005
8006 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
8007 pic_reg, compute_now);
8008 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
8009 base == reg ? 0 : reg, pic_reg,
8010 compute_now);
8011
8012 if (CONST_INT_P (offset))
8013 {
8014 /* The base register doesn't really matter, we only want to
8015 test the index for the appropriate mode. */
8016 if (!arm_legitimate_index_p (mode, offset, SET, 0))
8017 {
8018 gcc_assert (can_create_pseudo_p ());
8019 offset = force_reg (Pmode, offset);
8020 }
8021
8022 if (CONST_INT_P (offset))
8023 return plus_constant (Pmode, base, INTVAL (offset));
8024 }
8025
8026 if (GET_MODE_SIZE (mode) > 4
8027 && (GET_MODE_CLASS (mode) == MODE_INT
8028 || TARGET_SOFT_FLOAT))
8029 {
8030 emit_insn (gen_addsi3 (reg, base, offset));
8031 return reg;
8032 }
8033
8034 return gen_rtx_PLUS (Pmode, base, offset);
8035 }
8036
8037 return orig;
8038 }
8039
8040
8041 /* Whether a register is callee saved or not. This is necessary because, when
8042 optimizing for size on Thumb-1 targets, high registers are marked as caller
8043 saved despite being callee saved, in order to avoid using them. */
8044 #define callee_saved_reg_p(reg) \
8045 (!call_used_or_fixed_reg_p (reg) \
8046 || (TARGET_THUMB1 && optimize_size \
8047 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
8048
8049 /* Return a mask for the call-clobbered low registers that are unused
8050 at the end of the prologue. */
8051 static unsigned long
8052 thumb1_prologue_unused_call_clobbered_lo_regs (void)
8053 {
8054 unsigned long mask = 0;
8055 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
8056
8057 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8058 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
8059 mask |= 1 << (reg - FIRST_LO_REGNUM);
8060 return mask;
8061 }
8062
8063 /* Similarly for the start of the epilogue. */
8064 static unsigned long
8065 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
8066 {
8067 unsigned long mask = 0;
8068 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
8069
8070 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
8071 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
8072 mask |= 1 << (reg - FIRST_LO_REGNUM);
8073 return mask;
8074 }
8075
8076 /* Find a spare register to use during the prolog of a function. */
8077
8078 static int
8079 thumb_find_work_register (unsigned long pushed_regs_mask)
8080 {
8081 int reg;
8082
8083 unsigned long unused_regs
8084 = thumb1_prologue_unused_call_clobbered_lo_regs ();
8085
8086 /* Check the argument registers first as these are call-used. The
8087 register allocation order means that sometimes r3 might be used
8088 but earlier argument registers might not, so check them all. */
8089 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
8090 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
8091 return reg;
8092
8093 /* Otherwise look for a call-saved register that is going to be pushed. */
8094 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
8095 if (pushed_regs_mask & (1 << reg))
8096 return reg;
8097
8098 if (TARGET_THUMB2)
8099 {
8100 /* Thumb-2 can use high regs. */
8101 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
8102 if (pushed_regs_mask & (1 << reg))
8103 return reg;
8104 }
8105 /* Something went wrong - thumb_compute_save_reg_mask()
8106 should have arranged for a suitable register to be pushed. */
8107 gcc_unreachable ();
8108 }
8109
8110 static GTY(()) int pic_labelno;
8111
8112 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
8113 low register. */
8114
8115 void
8116 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
8117 {
8118 rtx l1, labelno, pic_tmp, pic_rtx;
8119
8120 if (crtl->uses_pic_offset_table == 0
8121 || TARGET_SINGLE_PIC_BASE
8122 || TARGET_FDPIC)
8123 return;
8124
8125 gcc_assert (flag_pic);
8126
8127 if (pic_reg == NULL_RTX)
8128 pic_reg = cfun->machine->pic_reg;
8129 if (TARGET_VXWORKS_RTP)
8130 {
8131 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
8132 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8133 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
8134
8135 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
8136
8137 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8138 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
8139 }
8140 else
8141 {
8142 /* We use an UNSPEC rather than a LABEL_REF because this label
8143 never appears in the code stream. */
8144
8145 labelno = GEN_INT (pic_labelno++);
8146 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8147 l1 = gen_rtx_CONST (VOIDmode, l1);
8148
8149 /* On the ARM the PC register contains 'dot + 8' at the time of the
8150 addition, on the Thumb it is 'dot + 4'. */
8151 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8152 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
8153 UNSPEC_GOTSYM_OFF);
8154 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
8155
8156 if (TARGET_32BIT)
8157 {
8158 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8159 }
8160 else /* TARGET_THUMB1 */
8161 {
8162 if (arm_pic_register != INVALID_REGNUM
8163 && REGNO (pic_reg) > LAST_LO_REGNUM)
8164 {
8165 /* We will have pushed the pic register, so we should always be
8166 able to find a work register. */
8167 pic_tmp = gen_rtx_REG (SImode,
8168 thumb_find_work_register (saved_regs));
8169 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
8170 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
8171 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
8172 }
8173 else if (arm_pic_register != INVALID_REGNUM
8174 && arm_pic_register > LAST_LO_REGNUM
8175 && REGNO (pic_reg) <= LAST_LO_REGNUM)
8176 {
8177 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8178 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
8179 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
8180 }
8181 else
8182 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
8183 }
8184 }
8185
8186 /* Need to emit this whether or not we obey regdecls,
8187 since setjmp/longjmp can cause life info to screw up. */
8188 emit_use (pic_reg);
8189 }
8190
8191 /* Try to determine whether an object, referenced via ORIG, will be
8192 placed in the text or data segment. This is used in FDPIC mode, to
8193 decide which relocations to use when accessing ORIG. *IS_READONLY
8194 is set to true if ORIG is a read-only location, false otherwise.
8195 Return true if we could determine the location of ORIG, false
8196 otherwise. *IS_READONLY is valid only when we return true. */
8197 static bool
8198 arm_is_segment_info_known (rtx orig, bool *is_readonly)
8199 {
8200 *is_readonly = false;
8201
8202 if (LABEL_REF_P (orig))
8203 {
8204 *is_readonly = true;
8205 return true;
8206 }
8207
8208 if (SYMBOL_REF_P (orig))
8209 {
8210 if (CONSTANT_POOL_ADDRESS_P (orig))
8211 {
8212 *is_readonly = true;
8213 return true;
8214 }
8215 if (SYMBOL_REF_LOCAL_P (orig)
8216 && !SYMBOL_REF_EXTERNAL_P (orig)
8217 && SYMBOL_REF_DECL (orig)
8218 && (!DECL_P (SYMBOL_REF_DECL (orig))
8219 || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
8220 {
8221 tree decl = SYMBOL_REF_DECL (orig);
8222 tree init = (TREE_CODE (decl) == VAR_DECL)
8223 ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
8224 ? decl : 0;
8225 int reloc = 0;
8226 bool named_section, readonly;
8227
8228 if (init && init != error_mark_node)
8229 reloc = compute_reloc_for_constant (init);
8230
8231 named_section = TREE_CODE (decl) == VAR_DECL
8232 && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
8233 readonly = decl_readonly_section (decl, reloc);
8234
8235 /* We don't know where the link script will put a named
8236 section, so return false in such a case. */
8237 if (named_section)
8238 return false;
8239
8240 *is_readonly = readonly;
8241 return true;
8242 }
8243
8244 /* We don't know. */
8245 return false;
8246 }
8247
8248 gcc_unreachable ();
8249 }
8250
8251 /* Generate code to load the address of a static var when flag_pic is set. */
8252 static rtx_insn *
8253 arm_pic_static_addr (rtx orig, rtx reg)
8254 {
8255 rtx l1, labelno, offset_rtx;
8256 rtx_insn *insn;
8257
8258 gcc_assert (flag_pic);
8259
8260 bool is_readonly = false;
8261 bool info_known = false;
8262
8263 if (TARGET_FDPIC
8264 && SYMBOL_REF_P (orig)
8265 && !SYMBOL_REF_FUNCTION_P (orig))
8266 info_known = arm_is_segment_info_known (orig, &is_readonly);
8267
8268 if (TARGET_FDPIC
8269 && SYMBOL_REF_P (orig)
8270 && !SYMBOL_REF_FUNCTION_P (orig)
8271 && !info_known)
8272 {
8273 /* We don't know where orig is stored, so we have to be
8274 pessimistic and use a GOT relocation. */
8275 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8276
8277 insn = calculate_pic_address_constant (reg, pic_reg, orig);
8278 }
8279 else if (TARGET_FDPIC
8280 && SYMBOL_REF_P (orig)
8281 && (SYMBOL_REF_FUNCTION_P (orig)
8282 || !is_readonly))
8283 {
8284 /* We use the GOTOFF relocation. */
8285 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
8286
8287 rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
8288 emit_insn (gen_movsi (reg, l1));
8289 insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
8290 }
8291 else
8292 {
8293 /* Not FDPIC, not a SYMBOL_REF, or a read-only symbol: we can
8294 use PC-relative access. */
8295 /* We use an UNSPEC rather than a LABEL_REF because this label
8296 never appears in the code stream. */
8297 labelno = GEN_INT (pic_labelno++);
8298 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8299 l1 = gen_rtx_CONST (VOIDmode, l1);
8300
8301 /* On the ARM the PC register contains 'dot + 8' at the time of the
8302 addition, on the Thumb it is 'dot + 4'. */
8303 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
8304 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
8305 UNSPEC_SYMBOL_OFFSET);
8306 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
8307
8308 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
8309 labelno));
8310 }
8311
8312 return insn;
8313 }
8314
8315 /* Return nonzero if X is valid as an ARM state addressing register. */
8316 static int
8317 arm_address_register_rtx_p (rtx x, int strict_p)
8318 {
8319 int regno;
8320
8321 if (!REG_P (x))
8322 return 0;
8323
8324 regno = REGNO (x);
8325
8326 if (strict_p)
8327 return ARM_REGNO_OK_FOR_BASE_P (regno);
8328
8329 return (regno <= LAST_ARM_REGNUM
8330 || regno >= FIRST_PSEUDO_REGISTER
8331 || regno == FRAME_POINTER_REGNUM
8332 || regno == ARG_POINTER_REGNUM);
8333 }
8334
8335 /* Return TRUE if this rtx is the difference of a symbol and a label,
8336 and will reduce to a PC-relative relocation in the object file.
8337 Expressions like this can be left alone when generating PIC, rather
8338 than forced through the GOT. */
8339 static int
8340 pcrel_constant_p (rtx x)
8341 {
8342 if (GET_CODE (x) == MINUS)
8343 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
8344
8345 return FALSE;
8346 }
8347
8348 /* Return true if X will surely end up in an index register after next
8349 splitting pass. */
8350 static bool
8351 will_be_in_index_register (const_rtx x)
8352 {
8353 /* arm.md: calculate_pic_address will split this into a register. */
8354 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
8355 }
8356
8357 /* Return nonzero if X is a valid ARM state address operand. */
8358 int
8359 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
8360 int strict_p)
8361 {
8362 bool use_ldrd;
8363 enum rtx_code code = GET_CODE (x);
8364
8365 if (arm_address_register_rtx_p (x, strict_p))
8366 return 1;
8367
8368 use_ldrd = (TARGET_LDRD
8369 && (mode == DImode || mode == DFmode));
8370
8371 if (code == POST_INC || code == PRE_DEC
8372 || ((code == PRE_INC || code == POST_DEC)
8373 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8374 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8375
8376 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8377 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8378 && GET_CODE (XEXP (x, 1)) == PLUS
8379 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8380 {
8381 rtx addend = XEXP (XEXP (x, 1), 1);
8382
8383 /* Don't allow ldrd post increment by register because it's hard
8384 to fixup invalid register choices. */
8385 if (use_ldrd
8386 && GET_CODE (x) == POST_MODIFY
8387 && REG_P (addend))
8388 return 0;
8389
8390 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
8391 && arm_legitimate_index_p (mode, addend, outer, strict_p));
8392 }
8393
8394 /* After reload constants split into minipools will have addresses
8395 from a LABEL_REF. */
8396 else if (reload_completed
8397 && (code == LABEL_REF
8398 || (code == CONST
8399 && GET_CODE (XEXP (x, 0)) == PLUS
8400 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8401 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8402 return 1;
8403
8404 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8405 return 0;
8406
8407 else if (code == PLUS)
8408 {
8409 rtx xop0 = XEXP (x, 0);
8410 rtx xop1 = XEXP (x, 1);
8411
8412 return ((arm_address_register_rtx_p (xop0, strict_p)
8413 && ((CONST_INT_P (xop1)
8414 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
8415 || (!strict_p && will_be_in_index_register (xop1))))
8416 || (arm_address_register_rtx_p (xop1, strict_p)
8417 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
8418 }
8419
8420 #if 0
8421 /* Reload currently can't handle MINUS, so disable this for now */
8422 else if (GET_CODE (x) == MINUS)
8423 {
8424 rtx xop0 = XEXP (x, 0);
8425 rtx xop1 = XEXP (x, 1);
8426
8427 return (arm_address_register_rtx_p (xop0, strict_p)
8428 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
8429 }
8430 #endif
8431
8432 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8433 && code == SYMBOL_REF
8434 && CONSTANT_POOL_ADDRESS_P (x)
8435 && ! (flag_pic
8436 && symbol_mentioned_p (get_pool_constant (x))
8437 && ! pcrel_constant_p (get_pool_constant (x))))
8438 return 1;
8439
8440 return 0;
8441 }
8442
8443 /* Return true if we can avoid creating a constant pool entry for x. */
8444 static bool
8445 can_avoid_literal_pool_for_label_p (rtx x)
8446 {
8447 /* Normally we can assign constant values to target registers without
8448 the help of the constant pool. But there are cases where we have to use
8449 the constant pool, such as:
8450 1) assigning a label to a register.
8451 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
8452 
8453 A constant pool access of the form:
8454 (set (reg r0) (mem (symbol_ref (".LC0"))))
8455 will cause the use of the literal pool (later, in function arm_reorg).
8456 So here we mark such a format as invalid; the compiler will then
8457 adjust it into:
8458 (set (reg r0) (symbol_ref (".LC0")))
8459 (set (reg r0) (mem (reg r0))).
8460 No extra register is required, and (mem (reg r0)) won't cause the use
8461 of literal pools. */
8462 if (arm_disable_literal_pool && SYMBOL_REF_P (x)
8463 && CONSTANT_POOL_ADDRESS_P (x))
8464 return 1;
8465 return 0;
8466 }
8467
8468
8469 /* Return nonzero if X is a valid Thumb-2 address operand. */
8470 static int
8471 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8472 {
8473 bool use_ldrd;
8474 enum rtx_code code = GET_CODE (x);
8475
8476 if (TARGET_HAVE_MVE
8477 && (mode == V8QImode || mode == E_V4QImode || mode == V4HImode))
8478 return mve_vector_mem_operand (mode, x, strict_p);
8479
8480 if (arm_address_register_rtx_p (x, strict_p))
8481 return 1;
8482
8483 use_ldrd = (TARGET_LDRD
8484 && (mode == DImode || mode == DFmode));
8485
8486 if (code == POST_INC || code == PRE_DEC
8487 || ((code == PRE_INC || code == POST_DEC)
8488 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
8489 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
8490
8491 else if ((code == POST_MODIFY || code == PRE_MODIFY)
8492 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
8493 && GET_CODE (XEXP (x, 1)) == PLUS
8494 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
8495 {
8496 /* Thumb-2 only has autoincrement by constant. */
8497 rtx addend = XEXP (XEXP (x, 1), 1);
8498 HOST_WIDE_INT offset;
8499
8500 if (!CONST_INT_P (addend))
8501 return 0;
8502
8503 offset = INTVAL(addend);
8504 if (GET_MODE_SIZE (mode) <= 4)
8505 return (offset > -256 && offset < 256);
8506
8507 return (use_ldrd && offset > -1024 && offset < 1024
8508 && (offset & 3) == 0);
8509 }
8510
8511 /* After reload constants split into minipools will have addresses
8512 from a LABEL_REF. */
8513 else if (reload_completed
8514 && (code == LABEL_REF
8515 || (code == CONST
8516 && GET_CODE (XEXP (x, 0)) == PLUS
8517 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8518 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8519 return 1;
8520
8521 else if (mode == TImode
8522 || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8523 || (TARGET_HAVE_MVE && VALID_MVE_STRUCT_MODE (mode)))
8524 return 0;
8525
8526 else if (code == PLUS)
8527 {
8528 rtx xop0 = XEXP (x, 0);
8529 rtx xop1 = XEXP (x, 1);
8530
8531 return ((arm_address_register_rtx_p (xop0, strict_p)
8532 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8533 || (!strict_p && will_be_in_index_register (xop1))))
8534 || (arm_address_register_rtx_p (xop1, strict_p)
8535 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8536 }
8537
8538 else if (can_avoid_literal_pool_for_label_p (x))
8539 return 0;
8540
8541 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8542 && code == SYMBOL_REF
8543 && CONSTANT_POOL_ADDRESS_P (x)
8544 && ! (flag_pic
8545 && symbol_mentioned_p (get_pool_constant (x))
8546 && ! pcrel_constant_p (get_pool_constant (x))))
8547 return 1;
8548
8549 return 0;
8550 }
8551
8552 /* Return nonzero if INDEX is valid for an address index operand in
8553 ARM state. */
8554 static int
8555 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8556 int strict_p)
8557 {
8558 HOST_WIDE_INT range;
8559 enum rtx_code code = GET_CODE (index);
8560
8561 /* Standard coprocessor addressing modes. */
8562 if (TARGET_HARD_FLOAT
8563 && (mode == SFmode || mode == DFmode))
8564 return (code == CONST_INT && INTVAL (index) < 1024
8565 && INTVAL (index) > -1024
8566 && (INTVAL (index) & 3) == 0);
8567
8568 /* For quad modes, we restrict the constant offset to be slightly less
8569 than what the instruction format permits. We do this because for
8570 quad mode moves, we will actually decompose them into two separate
8571 double-mode reads or writes. INDEX must therefore be a valid
8572 (double-mode) offset and so should INDEX+8. */
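/* Concretely: with the cap below, the largest accepted offset is 1012,
   so the two double-mode halves of a quad-word move use offsets 1012
   and 1020, both still within the double-mode range; allowing 1016
   would require a second access at 1024, which is not.  */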
8573 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8574 return (code == CONST_INT
8575 && INTVAL (index) < 1016
8576 && INTVAL (index) > -1024
8577 && (INTVAL (index) & 3) == 0);
8578
8579 /* We have no such constraint on double mode offsets, so we permit the
8580 full range of the instruction format. */
8581 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8582 return (code == CONST_INT
8583 && INTVAL (index) < 1024
8584 && INTVAL (index) > -1024
8585 && (INTVAL (index) & 3) == 0);
8586
8587 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8588 return (code == CONST_INT
8589 && INTVAL (index) < 1024
8590 && INTVAL (index) > -1024
8591 && (INTVAL (index) & 3) == 0);
8592
8593 if (arm_address_register_rtx_p (index, strict_p)
8594 && (GET_MODE_SIZE (mode) <= 4))
8595 return 1;
8596
8597 if (mode == DImode || mode == DFmode)
8598 {
8599 if (code == CONST_INT)
8600 {
8601 HOST_WIDE_INT val = INTVAL (index);
8602
8603 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8604 If vldr is selected it uses arm_coproc_mem_operand. */
8605 if (TARGET_LDRD)
8606 return val > -256 && val < 256;
8607 else
8608 return val > -4096 && val < 4092;
8609 }
8610
8611 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8612 }
8613
8614 if (GET_MODE_SIZE (mode) <= 4
8615 && ! (arm_arch4
8616 && (mode == HImode
8617 || mode == HFmode
8618 || (mode == QImode && outer == SIGN_EXTEND))))
8619 {
8620 if (code == MULT)
8621 {
8622 rtx xiop0 = XEXP (index, 0);
8623 rtx xiop1 = XEXP (index, 1);
8624
8625 return ((arm_address_register_rtx_p (xiop0, strict_p)
8626 && power_of_two_operand (xiop1, SImode))
8627 || (arm_address_register_rtx_p (xiop1, strict_p)
8628 && power_of_two_operand (xiop0, SImode)));
8629 }
8630 else if (code == LSHIFTRT || code == ASHIFTRT
8631 || code == ASHIFT || code == ROTATERT)
8632 {
8633 rtx op = XEXP (index, 1);
8634
8635 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8636 && CONST_INT_P (op)
8637 && INTVAL (op) > 0
8638 && INTVAL (op) <= 31);
8639 }
8640 }
8641
8642 /* For ARM v4 we may be doing a sign-extend operation during the
8643 load. */
8644 if (arm_arch4)
8645 {
8646 if (mode == HImode
8647 || mode == HFmode
8648 || (outer == SIGN_EXTEND && mode == QImode))
8649 range = 256;
8650 else
8651 range = 4096;
8652 }
8653 else
8654 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8655
8656 return (code == CONST_INT
8657 && INTVAL (index) < range
8658 && INTVAL (index) > -range);
8659 }
8660
8661 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8662 index operand, i.e. 1, 2, 4 or 8. */
8663 static bool
8664 thumb2_index_mul_operand (rtx op)
8665 {
8666 HOST_WIDE_INT val;
8667
8668 if (!CONST_INT_P (op))
8669 return false;
8670
8671 val = INTVAL (op);
8672 return (val == 1 || val == 2 || val == 4 || val == 8);
8673 }
8674
8675 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8676 static int
8677 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8678 {
8679 enum rtx_code code = GET_CODE (index);
8680
8681 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8682 /* Standard coprocessor addressing modes. */
8683 if (TARGET_VFP_BASE
8684 && (mode == SFmode || mode == DFmode))
8685 return (code == CONST_INT && INTVAL (index) < 1024
8686 /* Thumb-2 allows only > -256 index range for its core register
8687 load/stores. Since we allow SF/DF in core registers, we have
8688 to use the intersection between -256~4096 (core) and -1024~1024
8689 (coprocessor). */
8690 && INTVAL (index) > -256
8691 && (INTVAL (index) & 3) == 0);
8692
8693 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8694 {
8695 /* For DImode assume values will usually live in core regs
8696 and only allow LDRD addressing modes. */
8697 if (!TARGET_LDRD || mode != DImode)
8698 return (code == CONST_INT
8699 && INTVAL (index) < 1024
8700 && INTVAL (index) > -1024
8701 && (INTVAL (index) & 3) == 0);
8702 }
8703
8704 /* For quad modes, we restrict the constant offset to be slightly less
8705 than what the instruction format permits. We do this because for
8706 quad mode moves, we will actually decompose them into two separate
8707 double-mode reads or writes. INDEX must therefore be a valid
8708 (double-mode) offset and so should INDEX+8. */
8709 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8710 return (code == CONST_INT
8711 && INTVAL (index) < 1016
8712 && INTVAL (index) > -1024
8713 && (INTVAL (index) & 3) == 0);
8714
8715 /* We have no such constraint on double mode offsets, so we permit the
8716 full range of the instruction format. */
8717 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8718 return (code == CONST_INT
8719 && INTVAL (index) < 1024
8720 && INTVAL (index) > -1024
8721 && (INTVAL (index) & 3) == 0);
8722
8723 if (arm_address_register_rtx_p (index, strict_p)
8724 && (GET_MODE_SIZE (mode) <= 4))
8725 return 1;
8726
8727 if (mode == DImode || mode == DFmode)
8728 {
8729 if (code == CONST_INT)
8730 {
8731 HOST_WIDE_INT val = INTVAL (index);
8732 /* Thumb-2 ldrd only has reg+const addressing modes.
8733 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8734 If vldr is selected it uses arm_coproc_mem_operand. */
8735 if (TARGET_LDRD)
8736 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8737 else
8738 return IN_RANGE (val, -255, 4095 - 4);
8739 }
8740 else
8741 return 0;
8742 }
8743
8744 if (code == MULT)
8745 {
8746 rtx xiop0 = XEXP (index, 0);
8747 rtx xiop1 = XEXP (index, 1);
8748
8749 return ((arm_address_register_rtx_p (xiop0, strict_p)
8750 && thumb2_index_mul_operand (xiop1))
8751 || (arm_address_register_rtx_p (xiop1, strict_p)
8752 && thumb2_index_mul_operand (xiop0)));
8753 }
8754 else if (code == ASHIFT)
8755 {
8756 rtx op = XEXP (index, 1);
8757
8758 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8759 && CONST_INT_P (op)
8760 && INTVAL (op) > 0
8761 && INTVAL (op) <= 3);
8762 }
8763
8764 return (code == CONST_INT
8765 && INTVAL (index) < 4096
8766 && INTVAL (index) > -256);
8767 }
8768
8769 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8770 static int
8771 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8772 {
8773 int regno;
8774
8775 if (!REG_P (x))
8776 return 0;
8777
8778 regno = REGNO (x);
8779
8780 if (strict_p)
8781 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8782
8783 return (regno <= LAST_LO_REGNUM
8784 || regno > LAST_VIRTUAL_REGISTER
8785 || regno == FRAME_POINTER_REGNUM
8786 || (GET_MODE_SIZE (mode) >= 4
8787 && (regno == STACK_POINTER_REGNUM
8788 || regno >= FIRST_PSEUDO_REGISTER
8789 || x == hard_frame_pointer_rtx
8790 || x == arg_pointer_rtx)));
8791 }
8792
8793 /* Return nonzero if x is a legitimate index register. This is the case
8794 for any base register that can access a QImode object. */
8795 inline static int
8796 thumb1_index_register_rtx_p (rtx x, int strict_p)
8797 {
8798 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8799 }
8800
8801 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8802
8803 The AP may be eliminated to either the SP or the FP, so we use the
8804 least common denominator, e.g. SImode, and offsets from 0 to 64.
8805
8806 ??? Verify whether the above is the right approach.
8807
8808 ??? Also, the FP may be eliminated to the SP, so perhaps that
8809 needs special handling also.
8810
8811 ??? Look at how the mips16 port solves this problem. It probably uses
8812 better ways to solve some of these problems.
8813
8814 Although it is not incorrect, we don't accept QImode and HImode
8815 addresses based on the frame pointer or arg pointer until the
8816 reload pass starts. This is so that eliminating such addresses
8817 into stack based ones won't produce impossible code. */
8818 int
8819 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8820 {
8821 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8822 return 0;
8823
8824 /* ??? Not clear if this is right. Experiment. */
8825 if (GET_MODE_SIZE (mode) < 4
8826 && !(reload_in_progress || reload_completed)
8827 && (reg_mentioned_p (frame_pointer_rtx, x)
8828 || reg_mentioned_p (arg_pointer_rtx, x)
8829 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8830 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8831 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8832 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8833 return 0;
8834
8835 /* Accept any base register. SP only in SImode or larger. */
8836 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8837 return 1;
8838
8839 /* This is PC relative data before arm_reorg runs. */
8840 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8841 && SYMBOL_REF_P (x)
8842 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
8843 && !arm_disable_literal_pool)
8844 return 1;
8845
8846 /* This is PC relative data after arm_reorg runs. */
8847 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8848 && reload_completed
8849 && (LABEL_REF_P (x)
8850 || (GET_CODE (x) == CONST
8851 && GET_CODE (XEXP (x, 0)) == PLUS
8852 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8853 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8854 return 1;
8855
8856 /* Post-inc indexing only supported for SImode and larger. */
8857 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8858 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8859 return 1;
8860
8861 else if (GET_CODE (x) == PLUS)
8862 {
8863 /* REG+REG address can be any two index registers. */
8864 /* We disallow FRAME+REG addressing since we know that FRAME
8865 will be replaced with STACK, and SP relative addressing only
8866 permits SP+OFFSET. */
8867 if (GET_MODE_SIZE (mode) <= 4
8868 && XEXP (x, 0) != frame_pointer_rtx
8869 && XEXP (x, 1) != frame_pointer_rtx
8870 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8871 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8872 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8873 return 1;
8874
8875 /* REG+const has 5-7 bit offset for non-SP registers. */
8876 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8877 || XEXP (x, 0) == arg_pointer_rtx)
8878 && CONST_INT_P (XEXP (x, 1))
8879 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8880 return 1;
8881
8882 /* REG+const has 10-bit offset for SP, but only SImode and
8883 larger is supported. */
8884 /* ??? Should probably check for DI/DFmode overflow here
8885 just like GO_IF_LEGITIMATE_OFFSET does. */
8886 else if (REG_P (XEXP (x, 0))
8887 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8888 && GET_MODE_SIZE (mode) >= 4
8889 && CONST_INT_P (XEXP (x, 1))
8890 && INTVAL (XEXP (x, 1)) >= 0
8891 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8892 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8893 return 1;
8894
8895 else if (REG_P (XEXP (x, 0))
8896 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8897 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8898 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8899 && REGNO (XEXP (x, 0))
8900 <= LAST_VIRTUAL_POINTER_REGISTER))
8901 && GET_MODE_SIZE (mode) >= 4
8902 && CONST_INT_P (XEXP (x, 1))
8903 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8904 return 1;
8905 }
8906
8907 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8908 && GET_MODE_SIZE (mode) == 4
8909 && SYMBOL_REF_P (x)
8910 && CONSTANT_POOL_ADDRESS_P (x)
8911 && !arm_disable_literal_pool
8912 && ! (flag_pic
8913 && symbol_mentioned_p (get_pool_constant (x))
8914 && ! pcrel_constant_p (get_pool_constant (x))))
8915 return 1;
8916
8917 return 0;
8918 }
8919
8920 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8921 instruction of mode MODE. */
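/* For example, SImode (size 4) accepts the offsets 0, 4, ..., 124,
   matching the 5-bit scaled immediate of the 16-bit LDR/STR encodings;
   HImode accepts 0, 2, ..., 62 and QImode 0 ... 31.  */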
8922 int
8923 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8924 {
8925 switch (GET_MODE_SIZE (mode))
8926 {
8927 case 1:
8928 return val >= 0 && val < 32;
8929
8930 case 2:
8931 return val >= 0 && val < 64 && (val & 1) == 0;
8932
8933 default:
8934 return (val >= 0
8935 && (val + GET_MODE_SIZE (mode)) <= 128
8936 && (val & 3) == 0);
8937 }
8938 }
8939
8940 bool
8941 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8942 {
8943 if (TARGET_ARM)
8944 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8945 else if (TARGET_THUMB2)
8946 return thumb2_legitimate_address_p (mode, x, strict_p);
8947 else /* if (TARGET_THUMB1) */
8948 return thumb1_legitimate_address_p (mode, x, strict_p);
8949 }
8950
8951 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8952
8953 Given an rtx X being reloaded into a reg required to be
8954 in class CLASS, return the class of reg to actually use.
8955 In general this is just CLASS, but for the Thumb core registers and
8956 immediate constants we prefer a LO_REGS class or a subset. */
8957
8958 static reg_class_t
8959 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8960 {
8961 if (TARGET_32BIT)
8962 return rclass;
8963 else
8964 {
8965 if (rclass == GENERAL_REGS)
8966 return LO_REGS;
8967 else
8968 return rclass;
8969 }
8970 }
8971
8972 /* Build the SYMBOL_REF for __tls_get_addr. */
8973
8974 static GTY(()) rtx tls_get_addr_libfunc;
8975
8976 static rtx
8977 get_tls_get_addr (void)
8978 {
8979 if (!tls_get_addr_libfunc)
8980 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8981 return tls_get_addr_libfunc;
8982 }
8983
8984 rtx
8985 arm_load_tp (rtx target)
8986 {
8987 if (!target)
8988 target = gen_reg_rtx (SImode);
8989
8990 if (TARGET_HARD_TP)
8991 {
8992 /* Can return in any reg. */
8993 emit_insn (gen_load_tp_hard (target));
8994 }
8995 else
8996 {
8997 /* Always returned in r0. Immediately copy the result into a pseudo,
8998 otherwise other uses of r0 (e.g. setting up function arguments) may
8999 clobber the value. */
9000
9001 rtx tmp;
9002
9003 if (TARGET_FDPIC)
9004 {
9005 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
9006 rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);
9007
9008 emit_insn (gen_load_tp_soft_fdpic ());
9009
9010 /* Restore r9. */
9011 emit_insn (gen_restore_pic_register_after_call (fdpic_reg, initial_fdpic_reg));
9012 }
9013 else
9014 emit_insn (gen_load_tp_soft ());
9015
9016 tmp = gen_rtx_REG (SImode, R0_REGNUM);
9017 emit_move_insn (target, tmp);
9018 }
9019 return target;
9020 }
9021
9022 static rtx
9023 load_tls_operand (rtx x, rtx reg)
9024 {
9025 rtx tmp;
9026
9027 if (reg == NULL_RTX)
9028 reg = gen_reg_rtx (SImode);
9029
9030 tmp = gen_rtx_CONST (SImode, x);
9031
9032 emit_move_insn (reg, tmp);
9033
9034 return reg;
9035 }
9036
9037 static rtx_insn *
9038 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
9039 {
9040 rtx label, labelno = NULL_RTX, sum;
9041
9042 gcc_assert (reloc != TLS_DESCSEQ);
9043 start_sequence ();
9044
9045 if (TARGET_FDPIC)
9046 {
9047 sum = gen_rtx_UNSPEC (Pmode,
9048 gen_rtvec (2, x, GEN_INT (reloc)),
9049 UNSPEC_TLS);
9050 }
9051 else
9052 {
9053 labelno = GEN_INT (pic_labelno++);
9054 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9055 label = gen_rtx_CONST (VOIDmode, label);
9056
9057 sum = gen_rtx_UNSPEC (Pmode,
9058 gen_rtvec (4, x, GEN_INT (reloc), label,
9059 GEN_INT (TARGET_ARM ? 8 : 4)),
9060 UNSPEC_TLS);
9061 }
9062 reg = load_tls_operand (sum, reg);
9063
9064 if (TARGET_FDPIC)
9065 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9066 else if (TARGET_ARM)
9067 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
9068 else
9069 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9070
9071 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
9072 LCT_PURE, /* LCT_CONST? */
9073 Pmode, reg, Pmode);
9074
9075 rtx_insn *insns = get_insns ();
9076 end_sequence ();
9077
9078 return insns;
9079 }
9080
9081 static rtx
9082 arm_tls_descseq_addr (rtx x, rtx reg)
9083 {
9084 rtx labelno = GEN_INT (pic_labelno++);
9085 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9086 rtx sum = gen_rtx_UNSPEC (Pmode,
9087 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
9088 gen_rtx_CONST (VOIDmode, label),
9089 GEN_INT (!TARGET_ARM)),
9090 UNSPEC_TLS);
9091 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
9092
9093 emit_insn (gen_tlscall (x, labelno));
9094 if (!reg)
9095 reg = gen_reg_rtx (SImode);
9096 else
9097 gcc_assert (REGNO (reg) != R0_REGNUM);
9098
9099 emit_move_insn (reg, reg0);
9100
9101 return reg;
9102 }
9103
9104
9105 rtx
9106 legitimize_tls_address (rtx x, rtx reg)
9107 {
9108 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
9109 rtx_insn *insns;
9110 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
9111
9112 switch (model)
9113 {
9114 case TLS_MODEL_GLOBAL_DYNAMIC:
9115 if (TARGET_GNU2_TLS)
9116 {
9117 gcc_assert (!TARGET_FDPIC);
9118
9119 reg = arm_tls_descseq_addr (x, reg);
9120
9121 tp = arm_load_tp (NULL_RTX);
9122
9123 dest = gen_rtx_PLUS (Pmode, tp, reg);
9124 }
9125 else
9126 {
9127 /* Original scheme */
9128 if (TARGET_FDPIC)
9129 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
9130 else
9131 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
9132 dest = gen_reg_rtx (Pmode);
9133 emit_libcall_block (insns, dest, ret, x);
9134 }
9135 return dest;
9136
9137 case TLS_MODEL_LOCAL_DYNAMIC:
9138 if (TARGET_GNU2_TLS)
9139 {
9140 gcc_assert (!TARGET_FDPIC);
9141
9142 reg = arm_tls_descseq_addr (x, reg);
9143
9144 tp = arm_load_tp (NULL_RTX);
9145
9146 dest = gen_rtx_PLUS (Pmode, tp, reg);
9147 }
9148 else
9149 {
9150 if (TARGET_FDPIC)
9151 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
9152 else
9153 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
9154
9155 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
9156 share the LDM result with other LD model accesses. */
9157 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
9158 UNSPEC_TLS);
9159 dest = gen_reg_rtx (Pmode);
9160 emit_libcall_block (insns, dest, ret, eqv);
9161
9162 /* Load the addend. */
9163 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
9164 GEN_INT (TLS_LDO32)),
9165 UNSPEC_TLS);
9166 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
9167 dest = gen_rtx_PLUS (Pmode, dest, addend);
9168 }
9169 return dest;
9170
9171 case TLS_MODEL_INITIAL_EXEC:
9172 if (TARGET_FDPIC)
9173 {
9174 sum = gen_rtx_UNSPEC (Pmode,
9175 gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
9176 UNSPEC_TLS);
9177 reg = load_tls_operand (sum, reg);
9178 emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
9179 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
9180 }
9181 else
9182 {
9183 labelno = GEN_INT (pic_labelno++);
9184 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
9185 label = gen_rtx_CONST (VOIDmode, label);
9186 sum = gen_rtx_UNSPEC (Pmode,
9187 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
9188 GEN_INT (TARGET_ARM ? 8 : 4)),
9189 UNSPEC_TLS);
9190 reg = load_tls_operand (sum, reg);
9191
9192 if (TARGET_ARM)
9193 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
9194 else if (TARGET_THUMB2)
9195 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
9196 else
9197 {
9198 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
9199 emit_move_insn (reg, gen_const_mem (SImode, reg));
9200 }
9201 }
9202
9203 tp = arm_load_tp (NULL_RTX);
9204
9205 return gen_rtx_PLUS (Pmode, tp, reg);
9206
9207 case TLS_MODEL_LOCAL_EXEC:
9208 tp = arm_load_tp (NULL_RTX);
9209
9210 reg = gen_rtx_UNSPEC (Pmode,
9211 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
9212 UNSPEC_TLS);
9213 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
9214
9215 return gen_rtx_PLUS (Pmode, tp, reg);
9216
9217 default:
9218 abort ();
9219 }
9220 }
9221
9222 /* Try machine-dependent ways of modifying an illegitimate address
9223 to be legitimate. If we find one, return the new, valid address. */
9224 rtx
9225 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9226 {
9227 if (arm_tls_referenced_p (x))
9228 {
9229 rtx addend = NULL;
9230
9231 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
9232 {
9233 addend = XEXP (XEXP (x, 0), 1);
9234 x = XEXP (XEXP (x, 0), 0);
9235 }
9236
9237 if (!SYMBOL_REF_P (x))
9238 return x;
9239
9240 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
9241
9242 x = legitimize_tls_address (x, NULL_RTX);
9243
9244 if (addend)
9245 {
9246 x = gen_rtx_PLUS (SImode, x, addend);
9247 orig_x = x;
9248 }
9249 else
9250 return x;
9251 }
9252
9253 if (TARGET_THUMB1)
9254 return thumb_legitimize_address (x, orig_x, mode);
9255
9256 if (GET_CODE (x) == PLUS)
9257 {
9258 rtx xop0 = XEXP (x, 0);
9259 rtx xop1 = XEXP (x, 1);
9260
9261 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
9262 xop0 = force_reg (SImode, xop0);
9263
9264 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
9265 && !symbol_mentioned_p (xop1))
9266 xop1 = force_reg (SImode, xop1);
9267
9268 if (ARM_BASE_REGISTER_RTX_P (xop0)
9269 && CONST_INT_P (xop1))
9270 {
9271 HOST_WIDE_INT n, low_n;
9272 rtx base_reg, val;
9273 n = INTVAL (xop1);
9274
9275 /* VFP addressing modes actually allow greater offsets, but for
9276 now we just stick with the lowest common denominator. */
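/* For example, a DImode access at constant offset 0x10b is split below
   into base + 0x110 plus a residual offset of -5, keeping the residual
   small enough for the doubleword addressing modes.  */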
9277 if (mode == DImode || mode == DFmode)
9278 {
9279 low_n = n & 0x0f;
9280 n &= ~0x0f;
9281 if (low_n > 4)
9282 {
9283 n += 16;
9284 low_n -= 16;
9285 }
9286 }
9287 else
9288 {
9289 low_n = ((mode) == TImode ? 0
9290 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
9291 n -= low_n;
9292 }
9293
9294 base_reg = gen_reg_rtx (SImode);
9295 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
9296 emit_move_insn (base_reg, val);
9297 x = plus_constant (Pmode, base_reg, low_n);
9298 }
9299 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9300 x = gen_rtx_PLUS (SImode, xop0, xop1);
9301 }
9302
9303 /* XXX We don't allow MINUS any more -- see comment in
9304 arm_legitimate_address_outer_p (). */
9305 else if (GET_CODE (x) == MINUS)
9306 {
9307 rtx xop0 = XEXP (x, 0);
9308 rtx xop1 = XEXP (x, 1);
9309
9310 if (CONSTANT_P (xop0))
9311 xop0 = force_reg (SImode, xop0);
9312
9313 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
9314 xop1 = force_reg (SImode, xop1);
9315
9316 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
9317 x = gen_rtx_MINUS (SImode, xop0, xop1);
9318 }
9319
9320 /* Make sure to take full advantage of the pre-indexed addressing mode
9321 with absolute addresses, which often allows the base register to be
9322 shared between multiple adjacent memory references, and may even
9323 allow the minipool to be avoided entirely. */
9324 else if (CONST_INT_P (x) && optimize > 0)
9325 {
9326 unsigned int bits;
9327 HOST_WIDE_INT mask, base, index;
9328 rtx base_reg;
9329
9330 /* LDR and LDRB can use a 12-bit index; LDRSB and the rest can
9331 only use an 8-bit index. So let's use a 12-bit index for
9332 SImode only and hope that arm_gen_constant will enable LDRB
9333 to use more bits. */
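/* For instance, with mode == SImode an address of 0x12345 is split
   below into base 0x12000 and index 0x345, so the base can be
   materialized once in a register and shared by neighbouring
   accesses.  */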
9334 bits = (mode == SImode) ? 12 : 8;
9335 mask = (1 << bits) - 1;
9336 base = INTVAL (x) & ~mask;
9337 index = INTVAL (x) & mask;
9338 if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
9339 {
9340 /* It'll most probably be more efficient to generate the
9341 base with more bits set and use a negative index instead.
9342 Don't do this for Thumb as negative offsets are much more
9343 limited. */
9344 base |= mask;
9345 index -= mask;
9346 }
9347 base_reg = force_reg (SImode, GEN_INT (base));
9348 x = plus_constant (Pmode, base_reg, index);
9349 }
9350
9351 if (flag_pic)
9352 {
9353 /* We need to find and carefully transform any SYMBOL and LABEL
9354 references; so go back to the original address expression. */
9355 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9356 false /*compute_now*/);
9357
9358 if (new_x != orig_x)
9359 x = new_x;
9360 }
9361
9362 return x;
9363 }
9364
9365
9366 /* Try machine-dependent ways of modifying an illegitimate Thumb address
9367 to be legitimate. If we find one, return the new, valid address. */
9368 rtx
9369 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
9370 {
9371 if (GET_CODE (x) == PLUS
9372 && CONST_INT_P (XEXP (x, 1))
9373 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
9374 || INTVAL (XEXP (x, 1)) < 0))
9375 {
9376 rtx xop0 = XEXP (x, 0);
9377 rtx xop1 = XEXP (x, 1);
9378 HOST_WIDE_INT offset = INTVAL (xop1);
9379
9380 /* Try to fold the offset into a biasing of the base register and
9381 then offsetting that. Don't do this when optimizing for space
9382 since it can cause too many CSEs. */
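/* For instance, when this size-optimizing path applies, an SImode
   access at offset 300 is rewritten as (base + 252) + 48: the bias of
   252 can be shared by neighbouring accesses while the residual 48
   still fits the 5-bit scaled immediate.  */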
9383 if (optimize_size && offset >= 0
9384 && offset < 256 + 31 * GET_MODE_SIZE (mode))
9385 {
9386 HOST_WIDE_INT delta;
9387
9388 if (offset >= 256)
9389 delta = offset - (256 - GET_MODE_SIZE (mode));
9390 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
9391 delta = 31 * GET_MODE_SIZE (mode);
9392 else
9393 delta = offset & (~31 * GET_MODE_SIZE (mode));
9394
9395 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
9396 NULL_RTX);
9397 x = plus_constant (Pmode, xop0, delta);
9398 }
9399 else if (offset < 0 && offset > -256)
9400 /* Small negative offsets are best done with a subtract before the
9401 dereference, since forcing them into a register normally takes two
9402 instructions. */
9403 x = force_operand (x, NULL_RTX);
9404 else
9405 {
9406 /* For the remaining cases, force the constant into a register. */
9407 xop1 = force_reg (SImode, xop1);
9408 x = gen_rtx_PLUS (SImode, xop0, xop1);
9409 }
9410 }
9411 else if (GET_CODE (x) == PLUS
9412 && s_register_operand (XEXP (x, 1), SImode)
9413 && !s_register_operand (XEXP (x, 0), SImode))
9414 {
9415 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
9416
9417 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
9418 }
9419
9420 if (flag_pic)
9421 {
9422 /* We need to find and carefully transform any SYMBOL and LABEL
9423 references; so go back to the original address expression. */
9424 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
9425 false /*compute_now*/);
9426
9427 if (new_x != orig_x)
9428 x = new_x;
9429 }
9430
9431 return x;
9432 }
9433
9434 /* Return TRUE if X contains any TLS symbol references. */
9435
9436 bool
9437 arm_tls_referenced_p (rtx x)
9438 {
9439 if (! TARGET_HAVE_TLS)
9440 return false;
9441
9442 subrtx_iterator::array_type array;
9443 FOR_EACH_SUBRTX (iter, array, x, ALL)
9444 {
9445 const_rtx x = *iter;
9446 if (SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0)
9447 {
9448 /* ARM currently does not provide relocations to encode TLS variables
9449 into AArch32 instructions, only data, so there is currently no way
9450 to implement these if the literal pool is disabled. */
9451 if (arm_disable_literal_pool)
9452 sorry ("accessing thread-local storage is not currently supported "
9453 "with %<-mpure-code%> or %<-mslow-flash-data%>");
9454
9455 return true;
9456 }
9457
9458 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
9459 TLS offsets, not real symbol references. */
9460 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9461 iter.skip_subrtxes ();
9462 }
9463 return false;
9464 }
9465
9466 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
9467
9468 On the ARM, allow any integer (invalid ones are removed later by insn
9469 patterns), nice doubles and symbol_refs which refer to the function's
9470 constant pool XXX.
9471
9472 When generating PIC code, allow anything. */
9473
9474 static bool
9475 arm_legitimate_constant_p_1 (machine_mode, rtx x)
9476 {
9477 if (GET_CODE (x) == CONST_VECTOR && !neon_make_constant (x, false))
9478 return false;
9479
9480 return flag_pic || !label_mentioned_p (x);
9481 }
9482
9483 static bool
9484 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9485 {
9486 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
9487 RTXs. These must therefore be allowed for Thumb-1 so that the result
9488 is valid when run for ARMv8-M Baseline or later. */
9489 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
9490 x = XEXP (x, 0);
9491
9492 return (CONST_INT_P (x)
9493 || CONST_DOUBLE_P (x)
9494 || CONSTANT_ADDRESS_P (x)
9495 || (TARGET_HAVE_MOVT && SYMBOL_REF_P (x))
9496 /* On Thumb-1 without MOVT/MOVW and with the literal pool
9497 disabled, we build the symbol address with upper/lower
9498 relocations. */
9499 || (TARGET_THUMB1
9500 && !label_mentioned_p (x)
9501 && arm_valid_symbolic_address_p (x)
9502 && arm_disable_literal_pool)
9503 || flag_pic);
9504 }
9505
9506 static bool
9507 arm_legitimate_constant_p (machine_mode mode, rtx x)
9508 {
9509 return (!arm_cannot_force_const_mem (mode, x)
9510 && (TARGET_32BIT
9511 ? arm_legitimate_constant_p_1 (mode, x)
9512 : thumb_legitimate_constant_p (mode, x)));
9513 }
9514
9515 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9516
9517 static bool
9518 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9519 {
9520 rtx base, offset;
9521 split_const (x, &base, &offset);
9522
9523 if (SYMBOL_REF_P (base))
9524 {
9525 /* Function symbols cannot have an offset due to the Thumb bit. */
9526 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
9527 && INTVAL (offset) != 0)
9528 return true;
9529
9530 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
9531 && !offset_within_block_p (base, INTVAL (offset)))
9532 return true;
9533 }
9534 return arm_tls_referenced_p (x);
9535 }
9536 \f
9537 #define REG_OR_SUBREG_REG(X) \
9538 (REG_P (X) \
9539 || (SUBREG_P (X) && REG_P (SUBREG_REG (X))))
9540
9541 #define REG_OR_SUBREG_RTX(X) \
9542 (REG_P (X) ? (X) : SUBREG_REG (X))
9543
9544 static inline int
9545 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9546 {
9547 machine_mode mode = GET_MODE (x);
9548 int total, words;
9549
9550 switch (code)
9551 {
9552 case ASHIFT:
9553 case ASHIFTRT:
9554 case LSHIFTRT:
9555 case ROTATERT:
9556 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9557
9558 case PLUS:
9559 case MINUS:
9560 case COMPARE:
9561 case NEG:
9562 case NOT:
9563 return COSTS_N_INSNS (1);
9564
9565 case MULT:
9566 if (arm_arch6m && arm_m_profile_small_mul)
9567 return COSTS_N_INSNS (32);
9568
9569 if (CONST_INT_P (XEXP (x, 1)))
9570 {
9571 int cycles = 0;
9572 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9573
9574 while (i)
9575 {
9576 i >>= 2;
9577 cycles++;
9578 }
9579 return COSTS_N_INSNS (2) + cycles;
9580 }
9581 return COSTS_N_INSNS (1) + 16;
9582
9583 case SET:
9584 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9585 the mode. */
9586 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9587 return (COSTS_N_INSNS (words)
9588 + 4 * ((MEM_P (SET_SRC (x)))
9589 + MEM_P (SET_DEST (x))));
9590
9591 case CONST_INT:
9592 if (outer == SET)
9593 {
9594 if (UINTVAL (x) < 256
9595 /* 16-bit constant. */
9596 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9597 return 0;
9598 if (thumb_shiftable_const (INTVAL (x)))
9599 return COSTS_N_INSNS (2);
9600 return arm_disable_literal_pool
9601 ? COSTS_N_INSNS (8)
9602 : COSTS_N_INSNS (3);
9603 }
9604 else if ((outer == PLUS || outer == COMPARE)
9605 && INTVAL (x) < 256 && INTVAL (x) > -256)
9606 return 0;
9607 else if ((outer == IOR || outer == XOR || outer == AND)
9608 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9609 return COSTS_N_INSNS (1);
9610 else if (outer == AND)
9611 {
9612 int i;
9613 /* This duplicates the tests in the andsi3 expander. */
9614 for (i = 9; i <= 31; i++)
9615 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9616 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9617 return COSTS_N_INSNS (2);
9618 }
9619 else if (outer == ASHIFT || outer == ASHIFTRT
9620 || outer == LSHIFTRT)
9621 return 0;
9622 return COSTS_N_INSNS (2);
9623
9624 case CONST:
9625 case CONST_DOUBLE:
9626 case LABEL_REF:
9627 case SYMBOL_REF:
9628 return COSTS_N_INSNS (3);
9629
9630 case UDIV:
9631 case UMOD:
9632 case DIV:
9633 case MOD:
9634 return 100;
9635
9636 case TRUNCATE:
9637 return 99;
9638
9639 case AND:
9640 case XOR:
9641 case IOR:
9642 /* XXX guess. */
9643 return 8;
9644
9645 case MEM:
9646 /* XXX another guess. */
9647 /* Memory costs quite a lot for the first word, but subsequent words
9648 load at the equivalent of a single insn each. */
9649 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9650 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9651 ? 4 : 0));
9652
9653 case IF_THEN_ELSE:
9654 /* XXX a guess. */
9655 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9656 return 14;
9657 return 2;
9658
9659 case SIGN_EXTEND:
9660 case ZERO_EXTEND:
9661 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9662 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9663
9664 if (mode == SImode)
9665 return total;
9666
9667 if (arm_arch6)
9668 return total + COSTS_N_INSNS (1);
9669
9670 /* Assume a two-shift sequence. Increase the cost slightly so
9671 we prefer actual shifts over an extend operation. */
9672 return total + 1 + COSTS_N_INSNS (2);
9673
9674 default:
9675 return 99;
9676 }
9677 }
9678
9679 /* Estimates the size cost of thumb1 instructions.
9680 For now most of the code is copied from thumb1_rtx_costs. We need more
9681 fine-grained tuning when we have more related test cases. */
9682 static inline int
9683 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9684 {
9685 machine_mode mode = GET_MODE (x);
9686 int words, cost;
9687
9688 switch (code)
9689 {
9690 case ASHIFT:
9691 case ASHIFTRT:
9692 case LSHIFTRT:
9693 case ROTATERT:
9694 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9695
9696 case PLUS:
9697 case MINUS:
9698 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9699 defined by RTL expansion, especially for the expansion of
9700 multiplication. */
9701 if ((GET_CODE (XEXP (x, 0)) == MULT
9702 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9703 || (GET_CODE (XEXP (x, 1)) == MULT
9704 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9705 return COSTS_N_INSNS (2);
9706 /* Fall through. */
9707 case COMPARE:
9708 case NEG:
9709 case NOT:
9710 return COSTS_N_INSNS (1);
9711
9712 case MULT:
9713 if (CONST_INT_P (XEXP (x, 1)))
9714 {
9715 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
9716 into a register first. */
9717 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9718 /* For targets that have a very small, high-latency multiply
9719 unit, we prefer to synthesize the mult with up to 5 instructions,
9720 giving a good balance between size and performance. */
9721 if (arm_arch6m && arm_m_profile_small_mul)
9722 return COSTS_N_INSNS (5);
9723 else
9724 return COSTS_N_INSNS (1) + const_size;
9725 }
9726 return COSTS_N_INSNS (1);
9727
9728 case SET:
9729 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9730 the mode. */
9731 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9732 cost = COSTS_N_INSNS (words);
9733 if (satisfies_constraint_J (SET_SRC (x))
9734 || satisfies_constraint_K (SET_SRC (x))
9735 /* Too big an immediate for a 2-byte mov, using MOVT. */
9736 || (CONST_INT_P (SET_SRC (x))
9737 && UINTVAL (SET_SRC (x)) >= 256
9738 && TARGET_HAVE_MOVT
9739 && satisfies_constraint_j (SET_SRC (x)))
9740 /* thumb1_movdi_insn. */
9741 || ((words > 1) && MEM_P (SET_SRC (x))))
9742 cost += COSTS_N_INSNS (1);
9743 return cost;
9744
9745 case CONST_INT:
9746 if (outer == SET)
9747 {
9748 if (UINTVAL (x) < 256)
9749 return COSTS_N_INSNS (1);
9750 /* movw is 4 bytes long. */
9751 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9752 return COSTS_N_INSNS (2);
9753 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9754 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9755 return COSTS_N_INSNS (2);
9756 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9757 if (thumb_shiftable_const (INTVAL (x)))
9758 return COSTS_N_INSNS (2);
9759 return arm_disable_literal_pool
9760 ? COSTS_N_INSNS (8)
9761 : COSTS_N_INSNS (3);
9762 }
9763 else if ((outer == PLUS || outer == COMPARE)
9764 && INTVAL (x) < 256 && INTVAL (x) > -256)
9765 return 0;
9766 else if ((outer == IOR || outer == XOR || outer == AND)
9767 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9768 return COSTS_N_INSNS (1);
9769 else if (outer == AND)
9770 {
9771 int i;
9772 /* This duplicates the tests in the andsi3 expander. */
9773 for (i = 9; i <= 31; i++)
9774 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9775 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9776 return COSTS_N_INSNS (2);
9777 }
9778 else if (outer == ASHIFT || outer == ASHIFTRT
9779 || outer == LSHIFTRT)
9780 return 0;
9781 return COSTS_N_INSNS (2);
9782
9783 case CONST:
9784 case CONST_DOUBLE:
9785 case LABEL_REF:
9786 case SYMBOL_REF:
9787 return COSTS_N_INSNS (3);
9788
9789 case UDIV:
9790 case UMOD:
9791 case DIV:
9792 case MOD:
9793 return 100;
9794
9795 case TRUNCATE:
9796 return 99;
9797
9798 case AND:
9799 case XOR:
9800 case IOR:
9801 return COSTS_N_INSNS (1);
9802
9803 case MEM:
9804 return (COSTS_N_INSNS (1)
9805 + COSTS_N_INSNS (1)
9806 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9807 + ((SYMBOL_REF_P (x) && CONSTANT_POOL_ADDRESS_P (x))
9808 ? COSTS_N_INSNS (1) : 0));
9809
9810 case IF_THEN_ELSE:
9811 /* XXX a guess. */
9812 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9813 return 14;
9814 return 2;
9815
9816 case ZERO_EXTEND:
9817 /* XXX still guessing. */
9818 switch (GET_MODE (XEXP (x, 0)))
9819 {
9820 case E_QImode:
9821 return (1 + (mode == DImode ? 4 : 0)
9822 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9823
9824 case E_HImode:
9825 return (4 + (mode == DImode ? 4 : 0)
9826 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9827
9828 case E_SImode:
9829 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9830
9831 default:
9832 return 99;
9833 }
9834
9835 default:
9836 return 99;
9837 }
9838 }
9839
9840 /* Helper function for arm_rtx_costs. If one operand of OP (a
9841 PLUS) adds in the carry flag, then return the other operand. If
9842 neither is a carry, return OP unchanged. */
9843 static rtx
9844 strip_carry_operation (rtx op)
9845 {
9846 gcc_assert (GET_CODE (op) == PLUS);
9847 if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
9848 return XEXP (op, 1);
9849 else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
9850 return XEXP (op, 0);
9851 return op;
9852 }
9853
9854 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9855 operand, then return the operand that is being shifted. If the shift
9856 is not by a constant, then set SHIFT_REG to point to the operand.
9857 Return NULL if OP is not a shifter operand. */
9858 static rtx
9859 shifter_op_p (rtx op, rtx *shift_reg)
9860 {
9861 enum rtx_code code = GET_CODE (op);
9862
9863 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9864 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9865 return XEXP (op, 0);
9866 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9867 return XEXP (op, 0);
9868 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9869 || code == ASHIFTRT)
9870 {
9871 if (!CONST_INT_P (XEXP (op, 1)))
9872 *shift_reg = XEXP (op, 1);
9873 return XEXP (op, 0);
9874 }
9875
9876 return NULL;
9877 }
9878
9879 static bool
9880 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9881 {
9882 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9883 rtx_code code = GET_CODE (x);
9884 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9885
9886 switch (XINT (x, 1))
9887 {
9888 case UNSPEC_UNALIGNED_LOAD:
9889 /* We can only do unaligned loads into the integer unit, and we can't
9890 use LDM or LDRD. */
9891 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9892 if (speed_p)
9893 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9894 + extra_cost->ldst.load_unaligned);
9895
9896 #ifdef NOT_YET
9897 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9898 ADDR_SPACE_GENERIC, speed_p);
9899 #endif
9900 return true;
9901
9902 case UNSPEC_UNALIGNED_STORE:
9903 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9904 if (speed_p)
9905 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9906 + extra_cost->ldst.store_unaligned);
9907
9908 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9909 #ifdef NOT_YET
9910 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9911 ADDR_SPACE_GENERIC, speed_p);
9912 #endif
9913 return true;
9914
9915 case UNSPEC_VRINTZ:
9916 case UNSPEC_VRINTP:
9917 case UNSPEC_VRINTM:
9918 case UNSPEC_VRINTR:
9919 case UNSPEC_VRINTX:
9920 case UNSPEC_VRINTA:
9921 if (speed_p)
9922 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9923
9924 return true;
9925 default:
9926 *cost = COSTS_N_INSNS (2);
9927 break;
9928 }
9929 return true;
9930 }
9931
9932 /* Cost of a libcall. We assume one insn per argument, an amount for the
9933 call (one insn for -Os) and then one for processing the result. */
9934 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
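/* So, for example, LIBCALL_COST (2) as used for division below expands
   to COSTS_N_INSNS (20) when optimizing for speed and COSTS_N_INSNS (4)
   when optimizing for size.  */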
9935
9936 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9937 do \
9938 { \
9939 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9940 if (shift_op != NULL \
9941 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9942 { \
9943 if (shift_reg) \
9944 { \
9945 if (speed_p) \
9946 *cost += extra_cost->alu.arith_shift_reg; \
9947 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9948 ASHIFT, 1, speed_p); \
9949 } \
9950 else if (speed_p) \
9951 *cost += extra_cost->alu.arith_shift; \
9952 \
9953 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9954 ASHIFT, 0, speed_p) \
9955 + rtx_cost (XEXP (x, 1 - IDX), \
9956 GET_MODE (shift_op), \
9957 OP, 1, speed_p)); \
9958 return true; \
9959 } \
9960 } \
9961 while (0)
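/* For example, HANDLE_NARROW_SHIFT_ARITH (MINUS, 0), as used further
   down, checks whether operand 0 of a narrow-mode MINUS is a left shift
   (possibly written as a multiply by a power of two) and, if so, adds
   the shifted-arithmetic costs and returns.  */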
9962
9963 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9964 considering the costs of the addressing mode and memory access
9965 separately. */
9966 static bool
9967 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9968 int *cost, bool speed_p)
9969 {
9970 machine_mode mode = GET_MODE (x);
9971
9972 *cost = COSTS_N_INSNS (1);
9973
9974 if (flag_pic
9975 && GET_CODE (XEXP (x, 0)) == PLUS
9976 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9977 /* This will be split into two instructions. Add the cost of the
9978 additional instruction here. The cost of the memory access is computed
9979 below. See arm.md:calculate_pic_address. */
9980 *cost += COSTS_N_INSNS (1);
9981
9982 /* Calculate cost of the addressing mode. */
9983 if (speed_p)
9984 {
9985 arm_addr_mode_op op_type;
9986 switch (GET_CODE (XEXP (x, 0)))
9987 {
9988 default:
9989 case REG:
9990 op_type = AMO_DEFAULT;
9991 break;
9992 case MINUS:
9993 /* MINUS does not appear in RTL, but the architecture supports it,
9994 so handle this case defensively. */
9995 /* fall through */
9996 case PLUS:
9997 op_type = AMO_NO_WB;
9998 break;
9999 case PRE_INC:
10000 case PRE_DEC:
10001 case POST_INC:
10002 case POST_DEC:
10003 case PRE_MODIFY:
10004 case POST_MODIFY:
10005 op_type = AMO_WB;
10006 break;
10007 }
10008
10009 if (VECTOR_MODE_P (mode))
10010 *cost += current_tune->addr_mode_costs->vector[op_type];
10011 else if (FLOAT_MODE_P (mode))
10012 *cost += current_tune->addr_mode_costs->fp[op_type];
10013 else
10014 *cost += current_tune->addr_mode_costs->integer[op_type];
10015 }
10016
10017 /* Calculate cost of memory access. */
10018 if (speed_p)
10019 {
10020 if (FLOAT_MODE_P (mode))
10021 {
10022 if (GET_MODE_SIZE (mode) == 8)
10023 *cost += extra_cost->ldst.loadd;
10024 else
10025 *cost += extra_cost->ldst.loadf;
10026 }
10027 else if (VECTOR_MODE_P (mode))
10028 *cost += extra_cost->ldst.loadv;
10029 else
10030 {
10031 /* Integer modes */
10032 if (GET_MODE_SIZE (mode) == 8)
10033 *cost += extra_cost->ldst.ldrd;
10034 else
10035 *cost += extra_cost->ldst.load;
10036 }
10037 }
10038
10039 return true;
10040 }
10041
10042 /* RTX costs. Make an estimate of the cost of executing the operation
10043 X, which is contained within an operation with code OUTER_CODE.
10044 SPEED_P indicates whether the cost desired is the performance cost,
10045 or the size cost. The estimate is stored in COST and the return
10046 value is TRUE if the cost calculation is final, or FALSE if the
10047 caller should recurse through the operands of X to add additional
10048 costs.
10049
10050 We currently make no attempt to model the size savings of Thumb-2
10051 16-bit instructions. At the normal points in compilation where
10052 this code is called we have no measure of whether the condition
10053 flags are live or not, and thus no realistic way to determine what
10054 the size will eventually be. */
10055 static bool
10056 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
10057 const struct cpu_cost_table *extra_cost,
10058 int *cost, bool speed_p)
10059 {
10060 machine_mode mode = GET_MODE (x);
10061
10062 *cost = COSTS_N_INSNS (1);
10063
10064 if (TARGET_THUMB1)
10065 {
10066 if (speed_p)
10067 *cost = thumb1_rtx_costs (x, code, outer_code);
10068 else
10069 *cost = thumb1_size_rtx_costs (x, code, outer_code);
10070 return true;
10071 }
10072
10073 switch (code)
10074 {
10075 case SET:
10076 *cost = 0;
10077 /* SET RTXs don't have a mode so we get it from the destination. */
10078 mode = GET_MODE (SET_DEST (x));
10079
10080 if (REG_P (SET_SRC (x))
10081 && REG_P (SET_DEST (x)))
10082 {
10083 /* Assume that most copies can be done with a single insn,
10084 unless we don't have HW FP, in which case everything
10085 larger than word mode will require two insns. */
10086 *cost = COSTS_N_INSNS (((!TARGET_VFP_BASE
10087 && GET_MODE_SIZE (mode) > 4)
10088 || mode == DImode)
10089 ? 2 : 1);
10090 /* Conditional register moves can be encoded
10091 in 16 bits in Thumb mode. */
10092 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
10093 *cost >>= 1;
10094
10095 return true;
10096 }
10097
10098 if (CONST_INT_P (SET_SRC (x)))
10099 {
10100 /* Handle CONST_INT here, since the value doesn't have a mode
10101 and we would otherwise be unable to work out the true cost. */
10102 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
10103 0, speed_p);
10104 outer_code = SET;
10105 /* Slightly lower the cost of setting a core reg to a constant.
10106 This helps break up chains and allows for better scheduling. */
10107 if (REG_P (SET_DEST (x))
10108 && REGNO (SET_DEST (x)) <= LR_REGNUM)
10109 *cost -= 1;
10110 x = SET_SRC (x);
10111 /* Immediate moves with an immediate in the range [0, 255] can be
10112 encoded in 16 bits in Thumb mode. */
10113 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
10114 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
10115 *cost >>= 1;
10116 goto const_int_cost;
10117 }
10118
10119 return false;
10120
10121 case MEM:
10122 return arm_mem_costs (x, extra_cost, cost, speed_p);
10123
10124 case PARALLEL:
10125 {
10126 /* Calculations of LDM costs are complex. We assume an initial cost
10127 (ldm_1st) which will load the number of registers mentioned in
10128 ldm_regs_per_insn_1st registers; then each additional
10129 ldm_regs_per_insn_subsequent registers cost one more insn. The
10130 formula for N regs is thus:
10131
10132 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
10133 + ldm_regs_per_insn_subsequent - 1)
10134 / ldm_regs_per_insn_subsequent).
10135
10136 Additional costs may also be added for addressing. A similar
10137 formula is used for STM. */
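/* As an illustration (using hypothetical tuning values, not any
   particular cost table): with ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2, an LDM of 5 registers costs
   ldm_1st + COSTS_N_INSNS ((5 - 2 + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2).  */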
10138
10139 bool is_ldm = load_multiple_operation (x, SImode);
10140 bool is_stm = store_multiple_operation (x, SImode);
10141
10142 if (is_ldm || is_stm)
10143 {
10144 if (speed_p)
10145 {
10146 HOST_WIDE_INT nregs = XVECLEN (x, 0);
10147 HOST_WIDE_INT regs_per_insn_1st = is_ldm
10148 ? extra_cost->ldst.ldm_regs_per_insn_1st
10149 : extra_cost->ldst.stm_regs_per_insn_1st;
10150 HOST_WIDE_INT regs_per_insn_sub = is_ldm
10151 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
10152 : extra_cost->ldst.stm_regs_per_insn_subsequent;
10153
10154 *cost += regs_per_insn_1st
10155 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
10156 + regs_per_insn_sub - 1)
10157 / regs_per_insn_sub);
10158 return true;
10159 }
10160
10161 }
10162 return false;
10163 }
10164 case DIV:
10165 case UDIV:
10166 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10167 && (mode == SFmode || !TARGET_VFP_SINGLE))
10168 *cost += COSTS_N_INSNS (speed_p
10169 ? extra_cost->fp[mode != SFmode].div : 0);
10170 else if (mode == SImode && TARGET_IDIV)
10171 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
10172 else
10173 *cost = LIBCALL_COST (2);
10174
10175 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10176 are possible, udiv is preferred. */
10177 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
10178 return false; /* All arguments must be in registers. */
10179
10180 case MOD:
10181 /* MOD by a power of 2 can be expanded as:
10182 rsbs r1, r0, #0
10183 and r0, r0, #(n - 1)
10184 and r1, r1, #(n - 1)
10185 rsbpl r0, r1, #0. */
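/* For example, a signed x % 8 uses the sequence above with
   n - 1 == 7: four register instructions and no division at all,
   which is what the cost added below reflects.  */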
10186 if (CONST_INT_P (XEXP (x, 1))
10187 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
10188 && mode == SImode)
10189 {
10190 *cost += COSTS_N_INSNS (3);
10191
10192 if (speed_p)
10193 *cost += 2 * extra_cost->alu.logical
10194 + extra_cost->alu.arith;
10195 return true;
10196 }
10197
10198 /* Fall-through. */
10199 case UMOD:
10200 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
10201 are possible, udiv is preferred. */
10202 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
10203 return false; /* All arguments must be in registers. */
10204
10205 case ROTATE:
10206 if (mode == SImode && REG_P (XEXP (x, 1)))
10207 {
10208 *cost += (COSTS_N_INSNS (1)
10209 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10210 if (speed_p)
10211 *cost += extra_cost->alu.shift_reg;
10212 return true;
10213 }
10214 /* Fall through */
10215 case ROTATERT:
10216 case ASHIFT:
10217 case LSHIFTRT:
10218 case ASHIFTRT:
10219 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
10220 {
10221 *cost += (COSTS_N_INSNS (2)
10222 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
10223 if (speed_p)
10224 *cost += 2 * extra_cost->alu.shift;
10225 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
10226 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
10227 *cost += 1;
10228 return true;
10229 }
10230 else if (mode == SImode)
10231 {
10232 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10233 /* Slightly disparage register shifts at -Os, but not by much. */
10234 if (!CONST_INT_P (XEXP (x, 1)))
10235 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10236 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10237 return true;
10238 }
10239 else if (GET_MODE_CLASS (mode) == MODE_INT
10240 && GET_MODE_SIZE (mode) < 4)
10241 {
10242 if (code == ASHIFT)
10243 {
10244 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10245 /* Slightly disparage register shifts at -Os, but not by
10246 much. */
10247 if (!CONST_INT_P (XEXP (x, 1)))
10248 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
10249 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10250 }
10251 else if (code == LSHIFTRT || code == ASHIFTRT)
10252 {
10253 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
10254 {
10255 /* Can use SBFX/UBFX. */
10256 if (speed_p)
10257 *cost += extra_cost->alu.bfx;
10258 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10259 }
10260 else
10261 {
10262 *cost += COSTS_N_INSNS (1);
10263 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10264 if (speed_p)
10265 {
10266 if (CONST_INT_P (XEXP (x, 1)))
10267 *cost += 2 * extra_cost->alu.shift;
10268 else
10269 *cost += (extra_cost->alu.shift
10270 + extra_cost->alu.shift_reg);
10271 }
10272 else
10273 /* Slightly disparage register shifts. */
10274 *cost += !CONST_INT_P (XEXP (x, 1));
10275 }
10276 }
10277 else /* Rotates. */
10278 {
10279 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
10280 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10281 if (speed_p)
10282 {
10283 if (CONST_INT_P (XEXP (x, 1)))
10284 *cost += (2 * extra_cost->alu.shift
10285 + extra_cost->alu.log_shift);
10286 else
10287 *cost += (extra_cost->alu.shift
10288 + extra_cost->alu.shift_reg
10289 + extra_cost->alu.log_shift_reg);
10290 }
10291 }
10292 return true;
10293 }
10294
10295 *cost = LIBCALL_COST (2);
10296 return false;
10297
10298 case BSWAP:
10299 if (arm_arch6)
10300 {
10301 if (mode == SImode)
10302 {
10303 if (speed_p)
10304 *cost += extra_cost->alu.rev;
10305
10306 return false;
10307 }
10308 }
10309 else
10310 {
10311 /* No rev instruction available. Look at arm_legacy_rev
10312 and thumb_legacy_rev for the form of RTL used then. */
10313 if (TARGET_THUMB)
10314 {
10315 *cost += COSTS_N_INSNS (9);
10316
10317 if (speed_p)
10318 {
10319 *cost += 6 * extra_cost->alu.shift;
10320 *cost += 3 * extra_cost->alu.logical;
10321 }
10322 }
10323 else
10324 {
10325 *cost += COSTS_N_INSNS (4);
10326
10327 if (speed_p)
10328 {
10329 *cost += 2 * extra_cost->alu.shift;
10330 *cost += extra_cost->alu.arith_shift;
10331 *cost += 2 * extra_cost->alu.logical;
10332 }
10333 }
10334 return true;
10335 }
10336 return false;
10337
10338 case MINUS:
10339 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10340 && (mode == SFmode || !TARGET_VFP_SINGLE))
10341 {
10342 if (GET_CODE (XEXP (x, 0)) == MULT
10343 || GET_CODE (XEXP (x, 1)) == MULT)
10344 {
10345 rtx mul_op0, mul_op1, sub_op;
10346
10347 if (speed_p)
10348 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10349
10350 if (GET_CODE (XEXP (x, 0)) == MULT)
10351 {
10352 mul_op0 = XEXP (XEXP (x, 0), 0);
10353 mul_op1 = XEXP (XEXP (x, 0), 1);
10354 sub_op = XEXP (x, 1);
10355 }
10356 else
10357 {
10358 mul_op0 = XEXP (XEXP (x, 1), 0);
10359 mul_op1 = XEXP (XEXP (x, 1), 1);
10360 sub_op = XEXP (x, 0);
10361 }
10362
10363 /* The first operand of the multiply may be optionally
10364 negated. */
10365 if (GET_CODE (mul_op0) == NEG)
10366 mul_op0 = XEXP (mul_op0, 0);
10367
10368 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10369 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10370 + rtx_cost (sub_op, mode, code, 0, speed_p));
10371
10372 return true;
10373 }
10374
10375 if (speed_p)
10376 *cost += extra_cost->fp[mode != SFmode].addsub;
10377 return false;
10378 }
10379
10380 if (mode == SImode)
10381 {
10382 rtx shift_by_reg = NULL;
10383 rtx shift_op;
10384 rtx non_shift_op;
10385 rtx op0 = XEXP (x, 0);
10386 rtx op1 = XEXP (x, 1);
10387
10388 /* Factor out any borrow operation. There's more than one way
10389 of expressing this; try to recognize them all. */
10390 if (GET_CODE (op0) == MINUS)
10391 {
10392 if (arm_borrow_operation (op1, SImode))
10393 {
10394 op1 = XEXP (op0, 1);
10395 op0 = XEXP (op0, 0);
10396 }
10397 else if (arm_borrow_operation (XEXP (op0, 1), SImode))
10398 op0 = XEXP (op0, 0);
10399 }
10400 else if (GET_CODE (op1) == PLUS
10401 && arm_borrow_operation (XEXP (op1, 0), SImode))
10402 op1 = XEXP (op1, 0);
10403 else if (GET_CODE (op0) == NEG
10404 && arm_borrow_operation (op1, SImode))
10405 {
10406 /* Negate with carry-in. For Thumb2 this is done with
10407 SBC R, X, X lsl #1 (i.e. X - 2X - C) as Thumb lacks the
10408 RSC instruction that exists in Arm mode. */
10409 if (speed_p)
10410 *cost += (TARGET_THUMB2
10411 ? extra_cost->alu.arith_shift
10412 : extra_cost->alu.arith);
10413 *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
10414 return true;
10415 }
10416 /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
10417 Note we do mean ~borrow here. */
10418 else if (TARGET_ARM && arm_carry_operation (op0, SImode))
10419 {
10420 *cost += rtx_cost (op1, mode, code, 1, speed_p);
10421 return true;
10422 }
10423
10424 shift_op = shifter_op_p (op0, &shift_by_reg);
10425 if (shift_op == NULL)
10426 {
10427 shift_op = shifter_op_p (op1, &shift_by_reg);
10428 non_shift_op = op0;
10429 }
10430 else
10431 non_shift_op = op1;
10432
10433 if (shift_op != NULL)
10434 {
10435 if (shift_by_reg != NULL)
10436 {
10437 if (speed_p)
10438 *cost += extra_cost->alu.arith_shift_reg;
10439 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
10440 }
10441 else if (speed_p)
10442 *cost += extra_cost->alu.arith_shift;
10443
10444 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
10445 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
10446 return true;
10447 }
10448
10449 if (arm_arch_thumb2
10450 && GET_CODE (XEXP (x, 1)) == MULT)
10451 {
10452 /* MLS. */
10453 if (speed_p)
10454 *cost += extra_cost->mult[0].add;
10455 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
10456 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
10457 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
10458 return true;
10459 }
10460
10461 if (CONST_INT_P (op0))
10462 {
10463 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10464 INTVAL (op0), NULL_RTX,
10465 NULL_RTX, 1, 0);
10466 *cost = COSTS_N_INSNS (insns);
10467 if (speed_p)
10468 *cost += insns * extra_cost->alu.arith;
10469 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10470 return true;
10471 }
10472 else if (speed_p)
10473 *cost += extra_cost->alu.arith;
10474
10475 /* Don't recurse as we don't want to cost any borrow that
10476 we've stripped. */
10477 *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
10478 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10479 return true;
10480 }
10481
10482 if (GET_MODE_CLASS (mode) == MODE_INT
10483 && GET_MODE_SIZE (mode) < 4)
10484 {
10485 rtx shift_op, shift_reg;
10486 shift_reg = NULL;
10487
10488 /* We check both sides of the MINUS for shifter operands since,
10489 unlike PLUS, it's not commutative. */
10490
10491 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
10492 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
10493
10494 /* Slightly disparage, as we might need to widen the result. */
10495 *cost += 1;
10496 if (speed_p)
10497 *cost += extra_cost->alu.arith;
10498
10499 if (CONST_INT_P (XEXP (x, 0)))
10500 {
10501 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10502 return true;
10503 }
10504
10505 return false;
10506 }
10507
10508 if (mode == DImode)
10509 {
10510 *cost += COSTS_N_INSNS (1);
10511
10512 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10513 {
10514 rtx op1 = XEXP (x, 1);
10515
10516 if (speed_p)
10517 *cost += 2 * extra_cost->alu.arith;
10518
10519 if (GET_CODE (op1) == ZERO_EXTEND)
10520 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10521 0, speed_p);
10522 else
10523 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10524 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10525 0, speed_p);
10526 return true;
10527 }
10528 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10529 {
10530 if (speed_p)
10531 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10532 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10533 0, speed_p)
10534 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10535 return true;
10536 }
10537 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10538 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10539 {
10540 if (speed_p)
10541 *cost += (extra_cost->alu.arith
10542 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10543 ? extra_cost->alu.arith
10544 : extra_cost->alu.arith_shift));
10545 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10546 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10547 GET_CODE (XEXP (x, 1)), 0, speed_p));
10548 return true;
10549 }
10550
10551 if (speed_p)
10552 *cost += 2 * extra_cost->alu.arith;
10553 return false;
10554 }
10555
10556 /* Vector mode? */
10557
10558 *cost = LIBCALL_COST (2);
10559 return false;
10560
10561 case PLUS:
10562 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10563 && (mode == SFmode || !TARGET_VFP_SINGLE))
10564 {
10565 if (GET_CODE (XEXP (x, 0)) == MULT)
10566 {
10567 rtx mul_op0, mul_op1, add_op;
10568
10569 if (speed_p)
10570 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10571
10572 mul_op0 = XEXP (XEXP (x, 0), 0);
10573 mul_op1 = XEXP (XEXP (x, 0), 1);
10574 add_op = XEXP (x, 1);
10575
10576 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10577 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10578 + rtx_cost (add_op, mode, code, 0, speed_p));
10579
10580 return true;
10581 }
10582
10583 if (speed_p)
10584 *cost += extra_cost->fp[mode != SFmode].addsub;
10585 return false;
10586 }
10587 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10588 {
10589 *cost = LIBCALL_COST (2);
10590 return false;
10591 }
10592
10593 /* Narrow modes can be synthesized in SImode, but the range
10594 of useful sub-operations is limited. Check for shift operations
10595 on one of the operands. Only left shifts can be used in the
10596 narrow modes. */
10597 if (GET_MODE_CLASS (mode) == MODE_INT
10598 && GET_MODE_SIZE (mode) < 4)
10599 {
10600 rtx shift_op, shift_reg;
10601 shift_reg = NULL;
10602
10603 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
10604
10605 if (CONST_INT_P (XEXP (x, 1)))
10606 {
10607 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10608 INTVAL (XEXP (x, 1)), NULL_RTX,
10609 NULL_RTX, 1, 0);
10610 *cost = COSTS_N_INSNS (insns);
10611 if (speed_p)
10612 *cost += insns * extra_cost->alu.arith;
10613 /* Slightly penalize a narrow operation as the result may
10614 need widening. */
10615 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10616 return true;
10617 }
10618
10619 /* Slightly penalize a narrow operation as the result may
10620 need widening. */
10621 *cost += 1;
10622 if (speed_p)
10623 *cost += extra_cost->alu.arith;
10624
10625 return false;
10626 }
10627
10628 if (mode == SImode)
10629 {
10630 rtx shift_op, shift_reg;
10631
10632 if (TARGET_INT_SIMD
10633 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10634 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10635 {
10636 /* UXTA[BH] or SXTA[BH]. */
10637 if (speed_p)
10638 *cost += extra_cost->alu.extend_arith;
10639 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10640 0, speed_p)
10641 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10642 return true;
10643 }
10644
10645 rtx op0 = XEXP (x, 0);
10646 rtx op1 = XEXP (x, 1);
10647
10648 /* Handle a side effect of adding in the carry to an addition. */
10649 if (GET_CODE (op0) == PLUS
10650 && arm_carry_operation (op1, mode))
10651 {
10652 op1 = XEXP (op0, 1);
10653 op0 = XEXP (op0, 0);
10654 }
10655 else if (GET_CODE (op1) == PLUS
10656 && arm_carry_operation (op0, mode))
10657 {
10658 op0 = XEXP (op1, 0);
10659 op1 = XEXP (op1, 1);
10660 }
10661 else if (GET_CODE (op0) == PLUS)
10662 {
10663 op0 = strip_carry_operation (op0);
10664 if (swap_commutative_operands_p (op0, op1))
10665 std::swap (op0, op1);
10666 }
10667
10668 if (arm_carry_operation (op0, mode))
10669 {
10670 /* Adding the carry to a register is a canonicalization of
10671 adding 0 to the register plus the carry. */
10672 if (speed_p)
10673 *cost += extra_cost->alu.arith;
10674 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10675 return true;
10676 }
10677
10678 shift_reg = NULL;
10679 shift_op = shifter_op_p (op0, &shift_reg);
10680 if (shift_op != NULL)
10681 {
10682 if (shift_reg)
10683 {
10684 if (speed_p)
10685 *cost += extra_cost->alu.arith_shift_reg;
10686 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10687 }
10688 else if (speed_p)
10689 *cost += extra_cost->alu.arith_shift;
10690
10691 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10692 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10693 return true;
10694 }
10695
10696 if (GET_CODE (op0) == MULT)
10697 {
10698 rtx mul_op = op0;
10699
10700 if (TARGET_DSP_MULTIPLY
10701 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10702 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10703 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10704 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10705 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10706 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10707 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10708 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10709 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10710 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10711 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10712 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10713 == 16))))))
10714 {
10715 /* SMLA[BT][BT]. */
10716 if (speed_p)
10717 *cost += extra_cost->mult[0].extend_add;
10718 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10719 SIGN_EXTEND, 0, speed_p)
10720 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10721 SIGN_EXTEND, 0, speed_p)
10722 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10723 return true;
10724 }
10725
10726 if (speed_p)
10727 *cost += extra_cost->mult[0].add;
10728 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10729 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10730 + rtx_cost (op1, mode, PLUS, 1, speed_p));
10731 return true;
10732 }
10733
10734 if (CONST_INT_P (op1))
10735 {
10736 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10737 INTVAL (op1), NULL_RTX,
10738 NULL_RTX, 1, 0);
10739 *cost = COSTS_N_INSNS (insns);
10740 if (speed_p)
10741 *cost += insns * extra_cost->alu.arith;
10742 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10743 return true;
10744 }
10745
10746 if (speed_p)
10747 *cost += extra_cost->alu.arith;
10748
10749 /* Don't recurse here because we want to test the operands
10750 without any carry operation. */
10751 *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
10752 *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
10753 return true;
10754 }
10755
10756 if (mode == DImode)
10757 {
10758 if (GET_CODE (XEXP (x, 0)) == MULT
10759 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10760 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10761 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10762 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10763 {
10764 if (speed_p)
10765 *cost += extra_cost->mult[1].extend_add;
10766 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10767 ZERO_EXTEND, 0, speed_p)
10768 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10769 ZERO_EXTEND, 0, speed_p)
10770 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10771 return true;
10772 }
10773
10774 *cost += COSTS_N_INSNS (1);
10775
10776 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10777 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10778 {
10779 if (speed_p)
10780 *cost += (extra_cost->alu.arith
10781 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10782 ? extra_cost->alu.arith
10783 : extra_cost->alu.arith_shift));
10784
10785 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10786 0, speed_p)
10787 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10788 return true;
10789 }
10790
10791 if (speed_p)
10792 *cost += 2 * extra_cost->alu.arith;
10793 return false;
10794 }
10795
10796 /* Vector mode? */
10797 *cost = LIBCALL_COST (2);
10798 return false;
10799 case IOR:
10800 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10801 {
10802 if (speed_p)
10803 *cost += extra_cost->alu.rev;
10804
10805 return true;
10806 }
10807 /* Fall through. */
10808 case AND: case XOR:
10809 if (mode == SImode)
10810 {
10811 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10812 rtx op0 = XEXP (x, 0);
10813 rtx shift_op, shift_reg;
10814
10815 if (subcode == NOT
10816 && (code == AND
10817 || (code == IOR && TARGET_THUMB2)))
10818 op0 = XEXP (op0, 0);
10819
10820 shift_reg = NULL;
10821 shift_op = shifter_op_p (op0, &shift_reg);
10822 if (shift_op != NULL)
10823 {
10824 if (shift_reg)
10825 {
10826 if (speed_p)
10827 *cost += extra_cost->alu.log_shift_reg;
10828 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10829 }
10830 else if (speed_p)
10831 *cost += extra_cost->alu.log_shift;
10832
10833 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10834 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10835 return true;
10836 }
10837
10838 if (CONST_INT_P (XEXP (x, 1)))
10839 {
10840 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10841 INTVAL (XEXP (x, 1)), NULL_RTX,
10842 NULL_RTX, 1, 0);
10843
10844 *cost = COSTS_N_INSNS (insns);
10845 if (speed_p)
10846 *cost += insns * extra_cost->alu.logical;
10847 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10848 return true;
10849 }
10850
10851 if (speed_p)
10852 *cost += extra_cost->alu.logical;
10853 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10854 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10855 return true;
10856 }
10857
10858 if (mode == DImode)
10859 {
10860 rtx op0 = XEXP (x, 0);
10861 enum rtx_code subcode = GET_CODE (op0);
10862
10863 *cost += COSTS_N_INSNS (1);
10864
10865 if (subcode == NOT
10866 && (code == AND
10867 || (code == IOR && TARGET_THUMB2)))
10868 op0 = XEXP (op0, 0);
10869
10870 if (GET_CODE (op0) == ZERO_EXTEND)
10871 {
10872 if (speed_p)
10873 *cost += 2 * extra_cost->alu.logical;
10874
10875 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10876 0, speed_p)
10877 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10878 return true;
10879 }
10880 else if (GET_CODE (op0) == SIGN_EXTEND)
10881 {
10882 if (speed_p)
10883 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10884
10885 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10886 0, speed_p)
10887 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10888 return true;
10889 }
10890
10891 if (speed_p)
10892 *cost += 2 * extra_cost->alu.logical;
10893
10894 return true;
10895 }
10896 /* Vector mode? */
10897
10898 *cost = LIBCALL_COST (2);
10899 return false;
10900
10901 case MULT:
10902 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10903 && (mode == SFmode || !TARGET_VFP_SINGLE))
10904 {
10905 rtx op0 = XEXP (x, 0);
10906
10907 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10908 op0 = XEXP (op0, 0);
10909
10910 if (speed_p)
10911 *cost += extra_cost->fp[mode != SFmode].mult;
10912
10913 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10914 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10915 return true;
10916 }
10917 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10918 {
10919 *cost = LIBCALL_COST (2);
10920 return false;
10921 }
10922
10923 if (mode == SImode)
10924 {
10925 if (TARGET_DSP_MULTIPLY
10926 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10927 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10928 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10929 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10930 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10931 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10932 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10933 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10934 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10935 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10936 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10937 && (INTVAL (XEXP (XEXP (x, 1), 1))
10938 == 16))))))
10939 {
10940 /* SMUL[TB][TB]. */
10941 if (speed_p)
10942 *cost += extra_cost->mult[0].extend;
10943 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10944 SIGN_EXTEND, 0, speed_p);
10945 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10946 SIGN_EXTEND, 1, speed_p);
10947 return true;
10948 }
10949 if (speed_p)
10950 *cost += extra_cost->mult[0].simple;
10951 return false;
10952 }
10953
10954 if (mode == DImode)
10955 {
10956 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10957 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10958 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10959 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10960 {
10961 if (speed_p)
10962 *cost += extra_cost->mult[1].extend;
10963 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10964 ZERO_EXTEND, 0, speed_p)
10965 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10966 ZERO_EXTEND, 0, speed_p));
10967 return true;
10968 }
10969
10970 *cost = LIBCALL_COST (2);
10971 return false;
10972 }
10973
10974 /* Vector mode? */
10975 *cost = LIBCALL_COST (2);
10976 return false;
10977
10978 case NEG:
10979 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10980 && (mode == SFmode || !TARGET_VFP_SINGLE))
10981 {
10982 if (GET_CODE (XEXP (x, 0)) == MULT)
10983 {
10984 /* VNMUL. */
10985 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10986 return true;
10987 }
10988
10989 if (speed_p)
10990 *cost += extra_cost->fp[mode != SFmode].neg;
10991
10992 return false;
10993 }
10994 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10995 {
10996 *cost = LIBCALL_COST (1);
10997 return false;
10998 }
10999
11000 if (mode == SImode)
11001 {
11002 if (GET_CODE (XEXP (x, 0)) == ABS)
11003 {
11004 *cost += COSTS_N_INSNS (1);
11005 /* Assume the non-flag-changing variant. */
11006 if (speed_p)
11007 *cost += (extra_cost->alu.log_shift
11008 + extra_cost->alu.arith_shift);
11009 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
11010 return true;
11011 }
11012
11013 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
11014 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
11015 {
11016 *cost += COSTS_N_INSNS (1);
11017 /* No extra cost for MOV imm and MVN imm. */
11018 /* If the comparison op is using the flags, there's no further
11019 cost; otherwise we need to add the cost of the comparison. */
11020 if (!(REG_P (XEXP (XEXP (x, 0), 0))
11021 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
11022 && XEXP (XEXP (x, 0), 1) == const0_rtx))
11023 {
11024 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
11025 *cost += (COSTS_N_INSNS (1)
11026 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
11027 0, speed_p)
11028 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
11029 1, speed_p));
11030 if (speed_p)
11031 *cost += extra_cost->alu.arith;
11032 }
11033 return true;
11034 }
11035
11036 if (speed_p)
11037 *cost += extra_cost->alu.arith;
11038 return false;
11039 }
11040
11041 if (GET_MODE_CLASS (mode) == MODE_INT
11042 && GET_MODE_SIZE (mode) < 4)
11043 {
11044 /* Slightly disparage, as we might need an extend operation. */
11045 *cost += 1;
11046 if (speed_p)
11047 *cost += extra_cost->alu.arith;
11048 return false;
11049 }
11050
11051 if (mode == DImode)
11052 {
11053 *cost += COSTS_N_INSNS (1);
11054 if (speed_p)
11055 *cost += 2 * extra_cost->alu.arith;
11056 return false;
11057 }
11058
11059 /* Vector mode? */
11060 *cost = LIBCALL_COST (1);
11061 return false;
11062
11063 case NOT:
11064 if (mode == SImode)
11065 {
11066 rtx shift_op;
11067 rtx shift_reg = NULL;
11068
11069 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11070
11071 if (shift_op)
11072 {
11073 if (shift_reg != NULL)
11074 {
11075 if (speed_p)
11076 *cost += extra_cost->alu.log_shift_reg;
11077 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
11078 }
11079 else if (speed_p)
11080 *cost += extra_cost->alu.log_shift;
11081 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
11082 return true;
11083 }
11084
11085 if (speed_p)
11086 *cost += extra_cost->alu.logical;
11087 return false;
11088 }
11089 if (mode == DImode)
11090 {
11091 *cost += COSTS_N_INSNS (1);
11092 return false;
11093 }
11094
11095 /* Vector mode? */
11096
11097 *cost += LIBCALL_COST (1);
11098 return false;
11099
11100 case IF_THEN_ELSE:
11101 {
11102 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
11103 {
11104 *cost += COSTS_N_INSNS (3);
11105 return true;
11106 }
11107 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
11108 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
11109
11110 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
11111 /* Assume that if one arm of the if_then_else is a register,
11112 it will be tied with the result and eliminate the
11113 conditional insn. */
11114 if (REG_P (XEXP (x, 1)))
11115 *cost += op2cost;
11116 else if (REG_P (XEXP (x, 2)))
11117 *cost += op1cost;
11118 else
11119 {
11120 if (speed_p)
11121 {
11122 if (extra_cost->alu.non_exec_costs_exec)
11123 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
11124 else
11125 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
11126 }
11127 else
11128 *cost += op1cost + op2cost;
11129 }
11130 }
11131 return true;
11132
11133 case COMPARE:
11134 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
11135 *cost = 0;
11136 else
11137 {
11138 machine_mode op0mode;
11139 /* We'll mostly assume that the cost of a compare is the cost of the
11140 LHS. However, there are some notable exceptions. */
11141
11142 /* Floating point compares are never done as side-effects. */
11143 op0mode = GET_MODE (XEXP (x, 0));
11144 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
11145 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
11146 {
11147 if (speed_p)
11148 *cost += extra_cost->fp[op0mode != SFmode].compare;
11149
11150 if (XEXP (x, 1) == CONST0_RTX (op0mode))
11151 {
11152 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
11153 return true;
11154 }
11155
11156 return false;
11157 }
11158 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
11159 {
11160 *cost = LIBCALL_COST (2);
11161 return false;
11162 }
11163
11164 /* DImode compares normally take two insns. */
11165 if (op0mode == DImode)
11166 {
11167 *cost += COSTS_N_INSNS (1);
11168 if (speed_p)
11169 *cost += 2 * extra_cost->alu.arith;
11170 return false;
11171 }
11172
11173 if (op0mode == SImode)
11174 {
11175 rtx shift_op;
11176 rtx shift_reg;
11177
11178 if (XEXP (x, 1) == const0_rtx
11179 && !(REG_P (XEXP (x, 0))
11180 || (GET_CODE (XEXP (x, 0)) == SUBREG
11181 && REG_P (SUBREG_REG (XEXP (x, 0))))))
11182 {
11183 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11184
11185 /* Multiply operations that set the flags are often
11186 significantly more expensive. */
11187 if (speed_p
11188 && GET_CODE (XEXP (x, 0)) == MULT
11189 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
11190 *cost += extra_cost->mult[0].flag_setting;
11191
11192 if (speed_p
11193 && GET_CODE (XEXP (x, 0)) == PLUS
11194 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11195 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
11196 0), 1), mode))
11197 *cost += extra_cost->mult[0].flag_setting;
11198 return true;
11199 }
11200
11201 shift_reg = NULL;
11202 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
11203 if (shift_op != NULL)
11204 {
11205 if (shift_reg != NULL)
11206 {
11207 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
11208 1, speed_p);
11209 if (speed_p)
11210 *cost += extra_cost->alu.arith_shift_reg;
11211 }
11212 else if (speed_p)
11213 *cost += extra_cost->alu.arith_shift;
11214 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
11215 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
11216 return true;
11217 }
11218
11219 if (speed_p)
11220 *cost += extra_cost->alu.arith;
11221 if (CONST_INT_P (XEXP (x, 1))
11222 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11223 {
11224 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
11225 return true;
11226 }
11227 return false;
11228 }
11229
11230 /* Vector mode? */
11231
11232 *cost = LIBCALL_COST (2);
11233 return false;
11234 }
11235 return true;
11236
11237 case EQ:
11238 case GE:
11239 case GT:
11240 case LE:
11241 case LT:
11242 /* Neon has special instructions when comparing with 0 (vceq, vcge, vcgt,
11243 vcle and vclt). */
11244 if (TARGET_NEON
11245 && TARGET_HARD_FLOAT
11246 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11247 && (XEXP (x, 1) == CONST0_RTX (mode)))
11248 {
11249 *cost = 0;
11250 return true;
11251 }
11252
11253 /* Fall through. */
11254 case NE:
11255 case LTU:
11256 case LEU:
11257 case GEU:
11258 case GTU:
11259 case ORDERED:
11260 case UNORDERED:
11261 case UNEQ:
11262 case UNLE:
11263 case UNLT:
11264 case UNGE:
11265 case UNGT:
11266 case LTGT:
11267 if (outer_code == SET)
11268 {
11269 /* Is it a store-flag operation? */
11270 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11271 && XEXP (x, 1) == const0_rtx)
11272 {
11273 /* Thumb also needs an IT insn. */
11274 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
11275 return true;
11276 }
11277 if (XEXP (x, 1) == const0_rtx)
11278 {
11279 switch (code)
11280 {
11281 case LT:
11282 /* LSR Rd, Rn, #31. */
11283 if (speed_p)
11284 *cost += extra_cost->alu.shift;
11285 break;
11286
11287 case EQ:
11288 /* RSBS T1, Rn, #0
11289 ADC Rd, Rn, T1. */
11290
11291 case NE:
11292 /* SUBS T1, Rn, #1
11293 SBC Rd, Rn, T1. */
11294 *cost += COSTS_N_INSNS (1);
11295 break;
11296
11297 case LE:
11298 /* RSBS T1, Rn, Rn, LSR #31
11299 ADC Rd, Rn, T1. */
11300 *cost += COSTS_N_INSNS (1);
11301 if (speed_p)
11302 *cost += extra_cost->alu.arith_shift;
11303 break;
11304
11305 case GT:
11306 /* RSB Rd, Rn, Rn, ASR #1
11307 LSR Rd, Rd, #31. */
11308 *cost += COSTS_N_INSNS (1);
11309 if (speed_p)
11310 *cost += (extra_cost->alu.arith_shift
11311 + extra_cost->alu.shift);
11312 break;
11313
11314 case GE:
11315 /* ASR Rd, Rn, #31
11316 ADD Rd, Rn, #1. */
11317 *cost += COSTS_N_INSNS (1);
11318 if (speed_p)
11319 *cost += extra_cost->alu.shift;
11320 break;
11321
11322 default:
11323 /* Remaining cases are either meaningless or would take
11324 three insns anyway. */
11325 *cost = COSTS_N_INSNS (3);
11326 break;
11327 }
11328 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11329 return true;
11330 }
11331 else
11332 {
11333 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
11334 if (CONST_INT_P (XEXP (x, 1))
11335 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
11336 {
11337 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11338 return true;
11339 }
11340
11341 return false;
11342 }
11343 }
11344 /* Not directly inside a set. If it involves the condition code
11345 register it must be the condition for a branch, cond_exec or
11346 I_T_E operation. Since the comparison is performed elsewhere
11347 this is just the control part which has no additional
11348 cost. */
11349 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
11350 && XEXP (x, 1) == const0_rtx)
11351 {
11352 *cost = 0;
11353 return true;
11354 }
11355 return false;
11356
11357 case ABS:
11358 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11359 && (mode == SFmode || !TARGET_VFP_SINGLE))
11360 {
11361 if (speed_p)
11362 *cost += extra_cost->fp[mode != SFmode].neg;
11363
11364 return false;
11365 }
11366 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11367 {
11368 *cost = LIBCALL_COST (1);
11369 return false;
11370 }
11371
11372 if (mode == SImode)
11373 {
11374 if (speed_p)
11375 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
11376 return false;
11377 }
11378 /* Vector mode? */
11379 *cost = LIBCALL_COST (1);
11380 return false;
11381
11382 case SIGN_EXTEND:
11383 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
11384 && MEM_P (XEXP (x, 0)))
11385 {
11386 if (mode == DImode)
11387 *cost += COSTS_N_INSNS (1);
11388
11389 if (!speed_p)
11390 return true;
11391
11392 if (GET_MODE (XEXP (x, 0)) == SImode)
11393 *cost += extra_cost->ldst.load;
11394 else
11395 *cost += extra_cost->ldst.load_sign_extend;
11396
11397 if (mode == DImode)
11398 *cost += extra_cost->alu.shift;
11399
11400 return true;
11401 }
11402
11403 /* Widening from less than 32 bits requires an extend operation. */
11404 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11405 {
11406 /* We have SXTB/SXTH. */
11407 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11408 if (speed_p)
11409 *cost += extra_cost->alu.extend;
11410 }
11411 else if (GET_MODE (XEXP (x, 0)) != SImode)
11412 {
11413 /* Needs two shifts. */
11414 *cost += COSTS_N_INSNS (1);
11415 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11416 if (speed_p)
11417 *cost += 2 * extra_cost->alu.shift;
11418 }
11419
11420 /* Widening beyond 32 bits requires one more insn. */
11421 if (mode == DImode)
11422 {
11423 *cost += COSTS_N_INSNS (1);
11424 if (speed_p)
11425 *cost += extra_cost->alu.shift;
11426 }
11427
11428 return true;
11429
11430 case ZERO_EXTEND:
11431 if ((arm_arch4
11432 || GET_MODE (XEXP (x, 0)) == SImode
11433 || GET_MODE (XEXP (x, 0)) == QImode)
11434 && MEM_P (XEXP (x, 0)))
11435 {
11436 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11437
11438 if (mode == DImode)
11439 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11440
11441 return true;
11442 }
11443
11444 /* Widening from less than 32 bits requires an extend operation. */
11445 if (GET_MODE (XEXP (x, 0)) == QImode)
11446 {
11447 /* UXTB can be a shorter instruction in Thumb2, but it might
11448 be slower than the AND Rd, Rn, #255 alternative. When
11449 optimizing for speed it should never be slower to use
11450 AND, and we don't really model 16-bit vs 32-bit insns
11451 here. */
11452 if (speed_p)
11453 *cost += extra_cost->alu.logical;
11454 }
11455 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
11456 {
11457 /* We have UXTB/UXTH. */
11458 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11459 if (speed_p)
11460 *cost += extra_cost->alu.extend;
11461 }
11462 else if (GET_MODE (XEXP (x, 0)) != SImode)
11463 {
11464 /* Needs two shifts. It's marginally preferable to use
11465 shifts rather than two BIC instructions as the second
11466 shift may merge with a subsequent insn as a shifter
11467 op. */
11468 *cost = COSTS_N_INSNS (2);
11469 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11470 if (speed_p)
11471 *cost += 2 * extra_cost->alu.shift;
11472 }
11473
11474 /* Widening beyond 32 bits requires one more insn. */
11475 if (mode == DImode)
11476 {
11477 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
11478 }
11479
11480 return true;
11481
11482 case CONST_INT:
11483 *cost = 0;
11484 /* CONST_INT has no mode, so we cannot tell for sure how many
11485 insns are really going to be needed. The best we can do is
11486 look at the value passed. If it fits in SImode, then assume
11487 that's the mode it will be used for. Otherwise assume it
11488 will be used in DImode. */
11489 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
11490 mode = SImode;
11491 else
11492 mode = DImode;
11493
11494 /* Avoid blowing up in arm_gen_constant (). */
11495 if (!(outer_code == PLUS
11496 || outer_code == AND
11497 || outer_code == IOR
11498 || outer_code == XOR
11499 || outer_code == MINUS))
11500 outer_code = SET;
11501
11502 const_int_cost:
11503 if (mode == SImode)
11504 {
11505 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
11506 INTVAL (x), NULL, NULL,
11507 0, 0));
11508 /* Extra costs? */
11509 }
11510 else
11511 {
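/* For a DImode constant, cost the low and high 32-bit halves
   separately (the low half via trunc_int_for_mode, the high half
   via INTVAL (x) >> 32 below). */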
11512 *cost += COSTS_N_INSNS (arm_gen_constant
11513 (outer_code, SImode, NULL,
11514 trunc_int_for_mode (INTVAL (x), SImode),
11515 NULL, NULL, 0, 0)
11516 + arm_gen_constant (outer_code, SImode, NULL,
11517 INTVAL (x) >> 32, NULL,
11518 NULL, 0, 0));
11519 /* Extra costs? */
11520 }
11521
11522 return true;
11523
11524 case CONST:
11525 case LABEL_REF:
11526 case SYMBOL_REF:
11527 if (speed_p)
11528 {
11529 if (arm_arch_thumb2 && !flag_pic)
11530 *cost += COSTS_N_INSNS (1);
11531 else
11532 *cost += extra_cost->ldst.load;
11533 }
11534 else
11535 *cost += COSTS_N_INSNS (1);
11536
11537 if (flag_pic)
11538 {
11539 *cost += COSTS_N_INSNS (1);
11540 if (speed_p)
11541 *cost += extra_cost->alu.arith;
11542 }
11543
11544 return true;
11545
11546 case CONST_FIXED:
11547 *cost = COSTS_N_INSNS (4);
11548 /* Fixme. */
11549 return true;
11550
11551 case CONST_DOUBLE:
11552 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11553 && (mode == SFmode || !TARGET_VFP_SINGLE))
11554 {
11555 if (vfp3_const_double_rtx (x))
11556 {
11557 if (speed_p)
11558 *cost += extra_cost->fp[mode == DFmode].fpconst;
11559 return true;
11560 }
11561
11562 if (speed_p)
11563 {
11564 if (mode == DFmode)
11565 *cost += extra_cost->ldst.loadd;
11566 else
11567 *cost += extra_cost->ldst.loadf;
11568 }
11569 else
11570 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11571
11572 return true;
11573 }
11574 *cost = COSTS_N_INSNS (4);
11575 return true;
11576
11577 case CONST_VECTOR:
11578 /* Fixme. */
11579 if (((TARGET_NEON && TARGET_HARD_FLOAT
11580 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
11581 || TARGET_HAVE_MVE)
11582 && simd_immediate_valid_for_move (x, mode, NULL, NULL))
11583 *cost = COSTS_N_INSNS (1);
11584 else
11585 *cost = COSTS_N_INSNS (4);
11586 return true;
11587
11588 case HIGH:
11589 case LO_SUM:
11590 /* When optimizing for size, we prefer constant pool entries to
11591 MOVW/MOVT pairs, so bump the cost of these slightly. */
11592 if (!speed_p)
11593 *cost += 1;
11594 return true;
11595
11596 case CLZ:
11597 if (speed_p)
11598 *cost += extra_cost->alu.clz;
11599 return false;
11600
11601 case SMIN:
11602 if (XEXP (x, 1) == const0_rtx)
11603 {
11604 if (speed_p)
11605 *cost += extra_cost->alu.log_shift;
11606 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11607 return true;
11608 }
11609 /* Fall through. */
11610 case SMAX:
11611 case UMIN:
11612 case UMAX:
11613 *cost += COSTS_N_INSNS (1);
11614 return false;
11615
11616 case TRUNCATE:
11617 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11618 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11619 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11620 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11621 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11622 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11623 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11624 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11625 == ZERO_EXTEND))))
11626 {
11627 if (speed_p)
11628 *cost += extra_cost->mult[1].extend;
11629 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11630 ZERO_EXTEND, 0, speed_p)
11631 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11632 ZERO_EXTEND, 0, speed_p));
11633 return true;
11634 }
11635 *cost = LIBCALL_COST (1);
11636 return false;
11637
11638 case UNSPEC_VOLATILE:
11639 case UNSPEC:
11640 return arm_unspec_cost (x, outer_code, speed_p, cost);
11641
11642 case PC:
11643 /* Reading the PC is like reading any other register. Writing it
11644 is more expensive, but we take that into account elsewhere. */
11645 *cost = 0;
11646 return true;
11647
11648 case ZERO_EXTRACT:
11649 /* TODO: Simple zero_extract of bottom bits using AND. */
11650 /* Fall through. */
11651 case SIGN_EXTRACT:
11652 if (arm_arch6
11653 && mode == SImode
11654 && CONST_INT_P (XEXP (x, 1))
11655 && CONST_INT_P (XEXP (x, 2)))
11656 {
11657 if (speed_p)
11658 *cost += extra_cost->alu.bfx;
11659 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11660 return true;
11661 }
11662 /* Without UBFX/SBFX, need to resort to shift operations. */
11663 *cost += COSTS_N_INSNS (1);
11664 if (speed_p)
11665 *cost += 2 * extra_cost->alu.shift;
11666 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11667 return true;
11668
11669 case FLOAT_EXTEND:
11670 if (TARGET_HARD_FLOAT)
11671 {
11672 if (speed_p)
11673 *cost += extra_cost->fp[mode == DFmode].widen;
11674 if (!TARGET_VFP5
11675 && GET_MODE (XEXP (x, 0)) == HFmode)
11676 {
11677 /* Pre v8, widening HF->DF is a two-step process, first
11678 widening to SFmode. */
11679 *cost += COSTS_N_INSNS (1);
11680 if (speed_p)
11681 *cost += extra_cost->fp[0].widen;
11682 }
11683 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11684 return true;
11685 }
11686
11687 *cost = LIBCALL_COST (1);
11688 return false;
11689
11690 case FLOAT_TRUNCATE:
11691 if (TARGET_HARD_FLOAT)
11692 {
11693 if (speed_p)
11694 *cost += extra_cost->fp[mode == DFmode].narrow;
11695 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11696 return true;
11697 /* Vector modes? */
11698 }
11699 *cost = LIBCALL_COST (1);
11700 return false;
11701
11702 case FMA:
11703 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11704 {
11705 rtx op0 = XEXP (x, 0);
11706 rtx op1 = XEXP (x, 1);
11707 rtx op2 = XEXP (x, 2);
11708
11709
11710 /* vfms or vfnma. */
11711 if (GET_CODE (op0) == NEG)
11712 op0 = XEXP (op0, 0);
11713
11714 /* vfnms or vfnma. */
11715 if (GET_CODE (op2) == NEG)
11716 op2 = XEXP (op2, 0);
11717
11718 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11719 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11720 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11721
11722 if (speed_p)
11723 *cost += extra_cost->fp[mode == DFmode].fma;
11724
11725 return true;
11726 }
11727
11728 *cost = LIBCALL_COST (3);
11729 return false;
11730
11731 case FIX:
11732 case UNSIGNED_FIX:
11733 if (TARGET_HARD_FLOAT)
11734 {
11735 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11736 a vcvt fixed-point conversion. */
11737 if (code == FIX && mode == SImode
11738 && GET_CODE (XEXP (x, 0)) == FIX
11739 && GET_MODE (XEXP (x, 0)) == SFmode
11740 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11741 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11742 > 0)
11743 {
11744 if (speed_p)
11745 *cost += extra_cost->fp[0].toint;
11746
11747 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11748 code, 0, speed_p);
11749 return true;
11750 }
11751
11752 if (GET_MODE_CLASS (mode) == MODE_INT)
11753 {
11754 mode = GET_MODE (XEXP (x, 0));
11755 if (speed_p)
11756 *cost += extra_cost->fp[mode == DFmode].toint;
11757 /* Strip off the 'cost' of rounding towards zero. */
11758 if (GET_CODE (XEXP (x, 0)) == FIX)
11759 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11760 0, speed_p);
11761 else
11762 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11763 /* ??? Increase the cost to deal with transferring from
11764 FP -> CORE registers? */
11765 return true;
11766 }
11767 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11768 && TARGET_VFP5)
11769 {
11770 if (speed_p)
11771 *cost += extra_cost->fp[mode == DFmode].roundint;
11772 return false;
11773 }
11774 /* Vector costs? */
11775 }
11776 *cost = LIBCALL_COST (1);
11777 return false;
11778
11779 case FLOAT:
11780 case UNSIGNED_FLOAT:
11781 if (TARGET_HARD_FLOAT)
11782 {
11783 /* ??? Increase the cost to deal with transferring from CORE
11784 -> FP registers? */
11785 if (speed_p)
11786 *cost += extra_cost->fp[mode == DFmode].fromint;
11787 return false;
11788 }
11789 *cost = LIBCALL_COST (1);
11790 return false;
11791
11792 case CALL:
11793 return true;
11794
11795 case ASM_OPERANDS:
11796 {
11797 /* Just a guess. Guess number of instructions in the asm
11798 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11799 though (see PR60663). */
11800 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11801 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11802
11803 *cost = COSTS_N_INSNS (asm_length + num_operands);
11804 return true;
11805 }
11806 default:
11807 if (mode != VOIDmode)
11808 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11809 else
11810 *cost = COSTS_N_INSNS (4); /* Who knows? */
11811 return false;
11812 }
11813 }
11814
11815 #undef HANDLE_NARROW_SHIFT_ARITH
11816
11817 /* RTX costs entry point. */
11818
11819 static bool
11820 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11821 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11822 {
11823 bool result;
11824 int code = GET_CODE (x);
11825 gcc_assert (current_tune->insn_extra_cost);
11826
11827 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11828 (enum rtx_code) outer_code,
11829 current_tune->insn_extra_cost,
11830 total, speed);
11831
11832 if (dump_file && arm_verbose_cost)
11833 {
11834 print_rtl_single (dump_file, x);
11835 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11836 *total, result ? "final" : "partial");
11837 }
11838 return result;
11839 }
11840
11841 static int
11842 arm_insn_cost (rtx_insn *insn, bool speed)
11843 {
11844 int cost;
11845
11846 /* Don't cost a simple reg-reg move at a full insn cost: such moves
11847 will likely disappear during register allocation. */
11848 if (!reload_completed
11849 && GET_CODE (PATTERN (insn)) == SET
11850 && REG_P (SET_DEST (PATTERN (insn)))
11851 && REG_P (SET_SRC (PATTERN (insn))))
11852 return 2;
11853 cost = pattern_cost (PATTERN (insn), speed);
11854 /* If the cost is zero, then it's likely a complex insn. We don't want the
11855 cost of these to be less than something we know about. */
11856 return cost ? cost : COSTS_N_INSNS (2);
11857 }
11858
11859 /* All address computations that can be done are free, but rtx cost returns
11860 the same for practically all of them. So we weight the different types
11861 of address here in the order (most pref first):
11862 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11863 static inline int
11864 arm_arm_address_cost (rtx x)
11865 {
11866 enum rtx_code c = GET_CODE (x);
11867
11868 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11869 return 0;
11870 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11871 return 10;
11872
11873 if (c == PLUS)
11874 {
11875 if (CONST_INT_P (XEXP (x, 1)))
11876 return 2;
11877
11878 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11879 return 3;
11880
11881 return 4;
11882 }
11883
11884 return 6;
11885 }
11886
11887 static inline int
11888 arm_thumb_address_cost (rtx x)
11889 {
11890 enum rtx_code c = GET_CODE (x);
11891
11892 if (c == REG)
11893 return 1;
11894 if (c == PLUS
11895 && REG_P (XEXP (x, 0))
11896 && CONST_INT_P (XEXP (x, 1)))
11897 return 1;
11898
11899 return 2;
11900 }
11901
11902 static int
11903 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11904 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11905 {
11906 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11907 }
11908
11909 /* Adjust cost hook for XScale. */
11910 static bool
11911 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11912 int * cost)
11913 {
11914 /* Some true dependencies can have a higher cost depending
11915 on precisely how certain input operands are used. */
11916 if (dep_type == 0
11917 && recog_memoized (insn) >= 0
11918 && recog_memoized (dep) >= 0)
11919 {
11920 int shift_opnum = get_attr_shift (insn);
11921 enum attr_type attr_type = get_attr_type (dep);
11922
11923 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11924 operand for INSN. If we have a shifted input operand and the
11925 instruction we depend on is another ALU instruction, then we may
11926 have to account for an additional stall. */
11927 if (shift_opnum != 0
11928 && (attr_type == TYPE_ALU_SHIFT_IMM_LSL_1TO4
11929 || attr_type == TYPE_ALU_SHIFT_IMM_OTHER
11930 || attr_type == TYPE_ALUS_SHIFT_IMM
11931 || attr_type == TYPE_LOGIC_SHIFT_IMM
11932 || attr_type == TYPE_LOGICS_SHIFT_IMM
11933 || attr_type == TYPE_ALU_SHIFT_REG
11934 || attr_type == TYPE_ALUS_SHIFT_REG
11935 || attr_type == TYPE_LOGIC_SHIFT_REG
11936 || attr_type == TYPE_LOGICS_SHIFT_REG
11937 || attr_type == TYPE_MOV_SHIFT
11938 || attr_type == TYPE_MVN_SHIFT
11939 || attr_type == TYPE_MOV_SHIFT_REG
11940 || attr_type == TYPE_MVN_SHIFT_REG))
11941 {
11942 rtx shifted_operand;
11943 int opno;
11944
11945 /* Get the shifted operand. */
11946 extract_insn (insn);
11947 shifted_operand = recog_data.operand[shift_opnum];
11948
11949 /* Iterate over all the operands in DEP. If we write an operand
11950 that overlaps with SHIFTED_OPERAND, then we have to increase the
11951 cost of this dependency. */
11952 extract_insn (dep);
11953 preprocess_constraints (dep);
11954 for (opno = 0; opno < recog_data.n_operands; opno++)
11955 {
11956 /* We can ignore strict inputs. */
11957 if (recog_data.operand_type[opno] == OP_IN)
11958 continue;
11959
11960 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11961 shifted_operand))
11962 {
11963 *cost = 2;
11964 return false;
11965 }
11966 }
11967 }
11968 }
11969 return true;
11970 }
11971
11972 /* Adjust cost hook for Cortex A9. */
11973 static bool
11974 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11975 int * cost)
11976 {
11977 switch (dep_type)
11978 {
11979 case REG_DEP_ANTI:
11980 *cost = 0;
11981 return false;
11982
11983 case REG_DEP_TRUE:
11984 case REG_DEP_OUTPUT:
11985 if (recog_memoized (insn) >= 0
11986 && recog_memoized (dep) >= 0)
11987 {
11988 if (GET_CODE (PATTERN (insn)) == SET)
11989 {
11990 if (GET_MODE_CLASS
11991 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11992 || GET_MODE_CLASS
11993 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11994 {
11995 enum attr_type attr_type_insn = get_attr_type (insn);
11996 enum attr_type attr_type_dep = get_attr_type (dep);
11997
11998 /* By default all dependencies of the form
11999 s0 = s0 <op> s1
12000 s0 = s0 <op> s2
12001 have an extra latency of 1 cycle because
12002 of the input and output dependency in this
12003 case. However, this gets modeled as a true
12004 dependency and hence all these checks. */
12005 if (REG_P (SET_DEST (PATTERN (insn)))
12006 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
12007 {
12008 /* FMACS is a special case where the dependent
12009 instruction can be issued 3 cycles before
12010 the normal latency in case of an output
12011 dependency. */
12012 if ((attr_type_insn == TYPE_FMACS
12013 || attr_type_insn == TYPE_FMACD)
12014 && (attr_type_dep == TYPE_FMACS
12015 || attr_type_dep == TYPE_FMACD))
12016 {
12017 if (dep_type == REG_DEP_OUTPUT)
12018 *cost = insn_default_latency (dep) - 3;
12019 else
12020 *cost = insn_default_latency (dep);
12021 return false;
12022 }
12023 else
12024 {
12025 if (dep_type == REG_DEP_OUTPUT)
12026 *cost = insn_default_latency (dep) + 1;
12027 else
12028 *cost = insn_default_latency (dep);
12029 }
12030 return false;
12031 }
12032 }
12033 }
12034 }
12035 break;
12036
12037 default:
12038 gcc_unreachable ();
12039 }
12040
12041 return true;
12042 }
12043
12044 /* Adjust cost hook for FA726TE. */
12045 static bool
12046 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
12047 int * cost)
12048 {
12049 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
12050 has a penalty of 3. */
12051 if (dep_type == REG_DEP_TRUE
12052 && recog_memoized (insn) >= 0
12053 && recog_memoized (dep) >= 0
12054 && get_attr_conds (dep) == CONDS_SET)
12055 {
12056 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
12057 if (get_attr_conds (insn) == CONDS_USE
12058 && get_attr_type (insn) != TYPE_BRANCH)
12059 {
12060 *cost = 3;
12061 return false;
12062 }
12063
12064 if (GET_CODE (PATTERN (insn)) == COND_EXEC
12065 || get_attr_conds (insn) == CONDS_USE)
12066 {
12067 *cost = 0;
12068 return false;
12069 }
12070 }
12071
12072 return true;
12073 }
12074
12075 /* Implement TARGET_REGISTER_MOVE_COST.
12076
12077 Moves between VFP_REGS and GENERAL_REGS take a single insn, but
12078 are typically more expensive than a single memory access. We set
12079 the cost to less than two memory accesses so that floating
12080 point to integer conversion does not go through memory. */
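/* (For reference: the VFP<->core cost below is 15, while arm_memory_move_cost
   returns 10 per access in 32-bit mode, so two memory accesses would cost 20.) */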
12081
12082 int
12083 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12084 reg_class_t from, reg_class_t to)
12085 {
12086 if (TARGET_32BIT)
12087 {
12088 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
12089 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
12090 return 15;
12091 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
12092 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
12093 return 4;
12094 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
12095 return 20;
12096 else
12097 return 2;
12098 }
12099 else
12100 {
12101 if (from == HI_REGS || to == HI_REGS)
12102 return 4;
12103 else
12104 return 2;
12105 }
12106 }
12107
12108 /* Implement TARGET_MEMORY_MOVE_COST. */
12109
12110 int
12111 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
12112 bool in ATTRIBUTE_UNUSED)
12113 {
12114 if (TARGET_32BIT)
12115 return 10;
12116 else
12117 {
12118 if (GET_MODE_SIZE (mode) < 4)
12119 return 8;
12120 else
12121 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
12122 }
12123 }
12124
12125 /* Vectorizer cost model implementation. */
12126
12127 /* Implement targetm.vectorize.builtin_vectorization_cost. */
12128 static int
12129 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12130 tree vectype,
12131 int misalign ATTRIBUTE_UNUSED)
12132 {
12133 unsigned elements;
12134
12135 switch (type_of_cost)
12136 {
12137 case scalar_stmt:
12138 return current_tune->vec_costs->scalar_stmt_cost;
12139
12140 case scalar_load:
12141 return current_tune->vec_costs->scalar_load_cost;
12142
12143 case scalar_store:
12144 return current_tune->vec_costs->scalar_store_cost;
12145
12146 case vector_stmt:
12147 return current_tune->vec_costs->vec_stmt_cost;
12148
12149 case vector_load:
12150 return current_tune->vec_costs->vec_align_load_cost;
12151
12152 case vector_store:
12153 return current_tune->vec_costs->vec_store_cost;
12154
12155 case vec_to_scalar:
12156 return current_tune->vec_costs->vec_to_scalar_cost;
12157
12158 case scalar_to_vec:
12159 return current_tune->vec_costs->scalar_to_vec_cost;
12160
12161 case unaligned_load:
12162 case vector_gather_load:
12163 return current_tune->vec_costs->vec_unalign_load_cost;
12164
12165 case unaligned_store:
12166 case vector_scatter_store:
12167 return current_tune->vec_costs->vec_unalign_store_cost;
12168
12169 case cond_branch_taken:
12170 return current_tune->vec_costs->cond_taken_branch_cost;
12171
12172 case cond_branch_not_taken:
12173 return current_tune->vec_costs->cond_not_taken_branch_cost;
12174
12175 case vec_perm:
12176 case vec_promote_demote:
12177 return current_tune->vec_costs->vec_stmt_cost;
12178
12179 case vec_construct:
12180 elements = TYPE_VECTOR_SUBPARTS (vectype);
12181 return elements / 2 + 1;
12182
12183 default:
12184 gcc_unreachable ();
12185 }
12186 }
12187
12188 /* Implement targetm.vectorize.add_stmt_cost. */
12189
12190 static unsigned
12191 arm_add_stmt_cost (vec_info *vinfo, void *data, int count,
12192 enum vect_cost_for_stmt kind,
12193 struct _stmt_vec_info *stmt_info, tree vectype,
12194 int misalign, enum vect_cost_model_location where)
12195 {
12196 unsigned *cost = (unsigned *) data;
12197 unsigned retval = 0;
12198
12199 if (flag_vect_cost_model)
12200 {
12201 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
12202
12203 /* Statements in an inner loop relative to the loop being
12204 vectorized are weighted more heavily. The value here is
12205 arbitrary and could potentially be improved with analysis. */
12206 if (where == vect_body && stmt_info
12207 && stmt_in_inner_loop_p (vinfo, stmt_info))
12208 count *= 50; /* FIXME. */
12209
12210 retval = (unsigned) (count * stmt_cost);
12211 cost[where] += retval;
12212 }
12213
12214 return retval;
12215 }
12216
12217 /* Return true if and only if this insn can dual-issue only as older. */
12218 static bool
12219 cortexa7_older_only (rtx_insn *insn)
12220 {
12221 if (recog_memoized (insn) < 0)
12222 return false;
12223
12224 switch (get_attr_type (insn))
12225 {
12226 case TYPE_ALU_DSP_REG:
12227 case TYPE_ALU_SREG:
12228 case TYPE_ALUS_SREG:
12229 case TYPE_LOGIC_REG:
12230 case TYPE_LOGICS_REG:
12231 case TYPE_ADC_REG:
12232 case TYPE_ADCS_REG:
12233 case TYPE_ADR:
12234 case TYPE_BFM:
12235 case TYPE_REV:
12236 case TYPE_MVN_REG:
12237 case TYPE_SHIFT_IMM:
12238 case TYPE_SHIFT_REG:
12239 case TYPE_LOAD_BYTE:
12240 case TYPE_LOAD_4:
12241 case TYPE_STORE_4:
12242 case TYPE_FFARITHS:
12243 case TYPE_FADDS:
12244 case TYPE_FFARITHD:
12245 case TYPE_FADDD:
12246 case TYPE_FMOV:
12247 case TYPE_F_CVT:
12248 case TYPE_FCMPS:
12249 case TYPE_FCMPD:
12250 case TYPE_FCONSTS:
12251 case TYPE_FCONSTD:
12252 case TYPE_FMULS:
12253 case TYPE_FMACS:
12254 case TYPE_FMULD:
12255 case TYPE_FMACD:
12256 case TYPE_FDIVS:
12257 case TYPE_FDIVD:
12258 case TYPE_F_MRC:
12259 case TYPE_F_MRRC:
12260 case TYPE_F_FLAG:
12261 case TYPE_F_LOADS:
12262 case TYPE_F_STORES:
12263 return true;
12264 default:
12265 return false;
12266 }
12267 }
12268
12269 /* Return true if and only if this insn can dual-issue as younger. */
12270 static bool
12271 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12272 {
12273 if (recog_memoized (insn) < 0)
12274 {
12275 if (verbose > 5)
12276 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12277 return false;
12278 }
12279
12280 switch (get_attr_type (insn))
12281 {
12282 case TYPE_ALU_IMM:
12283 case TYPE_ALUS_IMM:
12284 case TYPE_LOGIC_IMM:
12285 case TYPE_LOGICS_IMM:
12286 case TYPE_EXTEND:
12287 case TYPE_MVN_IMM:
12288 case TYPE_MOV_IMM:
12289 case TYPE_MOV_REG:
12290 case TYPE_MOV_SHIFT:
12291 case TYPE_MOV_SHIFT_REG:
12292 case TYPE_BRANCH:
12293 case TYPE_CALL:
12294 return true;
12295 default:
12296 return false;
12297 }
12298 }
12299
12300
12301 /* Look for an instruction that can dual issue only as an older
12302 instruction, and move it in front of any instructions that can
12303 dual-issue as younger, while preserving the relative order of all
12304 other instructions in the ready list. This is a heuristic to help
12305 dual-issue in later cycles, by postponing issue of more flexible
12306 instructions. This heuristic may affect dual issue opportunities
12307 in the current cycle. */
12308 static void
12309 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12310 int *n_readyp, int clock)
12311 {
12312 int i;
12313 int first_older_only = -1, first_younger = -1;
12314
12315 if (verbose > 5)
12316 fprintf (file,
12317 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12318 clock,
12319 *n_readyp);
12320
12321 /* Traverse the ready list from the head (the instruction to issue
12322 first), looking for the first instruction that can issue as
12323 younger and the first instruction that can dual-issue only as
12324 older. */
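/* The insn to be issued first is the last element of the READY array,
   hence the backwards walk starting at *n_readyp - 1. */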
12325 for (i = *n_readyp - 1; i >= 0; i--)
12326 {
12327 rtx_insn *insn = ready[i];
12328 if (cortexa7_older_only (insn))
12329 {
12330 first_older_only = i;
12331 if (verbose > 5)
12332 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12333 break;
12334 }
12335 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12336 first_younger = i;
12337 }
12338
12339 /* Nothing to reorder because either no younger insn found or insn
12340 that can dual-issue only as older appears before any insn that
12341 can dual-issue as younger. */
12342 if (first_younger == -1)
12343 {
12344 if (verbose > 5)
12345 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12346 return;
12347 }
12348
12349 /* Nothing to reorder because no older-only insn in the ready list. */
12350 if (first_older_only == -1)
12351 {
12352 if (verbose > 5)
12353 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12354 return;
12355 }
12356
12357 /* Move first_older_only insn before first_younger. */
12358 if (verbose > 5)
12359 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12360 INSN_UID(ready [first_older_only]),
12361 INSN_UID(ready [first_younger]));
12362 rtx_insn *first_older_only_insn = ready [first_older_only];
12363 for (i = first_older_only; i < first_younger; i++)
12364 {
12365 ready[i] = ready[i+1];
12366 }
12367
12368 ready[i] = first_older_only_insn;
12369 return;
12370 }
12371
12372 /* Implement TARGET_SCHED_REORDER. */
12373 static int
12374 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12375 int clock)
12376 {
12377 switch (arm_tune)
12378 {
12379 case TARGET_CPU_cortexa7:
12380 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12381 break;
12382 default:
12383 /* Do nothing for other cores. */
12384 break;
12385 }
12386
12387 return arm_issue_rate ();
12388 }
12389
12390 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12391 It corrects the value of COST based on the relationship between
12392 INSN and DEP through the dependence LINK. It returns the new
12393 value. There is a per-core adjust_cost hook to adjust scheduler costs
12394 and the per-core hook can choose to completely override the generic
12395 adjust_cost function. Only put bits of code into arm_adjust_cost that
12396 are common across all cores. */
12397 static int
12398 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12399 unsigned int)
12400 {
12401 rtx i_pat, d_pat;
12402
12403 /* When generating Thumb-1 code, we want to place flag-setting operations
12404 close to a conditional branch which depends on them, so that we can
12405 omit the comparison. */
12406 if (TARGET_THUMB1
12407 && dep_type == 0
12408 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12409 && recog_memoized (dep) >= 0
12410 && get_attr_conds (dep) == CONDS_SET)
12411 return 0;
12412
12413 if (current_tune->sched_adjust_cost != NULL)
12414 {
12415 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12416 return cost;
12417 }
12418
12419 /* XXX Is this strictly true? */
12420 if (dep_type == REG_DEP_ANTI
12421 || dep_type == REG_DEP_OUTPUT)
12422 return 0;
12423
12424 /* Call insns don't incur a stall, even if they follow a load. */
12425 if (dep_type == 0
12426 && CALL_P (insn))
12427 return 1;
12428
12429 if ((i_pat = single_set (insn)) != NULL
12430 && MEM_P (SET_SRC (i_pat))
12431 && (d_pat = single_set (dep)) != NULL
12432 && MEM_P (SET_DEST (d_pat)))
12433 {
12434 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12435 /* This is a load after a store; there is no conflict if the load reads
12436 from a cached area. Assume that loads from the stack and from the
12437 constant pool are cached, and that others will miss. This is a
12438 hack. */
12439
12440 if ((SYMBOL_REF_P (src_mem)
12441 && CONSTANT_POOL_ADDRESS_P (src_mem))
12442 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12443 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12444 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12445 return 1;
12446 }
12447
12448 return cost;
12449 }
12450
12451 int
12452 arm_max_conditional_execute (void)
12453 {
12454 return max_insns_skipped;
12455 }
12456
12457 static int
12458 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12459 {
12460 if (TARGET_32BIT)
12461 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12462 else
12463 return (optimize > 0) ? 2 : 0;
12464 }
12465
12466 static int
12467 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12468 {
12469 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12470 }
12471
12472 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12473 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12474 sequences of non-executed instructions in IT blocks probably take the same
12475 amount of time as executed instructions (and the IT instruction itself takes
12476 space in icache). This function was experimentally determined to give good
12477 results on a popular embedded benchmark. */
12478
12479 static int
12480 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12481 {
12482 return (TARGET_32BIT && speed_p) ? 1
12483 : arm_default_branch_cost (speed_p, predictable_p);
12484 }
12485
12486 static int
12487 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12488 {
12489 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12490 }
12491
12492 static bool fp_consts_inited = false;
12493
12494 static REAL_VALUE_TYPE value_fp0;
12495
12496 static void
12497 init_fp_table (void)
12498 {
12499 REAL_VALUE_TYPE r;
12500
12501 r = REAL_VALUE_ATOF ("0", DFmode);
12502 value_fp0 = r;
12503 fp_consts_inited = true;
12504 }
12505
12506 /* Return TRUE if rtx X is a valid immediate FP constant. */
12507 int
12508 arm_const_double_rtx (rtx x)
12509 {
12510 const REAL_VALUE_TYPE *r;
12511
12512 if (!fp_consts_inited)
12513 init_fp_table ();
12514
12515 r = CONST_DOUBLE_REAL_VALUE (x);
12516 if (REAL_VALUE_MINUS_ZERO (*r))
12517 return 0;
12518
12519 if (real_equal (r, &value_fp0))
12520 return 1;
12521
12522 return 0;
12523 }
12524
12525 /* VFPv3 has a fairly wide range of representable immediates, formed from
12526 "quarter-precision" floating-point values. These can be evaluated using this
12527 formula (with ^ for exponentiation):
12528
12529 (-1)^s * n * 2^-r
12530
12531 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12532 16 <= n <= 31 and 0 <= r <= 7.
12533
12534 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12535
12536 - A (most-significant) is the sign bit.
12537 - BCD are the exponent (encoded as r XOR 3).
12538 - EFGH are the mantissa (encoded as n - 16).
12539 */
12540
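/* For example, 1.0 can be written as 16 * 2^-4, i.e. s = 0, n = 16, r = 4.
   That encodes as A = 0, BCD = (4 XOR 3) = 7 and EFGH = 16 - 16 = 0, giving
   the 8-bit index 0b01110000 (0x70), which is what vfp3_const_double_index
   below returns for the CONST_DOUBLE 1.0.  */
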
12541 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12542 fconst[sd] instruction, or -1 if X isn't suitable. */
12543 static int
12544 vfp3_const_double_index (rtx x)
12545 {
12546 REAL_VALUE_TYPE r, m;
12547 int sign, exponent;
12548 unsigned HOST_WIDE_INT mantissa, mant_hi;
12549 unsigned HOST_WIDE_INT mask;
12550 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12551 bool fail;
12552
12553 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12554 return -1;
12555
12556 r = *CONST_DOUBLE_REAL_VALUE (x);
12557
12558 /* We can't represent these things, so detect them first. */
12559 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12560 return -1;
12561
12562 /* Extract sign, exponent and mantissa. */
12563 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12564 r = real_value_abs (&r);
12565 exponent = REAL_EXP (&r);
12566 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12567 highest (sign) bit, with a fixed binary point at bit point_pos.
12568 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12569 bits for the mantissa, this may fail (low bits would be lost). */
12570 real_ldexp (&m, &r, point_pos - exponent);
12571 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12572 mantissa = w.elt (0);
12573 mant_hi = w.elt (1);
12574
12575 /* If there are bits set in the low part of the mantissa, we can't
12576 represent this value. */
12577 if (mantissa != 0)
12578 return -1;
12579
12580 /* Now make it so that mantissa contains the most-significant bits, and move
12581 the point_pos to indicate that the least-significant bits have been
12582 discarded. */
12583 point_pos -= HOST_BITS_PER_WIDE_INT;
12584 mantissa = mant_hi;
12585
12586 /* We can permit four significant bits of mantissa only, plus a high bit
12587 which is always 1. */
12588 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12589 if ((mantissa & mask) != 0)
12590 return -1;
12591
12592 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12593 mantissa >>= point_pos - 5;
12594
12595 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12596 floating-point immediate zero with Neon using an integer-zero load, but
12597 that case is handled elsewhere.) */
12598 if (mantissa == 0)
12599 return -1;
12600
12601 gcc_assert (mantissa >= 16 && mantissa <= 31);
12602
12603 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12604 normalized significands are in the range [1, 2)). (Our mantissa is shifted
12605 left 4 places at this point relative to normalized IEEE754 values). GCC
12606 internally uses [0.5, 1) (see real.c), so the exponent returned from
12607 REAL_EXP must be altered. */
12608 exponent = 5 - exponent;
12609
12610 if (exponent < 0 || exponent > 7)
12611 return -1;
12612
12613 /* Sign, mantissa and exponent are now in the correct form to plug into the
12614 formula described in the comment above. */
12615 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12616 }
12617
12618 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12619 int
12620 vfp3_const_double_rtx (rtx x)
12621 {
12622 if (!TARGET_VFP3)
12623 return 0;
12624
12625 return vfp3_const_double_index (x) != -1;
12626 }
12627
12628 /* Recognize immediates which can be used in various Neon and MVE instructions.
12629 Legal immediates are described by the following table (for VMVN variants, the
12630 bitwise inverse of the constant shown is recognized. In either case, VMOV
12631 is output and the correct instruction to use for a given constant is chosen
12632 by the assembler). The constant shown is replicated across all elements of
12633 the destination vector.
12634
12635 insn elems variant constant (binary)
12636 ---- ----- ------- -----------------
12637 vmov i32 0 00000000 00000000 00000000 abcdefgh
12638 vmov i32 1 00000000 00000000 abcdefgh 00000000
12639 vmov i32 2 00000000 abcdefgh 00000000 00000000
12640 vmov i32 3 abcdefgh 00000000 00000000 00000000
12641 vmov i16 4 00000000 abcdefgh
12642 vmov i16 5 abcdefgh 00000000
12643 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12644 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12645 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12646 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12647 vmvn i16 10 00000000 abcdefgh
12648 vmvn i16 11 abcdefgh 00000000
12649 vmov i32 12 00000000 00000000 abcdefgh 11111111
12650 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12651 vmov i32 14 00000000 abcdefgh 11111111 11111111
12652 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12653 vmov i8 16 abcdefgh
12654 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12655 eeeeeeee ffffffff gggggggg hhhhhhhh
12656 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12657 vmov f32 19 00000000 00000000 00000000 00000000
12658
12659 For case 18, B = !b. Representable values are exactly those accepted by
12660 vfp3_const_double_index, but are output as floating-point numbers rather
12661 than indices.
12662
12663 For case 19, we will change it to vmov.i32 when assembling.
12664
12665 Variants 0-5 (inclusive) may also be used as immediates for the second
12666 operand of VORR/VBIC instructions.
12667
12668 The INVERSE argument causes the bitwise inverse of the given operand to be
12669 recognized instead (used for recognizing legal immediates for the VAND/VORN
12670 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12671 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12672 output, rather than the real insns vbic/vorr).
12673
12674 INVERSE makes no difference to the recognition of float vectors.
12675
12676 The return value is the variant of immediate as shown in the above table, or
12677 -1 if the given value doesn't match any of the listed patterns.
12678 */
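
/* Illustrative example: a V4SImode constant whose every element is
   0x0000ab00 matches variant 1 above with abcdefgh = 0xab, while a vector
   holding its bitwise inverse (0xffff54ff in every element) matches the
   VMVN variant 7.  */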
12679 static int
12680 simd_valid_immediate (rtx op, machine_mode mode, int inverse,
12681 rtx *modconst, int *elementwidth)
12682 {
12683 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12684 matches = 1; \
12685 for (i = 0; i < idx; i += (STRIDE)) \
12686 if (!(TEST)) \
12687 matches = 0; \
12688 if (matches) \
12689 { \
12690 immtype = (CLASS); \
12691 elsize = (ELSIZE); \
12692 break; \
12693 }
12694
12695 unsigned int i, elsize = 0, idx = 0, n_elts;
12696 unsigned int innersize;
12697 unsigned char bytes[16] = {};
12698 int immtype = -1, matches;
12699 unsigned int invmask = inverse ? 0xff : 0;
12700 bool vector = GET_CODE (op) == CONST_VECTOR;
12701
12702 if (vector)
12703 n_elts = CONST_VECTOR_NUNITS (op);
12704 else
12705 {
12706 n_elts = 1;
12707 gcc_assert (mode != VOIDmode);
12708 }
12709
12710 innersize = GET_MODE_UNIT_SIZE (mode);
12711
12712 /* Only support 128-bit vectors for MVE. */
12713 if (TARGET_HAVE_MVE && (!vector || n_elts * innersize != 16))
12714 return -1;
12715
12716 /* Vectors of float constants. */
12717 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12718 {
12719 rtx el0 = CONST_VECTOR_ELT (op, 0);
12720
12721 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12722 return -1;
12723
12724 /* FP16 vectors cannot be represented. */
12725 if (GET_MODE_INNER (mode) == HFmode)
12726 return -1;
12727
12728 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12729 are distinct in this context. */
12730 if (!const_vec_duplicate_p (op))
12731 return -1;
12732
12733 if (modconst)
12734 *modconst = CONST_VECTOR_ELT (op, 0);
12735
12736 if (elementwidth)
12737 *elementwidth = 0;
12738
12739 if (el0 == CONST0_RTX (GET_MODE (el0)))
12740 return 19;
12741 else
12742 return 18;
12743 }
12744
12745 /* The tricks done in the code below apply for little-endian vector layout.
12746 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12747 FIXME: Implement logic for big-endian vectors. */
12748 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12749 return -1;
12750
12751 /* Splat vector constant out into a byte vector. */
12752 for (i = 0; i < n_elts; i++)
12753 {
12754 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12755 unsigned HOST_WIDE_INT elpart;
12756
12757 gcc_assert (CONST_INT_P (el));
12758 elpart = INTVAL (el);
12759
12760 for (unsigned int byte = 0; byte < innersize; byte++)
12761 {
12762 bytes[idx++] = (elpart & 0xff) ^ invmask;
12763 elpart >>= BITS_PER_UNIT;
12764 }
12765 }
12766
12767 /* Sanity check. */
12768 gcc_assert (idx == GET_MODE_SIZE (mode));
12769
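/* Try each variant from the table above in turn. CHECK examines every
   STRIDE-th group of bytes of the splatted constant; if TEST holds for all
   of them it records the variant number (CLASS) and element size (ELSIZE)
   and breaks out of the do-while (0) block below.  */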
12770 do
12771 {
12772 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12773 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12774
12775 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12776 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12777
12778 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12779 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12780
12781 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12782 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12783
12784 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12785
12786 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12787
12788 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12789 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12790
12791 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12792 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12793
12794 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12795 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12796
12797 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12798 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12799
12800 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12801
12802 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12803
12804 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12805 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12806
12807 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12808 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12809
12810 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12811 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12812
12813 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12814 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12815
12816 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12817
12818 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12819 && bytes[i] == bytes[(i + 8) % idx]);
12820 }
12821 while (0);
12822
12823 if (immtype == -1)
12824 return -1;
12825
12826 if (elementwidth)
12827 *elementwidth = elsize;
12828
12829 if (modconst)
12830 {
12831 unsigned HOST_WIDE_INT imm = 0;
12832
12833 /* Un-invert bytes of recognized vector, if necessary. */
12834 if (invmask != 0)
12835 for (i = 0; i < idx; i++)
12836 bytes[i] ^= invmask;
12837
12838 if (immtype == 17)
12839 {
12840 /* FIXME: Broken on 32-bit H_W_I hosts. */
12841 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12842
12843 for (i = 0; i < 8; i++)
12844 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12845 << (i * BITS_PER_UNIT);
12846
12847 *modconst = GEN_INT (imm);
12848 }
12849 else
12850 {
12851 unsigned HOST_WIDE_INT imm = 0;
12852
12853 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12854 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12855
12856 *modconst = GEN_INT (imm);
12857 }
12858 }
12859
12860 return immtype;
12861 #undef CHECK
12862 }
12863
12864 /* Return TRUE if rtx X is legal for use as either a Neon or MVE VMOV (or,
12865 implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH
12866 (or zero for float elements), and a modified constant (whatever should be
12867 output for a VMOV) in *MODCONST. The function was renamed from
12868 "neon_immediate_valid_for_move" to "simd_immediate_valid_for_move" because
12869 it is used by both Neon and MVE. */
12870 int
12871 simd_immediate_valid_for_move (rtx op, machine_mode mode,
12872 rtx *modconst, int *elementwidth)
12873 {
12874 rtx tmpconst;
12875 int tmpwidth;
12876 int retval = simd_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12877
12878 if (retval == -1)
12879 return 0;
12880
12881 if (modconst)
12882 *modconst = tmpconst;
12883
12884 if (elementwidth)
12885 *elementwidth = tmpwidth;
12886
12887 return 1;
12888 }
12889
12890 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12891 the immediate is valid, write a constant suitable for using as an operand
12892 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12893 *ELEMENTWIDTH. See simd_valid_immediate for description of INVERSE. */
12894
12895 int
12896 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12897 rtx *modconst, int *elementwidth)
12898 {
12899 rtx tmpconst;
12900 int tmpwidth;
12901 int retval = simd_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12902
12903 if (retval < 0 || retval > 5)
12904 return 0;
12905
12906 if (modconst)
12907 *modconst = tmpconst;
12908
12909 if (elementwidth)
12910 *elementwidth = tmpwidth;
12911
12912 return 1;
12913 }
12914
12915 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12916 the immediate is valid, write a constant suitable for using as an operand
12917 to VSHR/VSHL to *MODCONST and the corresponding element width to
12918 *ELEMENTWIDTH. ISLEFTSHIFT determines whether this is a left or a right
12919 shift, because the two have different immediate limitations. */
12920
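/* For example, for V8HImode (16-bit elements) a valid VSHL immediate lies
   in the range 0..15, whereas a valid VSHR immediate lies in 1..16.  */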
12921 int
12922 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12923 rtx *modconst, int *elementwidth,
12924 bool isleftshift)
12925 {
12926 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12927 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12928 unsigned HOST_WIDE_INT last_elt = 0;
12929 unsigned HOST_WIDE_INT maxshift;
12930
12931 /* Split vector constant out into a byte vector. */
12932 for (i = 0; i < n_elts; i++)
12933 {
12934 rtx el = CONST_VECTOR_ELT (op, i);
12935 unsigned HOST_WIDE_INT elpart;
12936
12937 if (CONST_INT_P (el))
12938 elpart = INTVAL (el);
12939 else if (CONST_DOUBLE_P (el))
12940 return 0;
12941 else
12942 gcc_unreachable ();
12943
12944 if (i != 0 && elpart != last_elt)
12945 return 0;
12946
12947 last_elt = elpart;
12948 }
12949
12950 /* Shift less than element size. */
12951 maxshift = innersize * 8;
12952
12953 if (isleftshift)
12954 {
12955 /* Left shift immediate value can be from 0 to <size>-1. */
12956 if (last_elt >= maxshift)
12957 return 0;
12958 }
12959 else
12960 {
12961 /* Right shift immediate value can be from 1 to <size>. */
12962 if (last_elt == 0 || last_elt > maxshift)
12963 return 0;
12964 }
12965
12966 if (elementwidth)
12967 *elementwidth = innersize * 8;
12968
12969 if (modconst)
12970 *modconst = CONST_VECTOR_ELT (op, 0);
12971
12972 return 1;
12973 }
12974
12975 /* Return a string suitable for output of Neon immediate logic operation
12976 MNEM. */
12977
12978 char *
12979 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12980 int inverse, int quad)
12981 {
12982 int width, is_valid;
12983 static char templ[40];
12984
12985 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12986
12987 gcc_assert (is_valid != 0);
12988
12989 if (quad)
12990 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12991 else
12992 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12993
12994 return templ;
12995 }
12996
12997 /* Return a string suitable for output of Neon immediate shift operation
12998 (VSHR or VSHL) MNEM. */
12999
13000 char *
13001 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
13002 machine_mode mode, int quad,
13003 bool isleftshift)
13004 {
13005 int width, is_valid;
13006 static char templ[40];
13007
13008 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
13009 gcc_assert (is_valid != 0);
13010
13011 if (quad)
13012 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
13013 else
13014 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
13015
13016 return templ;
13017 }
13018
13019 /* Output a sequence of pairwise operations to implement a reduction.
13020 NOTE: We do "too much work" here, because pairwise operations work on two
13021 registers-worth of operands in one go. Unfortunately I don't think we can
13022 exploit those extra calculations to do the full operation in fewer steps.
13023 Although all vector elements of the result but the first are ignored, we
13024 actually calculate the same result in each of the elements. An alternative
13025 such as initially loading a vector with zero to use as each of the second
13026 operands would use up an additional register and take an extra instruction,
13027 for no particular gain. */
13028
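/* Illustrative example: for a four-element mode such as V4SImode the loop
   below emits two pairwise operations - the first folds four elements into
   two partial results and the second folds those into one - so each element
   of the final vector (only element 0 is actually used) ends up holding the
   full reduction.  */
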
13029 void
13030 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
13031 rtx (*reduc) (rtx, rtx, rtx))
13032 {
13033 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
13034 rtx tmpsum = op1;
13035
13036 for (i = parts / 2; i >= 1; i /= 2)
13037 {
13038 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
13039 emit_insn (reduc (dest, tmpsum, tmpsum));
13040 tmpsum = dest;
13041 }
13042 }
13043
13044 /* Return a non-NULL RTX iff VALS is a vector constant that can be
13045 loaded into a register using VDUP.
13046
13047 If this is the case, and GENERATE is set, we also generate
13048 instructions to do this and return an RTX to assign to the register. */
13049
13050 static rtx
13051 neon_vdup_constant (rtx vals, bool generate)
13052 {
13053 machine_mode mode = GET_MODE (vals);
13054 machine_mode inner_mode = GET_MODE_INNER (mode);
13055 rtx x;
13056
13057 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
13058 return NULL_RTX;
13059
13060 if (!const_vec_duplicate_p (vals, &x))
13061 /* The elements are not all the same. We could handle repeating
13062 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
13063 {0, C, 0, C, 0, C, 0, C} which can be loaded using
13064 vdup.i16). */
13065 return NULL_RTX;
13066
13067 if (!generate)
13068 return x;
13069
13070 /* We can load this constant by using VDUP and a constant in a
13071 single ARM register. This will be cheaper than a vector
13072 load. */
13073
13074 x = copy_to_mode_reg (inner_mode, x);
13075 return gen_vec_duplicate (mode, x);
13076 }
13077
13078 /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only
13079 constants (for vec_init) or CONST_VECTOR, can be efficiently loaded
13080 into a register.
13081
13082 If this is the case, and GENERATE is set, we also generate code to do
13083 this and return an RTX to copy into the register. */
13084
13085 rtx
13086 neon_make_constant (rtx vals, bool generate)
13087 {
13088 machine_mode mode = GET_MODE (vals);
13089 rtx target;
13090 rtx const_vec = NULL_RTX;
13091 int n_elts = GET_MODE_NUNITS (mode);
13092 int n_const = 0;
13093 int i;
13094
13095 if (GET_CODE (vals) == CONST_VECTOR)
13096 const_vec = vals;
13097 else if (GET_CODE (vals) == PARALLEL)
13098 {
13099 /* A CONST_VECTOR must contain only CONST_INTs and
13100 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
13101 Only store valid constants in a CONST_VECTOR. */
13102 for (i = 0; i < n_elts; ++i)
13103 {
13104 rtx x = XVECEXP (vals, 0, i);
13105 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
13106 n_const++;
13107 }
13108 if (n_const == n_elts)
13109 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
13110 }
13111 else
13112 gcc_unreachable ();
13113
13114 if (const_vec != NULL
13115 && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL))
13116 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
13117 return const_vec;
13118 else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX)
13119 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
13120 pipeline cycle; creating the constant takes one or two ARM
13121 pipeline cycles. */
13122 return target;
13123 else if (const_vec != NULL_RTX)
13124 /* Load from constant pool. On Cortex-A8 this takes two cycles
13125 (for either double or quad vectors). We cannot take advantage
13126 of single-cycle VLD1 because we need a PC-relative addressing
13127 mode. */
13128 return arm_disable_literal_pool ? NULL_RTX : const_vec;
13129 else
13130 /* A PARALLEL containing something not valid inside CONST_VECTOR.
13131 We cannot construct an initializer. */
13132 return NULL_RTX;
13133 }
13134
13135 /* Initialize vector TARGET to VALS. */
13136
13137 void
13138 neon_expand_vector_init (rtx target, rtx vals)
13139 {
13140 machine_mode mode = GET_MODE (target);
13141 machine_mode inner_mode = GET_MODE_INNER (mode);
13142 int n_elts = GET_MODE_NUNITS (mode);
13143 int n_var = 0, one_var = -1;
13144 bool all_same = true;
13145 rtx x, mem;
13146 int i;
13147
13148 for (i = 0; i < n_elts; ++i)
13149 {
13150 x = XVECEXP (vals, 0, i);
13151 if (!CONSTANT_P (x))
13152 ++n_var, one_var = i;
13153
13154 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13155 all_same = false;
13156 }
13157
13158 if (n_var == 0)
13159 {
13160 rtx constant = neon_make_constant (vals);
13161 if (constant != NULL_RTX)
13162 {
13163 emit_move_insn (target, constant);
13164 return;
13165 }
13166 }
13167
13168 /* Splat a single non-constant element if we can. */
13169 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
13170 {
13171 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
13172 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
13173 return;
13174 }
13175
13176 /* One field is non-constant. Load constant then overwrite varying
13177 field. This is more efficient than using the stack. */
13178 if (n_var == 1)
13179 {
13180 rtx copy = copy_rtx (vals);
13181 rtx merge_mask = GEN_INT (1 << one_var);
13182
13183 /* Load constant part of vector, substitute neighboring value for
13184 varying element. */
13185 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
13186 neon_expand_vector_init (target, copy);
13187
13188 /* Insert variable. */
13189 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
13190 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
13191 return;
13192 }
13193
13194 /* Construct the vector in memory one field at a time
13195 and load the whole vector. */
13196 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13197 for (i = 0; i < n_elts; i++)
13198 emit_move_insn (adjust_address_nv (mem, inner_mode,
13199 i * GET_MODE_SIZE (inner_mode)),
13200 XVECEXP (vals, 0, i));
13201 emit_move_insn (target, mem);
13202 }
13203
13204 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
13205 an out-of-range error mentioning DESC if it doesn't. EXP indicates the
13206 source location, which includes the inlining history for intrinsics. */
13207
13208 static void
13209 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13210 const_tree exp, const char *desc)
13211 {
13212 HOST_WIDE_INT lane;
13213
13214 gcc_assert (CONST_INT_P (operand));
13215
13216 lane = INTVAL (operand);
13217
13218 if (lane < low || lane >= high)
13219 {
13220 if (exp)
13221 error ("%K%s %wd out of range %wd - %wd",
13222 exp, desc, lane, low, high - 1);
13223 else
13224 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13225 }
13226 }
13227
13228 /* Bounds-check lanes. */
13229
13230 void
13231 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13232 const_tree exp)
13233 {
13234 bounds_check (operand, low, high, exp, "lane");
13235 }
13236
13237 /* Bounds-check constants. */
13238
13239 void
13240 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13241 {
13242 bounds_check (operand, low, high, NULL_TREE, "constant");
13243 }
13244
13245 HOST_WIDE_INT
13246 neon_element_bits (machine_mode mode)
13247 {
13248 return GET_MODE_UNIT_BITSIZE (mode);
13249 }
13250
13251 \f
13252 /* Predicates for `match_operand' and `match_operator'. */
13253
13254 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13255 WB level is 2 if full writeback address modes are allowed, 1
13256 if limited writeback address modes (POST_INC and PRE_DEC) are
13257 allowed and 0 if no writeback at all is supported. */
13258
13259 int
13260 arm_coproc_mem_operand_wb (rtx op, int wb_level)
13261 {
13262 gcc_assert (wb_level == 0 || wb_level == 1 || wb_level == 2);
13263 rtx ind;
13264
13265 /* Reject eliminable registers. */
13266 if (! (reload_in_progress || reload_completed || lra_in_progress)
13267 && ( reg_mentioned_p (frame_pointer_rtx, op)
13268 || reg_mentioned_p (arg_pointer_rtx, op)
13269 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13270 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13271 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13272 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13273 return FALSE;
13274
13275 /* Constants are converted into offsets from labels. */
13276 if (!MEM_P (op))
13277 return FALSE;
13278
13279 ind = XEXP (op, 0);
13280
13281 if (reload_completed
13282 && (LABEL_REF_P (ind)
13283 || (GET_CODE (ind) == CONST
13284 && GET_CODE (XEXP (ind, 0)) == PLUS
13285 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13286 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13287 return TRUE;
13288
13289 /* Match: (mem (reg)). */
13290 if (REG_P (ind))
13291 return arm_address_register_rtx_p (ind, 0);
13292
13293 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
13294 acceptable in any case (subject to verification by
13295 arm_address_register_rtx_p). We need full writeback to accept
13296 PRE_INC and POST_DEC, and at least restricted writeback for
13297 POST_INC and PRE_DEC. */
13298 if (wb_level > 0
13299 && (GET_CODE (ind) == POST_INC
13300 || GET_CODE (ind) == PRE_DEC
13301 || (wb_level > 1
13302 && (GET_CODE (ind) == PRE_INC
13303 || GET_CODE (ind) == POST_DEC))))
13304 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13305
13306 if (wb_level > 1
13307 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13308 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13309 && GET_CODE (XEXP (ind, 1)) == PLUS
13310 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13311 ind = XEXP (ind, 1);
13312
13313 /* Match:
13314 (plus (reg)
13315 (const))
13316
13317 The encoded immediate for 16-bit modes is multiplied by 2,
13318 while the encoded immediate for 32-bit and 64-bit modes is
13319 multiplied by 4. */
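/* For instance, for DFmode the factor below is 4, so the offset must be a
   multiple of 4 in the range [-1020, 1020]; for HFmode the factor is 2 and
   the offset must be an even value in [-510, 510].  */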
13320 int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4);
13321 if (GET_CODE (ind) == PLUS
13322 && REG_P (XEXP (ind, 0))
13323 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13324 && CONST_INT_P (XEXP (ind, 1))
13325 && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor)
13326 && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0)
13327 return TRUE;
13328
13329 return FALSE;
13330 }
13331
13332 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13333 WB is true if full writeback address modes are allowed and is false
13334 if limited writeback address modes (POST_INC and PRE_DEC) are
13335 allowed. */
13336
13337 int arm_coproc_mem_operand (rtx op, bool wb)
13338 {
13339 return arm_coproc_mem_operand_wb (op, wb ? 2 : 1);
13340 }
13341
13342 /* Return TRUE if OP is a valid coprocessor memory address pattern in a
13343 context in which no writeback address modes are allowed. */
13344
13345 int
13346 arm_coproc_mem_operand_no_writeback (rtx op)
13347 {
13348 return arm_coproc_mem_operand_wb (op, 0);
13349 }
13350
13351 /* This function returns TRUE on matching mode and op.
13352 1. For V8QImode, V4QImode and V4HImode, check for [Rn], return TRUE for Rn <= LO_REGS.
13353 2. For other modes, check for [Rn], return TRUE for Rn < R15 (except R13). */
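/* Illustrative example: [r8] is accepted for V8HImode (any core register
   below PC other than SP is allowed), but rejected for V8QImode, which
   requires a low register (r0-r7).  */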
13354 int
13355 mve_vector_mem_operand (machine_mode mode, rtx op, bool strict)
13356 {
13357 enum rtx_code code;
13358 int val, reg_no;
13359
13360 /* Match: (mem (reg)). */
13361 if (REG_P (op))
13362 {
13363 int reg_no = REGNO (op);
13364 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13365 ? reg_no <= LAST_LO_REGNUM
13366 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13367 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13368 }
13369 code = GET_CODE (op);
13370
13371 if (code == POST_INC || code == PRE_DEC
13372 || code == PRE_INC || code == POST_DEC)
13373 {
13374 reg_no = REGNO (XEXP (op, 0));
13375 return (((mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode)
13376 ? reg_no <= LAST_LO_REGNUM
13377 :(reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM))
13378 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13379 }
13380 else if ((code == POST_MODIFY || code == PRE_MODIFY)
13381 && GET_CODE (XEXP (op, 1)) == PLUS && REG_P (XEXP (XEXP (op, 1), 1)))
13382 {
13383 reg_no = REGNO (XEXP (op, 0));
13384 val = INTVAL (XEXP ( XEXP (op, 1), 1));
13385 switch (mode)
13386 {
13387 case E_V16QImode:
13388 if (abs (val) <= 127)
13389 return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13390 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13391 return FALSE;
13392 case E_V8HImode:
13393 case E_V8HFmode:
13394 if (abs (val) <= 255)
13395 return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13396 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13397 return FALSE;
13398 case E_V8QImode:
13399 case E_V4QImode:
13400 if (abs (val) <= 127)
13401 return (reg_no <= LAST_LO_REGNUM
13402 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13403 return FALSE;
13404 case E_V4HImode:
13405 case E_V4HFmode:
13406 if (val % 2 == 0 && abs (val) <= 254)
13407 return (reg_no <= LAST_LO_REGNUM
13408 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13409 return FALSE;
13410 case E_V4SImode:
13411 case E_V4SFmode:
13412 if (val % 4 == 0 && abs (val) <= 508)
13413 return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13414 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13415 return FALSE;
13416 case E_V2DImode:
13417 case E_V2DFmode:
13418 case E_TImode:
13419 if (val % 4 == 0 && val >= 0 && val <= 1020)
13420 return ((reg_no < LAST_ARM_REGNUM && reg_no != SP_REGNUM)
13421 || (!strict && reg_no >= FIRST_PSEUDO_REGISTER));
13422 return FALSE;
13423 default:
13424 return FALSE;
13425 }
13426 }
13427 return FALSE;
13428 }
13429
13430 /* Return TRUE if OP is a memory operand which we can load or store a vector
13431 to/from. TYPE is one of the following values:
13432 0 - Vector load/store (vldr)
13433 1 - Core registers (ldm)
13434 2 - Element/structure loads (vld1)
13435 */
13436 int
13437 neon_vector_mem_operand (rtx op, int type, bool strict)
13438 {
13439 rtx ind;
13440
13441 /* Reject eliminable registers. */
13442 if (strict && ! (reload_in_progress || reload_completed)
13443 && (reg_mentioned_p (frame_pointer_rtx, op)
13444 || reg_mentioned_p (arg_pointer_rtx, op)
13445 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13446 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13447 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13448 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13449 return FALSE;
13450
13451 /* Constants are converted into offsets from labels. */
13452 if (!MEM_P (op))
13453 return FALSE;
13454
13455 ind = XEXP (op, 0);
13456
13457 if (reload_completed
13458 && (LABEL_REF_P (ind)
13459 || (GET_CODE (ind) == CONST
13460 && GET_CODE (XEXP (ind, 0)) == PLUS
13461 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13462 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13463 return TRUE;
13464
13465 /* Match: (mem (reg)). */
13466 if (REG_P (ind))
13467 return arm_address_register_rtx_p (ind, 0);
13468
13469 /* Allow post-increment with Neon registers. */
13470 if ((type != 1 && GET_CODE (ind) == POST_INC)
13471 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13472 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13473
13474 /* Allow post-increment by register for VLDn */
13475 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13476 && GET_CODE (XEXP (ind, 1)) == PLUS
13477 && REG_P (XEXP (XEXP (ind, 1), 1))
13478 && REG_P (XEXP (ind, 0))
13479 && rtx_equal_p (XEXP (ind, 0), XEXP (XEXP (ind, 1), 0)))
13480 return true;
13481
13482 /* Match:
13483 (plus (reg)
13484 (const)). */
13485 if (type == 0
13486 && GET_CODE (ind) == PLUS
13487 && REG_P (XEXP (ind, 0))
13488 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13489 && CONST_INT_P (XEXP (ind, 1))
13490 && INTVAL (XEXP (ind, 1)) > -1024
13491 /* For quad modes, we restrict the constant offset to be slightly less
13492 than what the instruction format permits. We have no such constraint
13493 on double mode offsets. (This must match arm_legitimate_index_p.) */
13494 && (INTVAL (XEXP (ind, 1))
13495 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13496 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13497 return TRUE;
13498
13499 return FALSE;
13500 }
13501
13502 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13503 type. */
13504 int
13505 neon_struct_mem_operand (rtx op)
13506 {
13507 rtx ind;
13508
13509 /* Reject eliminable registers. */
13510 if (! (reload_in_progress || reload_completed)
13511 && ( reg_mentioned_p (frame_pointer_rtx, op)
13512 || reg_mentioned_p (arg_pointer_rtx, op)
13513 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13514 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13515 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13516 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13517 return FALSE;
13518
13519 /* Constants are converted into offsets from labels. */
13520 if (!MEM_P (op))
13521 return FALSE;
13522
13523 ind = XEXP (op, 0);
13524
13525 if (reload_completed
13526 && (LABEL_REF_P (ind)
13527 || (GET_CODE (ind) == CONST
13528 && GET_CODE (XEXP (ind, 0)) == PLUS
13529 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13530 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13531 return TRUE;
13532
13533 /* Match: (mem (reg)). */
13534 if (REG_P (ind))
13535 return arm_address_register_rtx_p (ind, 0);
13536
13537 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13538 if (GET_CODE (ind) == POST_INC
13539 || GET_CODE (ind) == PRE_DEC)
13540 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13541
13542 return FALSE;
13543 }
13544
13545 /* Prepares the operands for the VCMLA by lane instruction such that the right
13546 register number is selected. This instruction is special in that it always
13547 requires a D register, however there is a choice to be made between Dn[0],
13548 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
13549
13550 The VCMLA by lane function always selects two values. For instance given D0
13551 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
13552 used by the instruction. However, given V4SF, indexes 0 and 1 are both
13553 valid, as either D0[0] or D1[0] may be selected.
13554
13555 This function centralizes that information based on OPERANDS, OPERANDS[3]
13556 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
13557 updated to contain the right index. */
13558
13559 rtx *
13560 neon_vcmla_lane_prepare_operands (rtx *operands)
13561 {
13562 int lane = INTVAL (operands[4]);
13563 machine_mode constmode = SImode;
13564 machine_mode mode = GET_MODE (operands[3]);
13565 int regno = REGNO (operands[3]);
13566 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
13567 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
13568 {
13569 operands[3] = gen_int_mode (regno + 1, constmode);
13570 operands[4]
13571 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
13572 }
13573 else
13574 {
13575 operands[3] = gen_int_mode (regno, constmode);
13576 operands[4] = gen_int_mode (lane, constmode);
13577 }
13578 return operands;
13579 }
13580
13581
13582 /* Return true if X is a register that will be eliminated later on. */
13583 int
13584 arm_eliminable_register (rtx x)
13585 {
13586 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13587 || REGNO (x) == ARG_POINTER_REGNUM
13588 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13589 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13590 }
13591
13592 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13593 coprocessor registers. Otherwise return NO_REGS. */
13594
13595 enum reg_class
13596 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13597 {
13598 if (mode == HFmode)
13599 {
13600 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13601 return GENERAL_REGS;
13602 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13603 return NO_REGS;
13604 return GENERAL_REGS;
13605 }
13606
13607 /* The neon move patterns handle all legitimate vector and struct
13608 addresses. */
13609 if (TARGET_NEON
13610 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13611 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13612 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13613 || VALID_NEON_STRUCT_MODE (mode)))
13614 return NO_REGS;
13615
13616 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13617 return NO_REGS;
13618
13619 return GENERAL_REGS;
13620 }
13621
13622 /* Values which must be returned in the most-significant end of the return
13623 register. */
13624
13625 static bool
13626 arm_return_in_msb (const_tree valtype)
13627 {
13628 return (TARGET_AAPCS_BASED
13629 && BYTES_BIG_ENDIAN
13630 && (AGGREGATE_TYPE_P (valtype)
13631 || TREE_CODE (valtype) == COMPLEX_TYPE
13632 || FIXED_POINT_TYPE_P (valtype)));
13633 }
13634
13635 /* Return TRUE if X references a SYMBOL_REF. */
13636 int
13637 symbol_mentioned_p (rtx x)
13638 {
13639 const char * fmt;
13640 int i;
13641
13642 if (SYMBOL_REF_P (x))
13643 return 1;
13644
13645 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13646 are constant offsets, not symbols. */
13647 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13648 return 0;
13649
13650 fmt = GET_RTX_FORMAT (GET_CODE (x));
13651
13652 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13653 {
13654 if (fmt[i] == 'E')
13655 {
13656 int j;
13657
13658 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13659 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13660 return 1;
13661 }
13662 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13663 return 1;
13664 }
13665
13666 return 0;
13667 }
13668
13669 /* Return TRUE if X references a LABEL_REF. */
13670 int
13671 label_mentioned_p (rtx x)
13672 {
13673 const char * fmt;
13674 int i;
13675
13676 if (LABEL_REF_P (x))
13677 return 1;
13678
13679 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13680 instruction, but they are constant offsets, not symbols. */
13681 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13682 return 0;
13683
13684 fmt = GET_RTX_FORMAT (GET_CODE (x));
13685 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13686 {
13687 if (fmt[i] == 'E')
13688 {
13689 int j;
13690
13691 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13692 if (label_mentioned_p (XVECEXP (x, i, j)))
13693 return 1;
13694 }
13695 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13696 return 1;
13697 }
13698
13699 return 0;
13700 }
13701
13702 int
13703 tls_mentioned_p (rtx x)
13704 {
13705 switch (GET_CODE (x))
13706 {
13707 case CONST:
13708 return tls_mentioned_p (XEXP (x, 0));
13709
13710 case UNSPEC:
13711 if (XINT (x, 1) == UNSPEC_TLS)
13712 return 1;
13713
13714 /* Fall through. */
13715 default:
13716 return 0;
13717 }
13718 }
13719
13720 /* Must not copy any rtx that uses a pc-relative address.
13721 Also, disallow copying of load-exclusive instructions that
13722 may appear after splitting of compare-and-swap-style operations
13723 so as to prevent those loops from being transformed away from their
13724 canonical forms (see PR 69904). */
13725
13726 static bool
13727 arm_cannot_copy_insn_p (rtx_insn *insn)
13728 {
13729 /* The tls call insn cannot be copied, as it is paired with a data
13730 word. */
13731 if (recog_memoized (insn) == CODE_FOR_tlscall)
13732 return true;
13733
13734 subrtx_iterator::array_type array;
13735 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13736 {
13737 const_rtx x = *iter;
13738 if (GET_CODE (x) == UNSPEC
13739 && (XINT (x, 1) == UNSPEC_PIC_BASE
13740 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13741 return true;
13742 }
13743
13744 rtx set = single_set (insn);
13745 if (set)
13746 {
13747 rtx src = SET_SRC (set);
13748 if (GET_CODE (src) == ZERO_EXTEND)
13749 src = XEXP (src, 0);
13750
13751 /* Catch the load-exclusive and load-acquire operations. */
13752 if (GET_CODE (src) == UNSPEC_VOLATILE
13753 && (XINT (src, 1) == VUNSPEC_LL
13754 || XINT (src, 1) == VUNSPEC_LAX))
13755 return true;
13756 }
13757 return false;
13758 }
13759
13760 enum rtx_code
13761 minmax_code (rtx x)
13762 {
13763 enum rtx_code code = GET_CODE (x);
13764
13765 switch (code)
13766 {
13767 case SMAX:
13768 return GE;
13769 case SMIN:
13770 return LE;
13771 case UMIN:
13772 return LEU;
13773 case UMAX:
13774 return GEU;
13775 default:
13776 gcc_unreachable ();
13777 }
13778 }
13779
13780 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13781
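/* For example, LO_BOUND == 0 with HI_BOUND == 255 matches an unsigned
   saturation (usat) with *MASK == 8, while LO_BOUND == -128 with
   HI_BOUND == 127 matches a signed saturation (ssat) with *MASK == 8.  */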
13782 bool
13783 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13784 int *mask, bool *signed_sat)
13785 {
13786 /* The high bound must be a power of two minus one. */
13787 int log = exact_log2 (INTVAL (hi_bound) + 1);
13788 if (log == -1)
13789 return false;
13790
13791 /* The low bound is either zero (for usat) or one less than the
13792 negation of the high bound (for ssat). */
13793 if (INTVAL (lo_bound) == 0)
13794 {
13795 if (mask)
13796 *mask = log;
13797 if (signed_sat)
13798 *signed_sat = false;
13799
13800 return true;
13801 }
13802
13803 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13804 {
13805 if (mask)
13806 *mask = log + 1;
13807 if (signed_sat)
13808 *signed_sat = true;
13809
13810 return true;
13811 }
13812
13813 return false;
13814 }
13815
13816 /* Return 1 if memory locations are adjacent. */
13817 int
13818 adjacent_mem_locations (rtx a, rtx b)
13819 {
13820 /* We don't guarantee to preserve the order of these memory refs. */
13821 if (volatile_refs_p (a) || volatile_refs_p (b))
13822 return 0;
13823
13824 if ((REG_P (XEXP (a, 0))
13825 || (GET_CODE (XEXP (a, 0)) == PLUS
13826 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13827 && (REG_P (XEXP (b, 0))
13828 || (GET_CODE (XEXP (b, 0)) == PLUS
13829 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13830 {
13831 HOST_WIDE_INT val0 = 0, val1 = 0;
13832 rtx reg0, reg1;
13833 int val_diff;
13834
13835 if (GET_CODE (XEXP (a, 0)) == PLUS)
13836 {
13837 reg0 = XEXP (XEXP (a, 0), 0);
13838 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13839 }
13840 else
13841 reg0 = XEXP (a, 0);
13842
13843 if (GET_CODE (XEXP (b, 0)) == PLUS)
13844 {
13845 reg1 = XEXP (XEXP (b, 0), 0);
13846 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13847 }
13848 else
13849 reg1 = XEXP (b, 0);
13850
13851 /* Don't accept any offset that will require multiple
13852 instructions to handle, since this would cause the
13853 arith_adjacentmem pattern to output an overlong sequence. */
13854 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13855 return 0;
13856
13857 /* Don't allow an eliminable register: register elimination can make
13858 the offset too large. */
13859 if (arm_eliminable_register (reg0))
13860 return 0;
13861
13862 val_diff = val1 - val0;
13863
13864 if (arm_ld_sched)
13865 {
13866 /* If the target has load delay slots, then there's no benefit
13867 to using an ldm instruction unless the offset is zero and
13868 we are optimizing for size. */
13869 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13870 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13871 && (val_diff == 4 || val_diff == -4));
13872 }
13873
13874 return ((REGNO (reg0) == REGNO (reg1))
13875 && (val_diff == 4 || val_diff == -4));
13876 }
13877
13878 return 0;
13879 }
13880
13881 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13882 for load operations, false for store operations. CONSECUTIVE is true
13883 if the register numbers in the operation must be consecutive in the register
13884 bank. RETURN_PC is true if the value is to be loaded into the PC.
13885 The pattern we are trying to match for load is:
13886 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13887 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13888 :
13889 :
13890 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13891 ]
13892 where
13893 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13894 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13895 3. If consecutive is TRUE, then for kth register being loaded,
13896 REGNO (R_dk) = REGNO (R_d0) + k.
13897 The pattern for store is similar. */
13898 bool
13899 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13900 bool consecutive, bool return_pc)
13901 {
13902 HOST_WIDE_INT count = XVECLEN (op, 0);
13903 rtx reg, mem, addr;
13904 unsigned regno;
13905 unsigned first_regno;
13906 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13907 rtx elt;
13908 bool addr_reg_in_reglist = false;
13909 bool update = false;
13910 int reg_increment;
13911 int offset_adj;
13912 int regs_per_val;
13913
13914 /* If not in SImode, then registers must be consecutive
13915 (e.g., VLDM instructions for DFmode). */
13916 gcc_assert ((mode == SImode) || consecutive);
13917 /* Setting return_pc for stores is illegal. */
13918 gcc_assert (!return_pc || load);
13919
13920 /* Set up the increments and the regs per val based on the mode. */
13921 reg_increment = GET_MODE_SIZE (mode);
13922 regs_per_val = reg_increment / 4;
13923 offset_adj = return_pc ? 1 : 0;
13924
13925 if (count <= 1
13926 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13927 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13928 return false;
13929
13930 /* Check if this is a write-back. */
13931 elt = XVECEXP (op, 0, offset_adj);
13932 if (GET_CODE (SET_SRC (elt)) == PLUS)
13933 {
13934 i++;
13935 base = 1;
13936 update = true;
13937
13938 /* The offset adjustment must be the number of registers being
13939 popped times the size of a single register. */
13940 if (!REG_P (SET_DEST (elt))
13941 || !REG_P (XEXP (SET_SRC (elt), 0))
13942 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13943 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13944 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13945 ((count - 1 - offset_adj) * reg_increment))
13946 return false;
13947 }
13948
13949 i = i + offset_adj;
13950 base = base + offset_adj;
13951 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13952 success depends on the type: VLDM can do just one reg,
13953 LDM must do at least two. */
13954 if ((count <= i) && (mode == SImode))
13955 return false;
13956
13957 elt = XVECEXP (op, 0, i - 1);
13958 if (GET_CODE (elt) != SET)
13959 return false;
13960
13961 if (load)
13962 {
13963 reg = SET_DEST (elt);
13964 mem = SET_SRC (elt);
13965 }
13966 else
13967 {
13968 reg = SET_SRC (elt);
13969 mem = SET_DEST (elt);
13970 }
13971
13972 if (!REG_P (reg) || !MEM_P (mem))
13973 return false;
13974
13975 regno = REGNO (reg);
13976 first_regno = regno;
13977 addr = XEXP (mem, 0);
13978 if (GET_CODE (addr) == PLUS)
13979 {
13980 if (!CONST_INT_P (XEXP (addr, 1)))
13981 return false;
13982
13983 offset = INTVAL (XEXP (addr, 1));
13984 addr = XEXP (addr, 0);
13985 }
13986
13987 if (!REG_P (addr))
13988 return false;
13989
13990 /* Don't allow SP to be loaded unless it is also the base register. It
13991 guarantees that SP is reset correctly when an LDM instruction
13992 is interrupted. Otherwise, we might end up with a corrupt stack. */
13993 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13994 return false;
13995
13996 if (regno == REGNO (addr))
13997 addr_reg_in_reglist = true;
13998
13999 for (; i < count; i++)
14000 {
14001 elt = XVECEXP (op, 0, i);
14002 if (GET_CODE (elt) != SET)
14003 return false;
14004
14005 if (load)
14006 {
14007 reg = SET_DEST (elt);
14008 mem = SET_SRC (elt);
14009 }
14010 else
14011 {
14012 reg = SET_SRC (elt);
14013 mem = SET_DEST (elt);
14014 }
14015
14016 if (!REG_P (reg)
14017 || GET_MODE (reg) != mode
14018 || REGNO (reg) <= regno
14019 || (consecutive
14020 && (REGNO (reg) !=
14021 (unsigned int) (first_regno + regs_per_val * (i - base))))
14022 /* Don't allow SP to be loaded unless it is also the base register. It
14023 guarantees that SP is reset correctly when an LDM instruction
14024 is interrupted. Otherwise, we might end up with a corrupt stack. */
14025 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
14026 || !MEM_P (mem)
14027 || GET_MODE (mem) != mode
14028 || ((GET_CODE (XEXP (mem, 0)) != PLUS
14029 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
14030 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
14031 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
14032 offset + (i - base) * reg_increment))
14033 && (!REG_P (XEXP (mem, 0))
14034 || offset + (i - base) * reg_increment != 0)))
14035 return false;
14036
14037 regno = REGNO (reg);
14038 if (regno == REGNO (addr))
14039 addr_reg_in_reglist = true;
14040 }
14041
14042 if (load)
14043 {
14044 if (update && addr_reg_in_reglist)
14045 return false;
14046
14047 /* For Thumb-1, the address register is always modified - either by
14048 write-back or by an explicit load. If the pattern does not describe an
14049 update, then the address register must be in the list of loaded registers. */
14050 if (TARGET_THUMB1)
14051 return update || addr_reg_in_reglist;
14052 }
14053
14054 return true;
14055 }
14056
14057 /* Checks whether OP is a valid parallel pattern for a CLRM (if VFP is false)
14058 or VSCCLRM (otherwise) insn. To be a valid CLRM pattern, OP must have the
14059 following form:
14060
14061 [(set (reg:SI <N>) (const_int 0))
14062 (set (reg:SI <M>) (const_int 0))
14063 ...
14064 (unspec_volatile [(const_int 0)]
14065 VUNSPEC_CLRM_APSR)
14066 (clobber (reg:CC CC_REGNUM))
14067 ]
14068
14069 Any number (including 0) of set expressions is valid; the volatile unspec is
14070 optional. All registers but SP and PC are allowed, and registers must be in
14071 strictly increasing order.
14072
14073 To be a valid VSCCLRM pattern, OP must have the following form:
14074
14075 [(unspec_volatile [(const_int 0)]
14076 VUNSPEC_VSCCLRM_VPR)
14077 (set (reg:SF <N>) (const_int 0))
14078 (set (reg:SF <M>) (const_int 0))
14079 ...
14080 ]
14081
14082 As with CLRM, any number (including 0) of set expressions is valid; however,
14083 the volatile unspec is mandatory here. Any VFP single-precision register is
14084 accepted, but all registers must be consecutive and in increasing order. */
14085
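/* Illustrative example: a CLRM clearing r1 and r2 is a PARALLEL of
   (set (reg:SI 1) (const_int 0)), (set (reg:SI 2) (const_int 0)), the
   VUNSPEC_CLRM_APSR unspec_volatile and finally (clobber (reg:CC CC_REGNUM)),
   in that order.  */
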
14086 bool
14087 clear_operation_p (rtx op, bool vfp)
14088 {
14089 unsigned regno;
14090 unsigned last_regno = INVALID_REGNUM;
14091 rtx elt, reg, zero;
14092 int count = XVECLEN (op, 0);
14093 int first_set = vfp ? 1 : 0;
14094 machine_mode expected_mode = vfp ? E_SFmode : E_SImode;
14095
14096 for (int i = first_set; i < count; i++)
14097 {
14098 elt = XVECEXP (op, 0, i);
14099
14100 if (!vfp && GET_CODE (elt) == UNSPEC_VOLATILE)
14101 {
14102 if (XINT (elt, 1) != VUNSPEC_CLRM_APSR
14103 || XVECLEN (elt, 0) != 1
14104 || XVECEXP (elt, 0, 0) != CONST0_RTX (SImode)
14105 || i != count - 2)
14106 return false;
14107
14108 continue;
14109 }
14110
14111 if (GET_CODE (elt) == CLOBBER)
14112 continue;
14113
14114 if (GET_CODE (elt) != SET)
14115 return false;
14116
14117 reg = SET_DEST (elt);
14118 zero = SET_SRC (elt);
14119
14120 if (!REG_P (reg)
14121 || GET_MODE (reg) != expected_mode
14122 || zero != CONST0_RTX (SImode))
14123 return false;
14124
14125 regno = REGNO (reg);
14126
14127 if (vfp)
14128 {
14129 if (i != first_set && regno != last_regno + 1)
14130 return false;
14131 }
14132 else
14133 {
14134 if (regno == SP_REGNUM || regno == PC_REGNUM)
14135 return false;
14136 if (i != first_set && regno <= last_regno)
14137 return false;
14138 }
14139
14140 last_regno = regno;
14141 }
14142
14143 return true;
14144 }
14145
14146 /* Return true iff it would be profitable to turn a sequence of NOPS loads
14147 or stores (depending on IS_STORE) into a load-multiple or store-multiple
14148 instruction. ADD_OFFSET is nonzero if the base address register needs
14149 to be modified with an add instruction before we can use it. */
14150
14151 static bool
14152 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
14153 int nops, HOST_WIDE_INT add_offset)
14154 {
14155 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
14156 if the offset isn't small enough. The reason 2 ldrs are faster
14157 is because these ARMs are able to do more than one cache access
14158 in a single cycle. The ARM9 and StrongARM have Harvard caches,
14159 whilst the ARM8 has a double bandwidth cache. This means that
14160 these cores can do both an instruction fetch and a data fetch in
14161 a single cycle, so the trick of calculating the address into a
14162 scratch register (one of the result regs) and then doing a load
14163 multiple actually becomes slower (and no smaller in code size).
14164 That is the transformation
14165
14166 ldr rd1, [rbase + offset]
14167 ldr rd2, [rbase + offset + 4]
14168
14169 to
14170
14171 add rd1, rbase, offset
14172 ldmia rd1, {rd1, rd2}
14173
14174 produces worse code -- '3 cycles + any stalls on rd2' instead of
14175 '2 cycles + any stalls on rd2'. On ARMs with only one cache
14176 access per cycle, the first sequence could never complete in less
14177 than 6 cycles, whereas the ldm sequence would only take 5 and
14178 would make better use of sequential accesses if not hitting the
14179 cache.
14180
14181 We cheat here and test 'arm_ld_sched' which we currently know to
14182 only be true for the ARM8, ARM9 and StrongARM. If this ever
14183 changes, then the test below needs to be reworked. */
14184 if (nops == 2 && arm_ld_sched && add_offset != 0)
14185 return false;
14186
14187 /* XScale has load-store double instructions, but they have stricter
14188 alignment requirements than load-store multiple, so we cannot
14189 use them.
14190
14191 For XScale ldm requires 2 + NREGS cycles to complete and blocks
14192 the pipeline until completion.
14193
14194 NREGS CYCLES
14195 1 3
14196 2 4
14197 3 5
14198 4 6
14199
14200 An ldr instruction takes 1-3 cycles, but does not block the
14201 pipeline.
14202
14203 NREGS CYCLES
14204 1 1-3
14205 2 2-6
14206 3 3-9
14207 4 4-12
14208
14209 Best case ldr will always win. However, the more ldr instructions
14210 we issue, the less likely we are to be able to schedule them well.
14211 Using ldr instructions also increases code size.
14212
14213 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
14214 for counts of 3 or 4 regs. */
14215 if (nops <= 2 && arm_tune_xscale && !optimize_size)
14216 return false;
14217 return true;
14218 }
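/* Illustrative example (not part of the original source): with the rules
   above, a candidate pair of loads whose base address still needs an ADD
   (ADD_OFFSET != 0) is rejected on ARM8/ARM9/StrongARM-class cores
   (arm_ld_sched), and a pair is likewise rejected when tuning for XScale
   unless we are optimizing for size; three or four loads are still
   considered profitable to combine.  */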
14219
14220 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
14221 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
14222 an array ORDER which describes the sequence in which to access the
14223 offsets so that they are visited in ascending order. In this sequence, each
14224 offset must be larger by exactly 4 than the previous one. ORDER[0]
14225 must have been filled in by the caller with the index of the lowest offset.
14226 If UNSORTED_REGS is nonnull, it is an array of register numbers that
14227 we use to verify that ORDER produces an ascending order of registers.
14228 Return true if it was possible to construct such an order, false if
14229 not. */
14230
14231 static bool
14232 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
14233 int *unsorted_regs)
14234 {
14235 int i;
14236 for (i = 1; i < nops; i++)
14237 {
14238 int j;
14239
14240 order[i] = order[i - 1];
14241 for (j = 0; j < nops; j++)
14242 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
14243 {
14244 /* We must find exactly one offset that is higher than the
14245 previous one by 4. */
14246 if (order[i] != order[i - 1])
14247 return false;
14248 order[i] = j;
14249 }
14250 if (order[i] == order[i - 1])
14251 return false;
14252 /* The register numbers must be ascending. */
14253 if (unsorted_regs != NULL
14254 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
14255 return false;
14256 }
14257 return true;
14258 }
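/* Worked example (illustrative only, not from the original source): given
   UNSORTED_OFFSETS = {8, 0, 4, 12} with ORDER[0] pre-set to 1 (the index of
   the lowest offset, 0), the loop above fills ORDER = {1, 2, 0, 3}, i.e. the
   offsets are visited as 0, 4, 8, 12.  If some offset other than
   previous + 4 were present (say {0, 4, 12}), or if the same offset appeared
   twice, the function would return false.  */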
14259
14260 /* Used to determine in a peephole whether a sequence of load
14261 instructions can be changed into a load-multiple instruction.
14262 NOPS is the number of separate load instructions we are examining. The
14263 first NOPS entries in OPERANDS are the destination registers, the
14264 next NOPS entries are memory operands. If this function is
14265 successful, *BASE is set to the common base register of the memory
14266 accesses; *LOAD_OFFSET is set to the first memory location's offset
14267 from that base register.
14268 REGS is an array filled in with the destination register numbers.
14269 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
14270 insn numbers to an ascending order of loads. If CHECK_REGS is true,
14271 the sequence of registers in REGS matches the loads from ascending memory
14272 locations, and the function verifies that the register numbers are
14273 themselves ascending. If CHECK_REGS is false, the register numbers
14274 are stored in the order they are found in the operands. */
14275 static int
14276 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
14277 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
14278 {
14279 int unsorted_regs[MAX_LDM_STM_OPS];
14280 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14281 int order[MAX_LDM_STM_OPS];
14282 int base_reg = -1;
14283 int i, ldm_case;
14284
14285 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14286 easily extended if required. */
14287 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14288
14289 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14290
14291 /* Loop over the operands and check that the memory references are
14292 suitable (i.e. immediate offsets from the same base register). At
14293 the same time, extract the target register, and the memory
14294 offsets. */
14295 for (i = 0; i < nops; i++)
14296 {
14297 rtx reg;
14298 rtx offset;
14299
14300 /* Convert a subreg of a mem into the mem itself. */
14301 if (GET_CODE (operands[nops + i]) == SUBREG)
14302 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14303
14304 gcc_assert (MEM_P (operands[nops + i]));
14305
14306 /* Don't reorder volatile memory references; it doesn't seem worth
14307 looking for the case where the order is ok anyway. */
14308 if (MEM_VOLATILE_P (operands[nops + i]))
14309 return 0;
14310
14311 offset = const0_rtx;
14312
14313 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14314 || (SUBREG_P (reg)
14315 && REG_P (reg = SUBREG_REG (reg))))
14316 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14317 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14318 || (SUBREG_P (reg)
14319 && REG_P (reg = SUBREG_REG (reg))))
14320 && (CONST_INT_P (offset
14321 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14322 {
14323 if (i == 0)
14324 {
14325 base_reg = REGNO (reg);
14326 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14327 return 0;
14328 }
14329 else if (base_reg != (int) REGNO (reg))
14330 /* Not addressed from the same base register. */
14331 return 0;
14332
14333 unsorted_regs[i] = (REG_P (operands[i])
14334 ? REGNO (operands[i])
14335 : REGNO (SUBREG_REG (operands[i])));
14336
14337 /* If it isn't an integer register, or if it overwrites the
14338 base register but isn't the last insn in the list, then
14339 we can't do this. */
14340 if (unsorted_regs[i] < 0
14341 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14342 || unsorted_regs[i] > 14
14343 || (i != nops - 1 && unsorted_regs[i] == base_reg))
14344 return 0;
14345
14346 /* Don't allow SP to be loaded unless it is also the base
14347 register. It guarantees that SP is reset correctly when
14348 an LDM instruction is interrupted. Otherwise, we might
14349 end up with a corrupt stack. */
14350 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
14351 return 0;
14352
14353 unsorted_offsets[i] = INTVAL (offset);
14354 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14355 order[0] = i;
14356 }
14357 else
14358 /* Not a suitable memory address. */
14359 return 0;
14360 }
14361
14362 /* All the useful information has now been extracted from the
14363 operands into unsorted_regs and unsorted_offsets; additionally,
14364 order[0] has been set to the lowest offset in the list. Sort
14365 the offsets into order, verifying that they are adjacent, and
14366 check that the register numbers are ascending. */
14367 if (!compute_offset_order (nops, unsorted_offsets, order,
14368 check_regs ? unsorted_regs : NULL))
14369 return 0;
14370
14371 if (saved_order)
14372 memcpy (saved_order, order, sizeof order);
14373
14374 if (base)
14375 {
14376 *base = base_reg;
14377
14378 for (i = 0; i < nops; i++)
14379 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14380
14381 *load_offset = unsorted_offsets[order[0]];
14382 }
14383
14384 if (unsorted_offsets[order[0]] == 0)
14385 ldm_case = 1; /* ldmia */
14386 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14387 ldm_case = 2; /* ldmib */
14388 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14389 ldm_case = 3; /* ldmda */
14390 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14391 ldm_case = 4; /* ldmdb */
14392 else if (const_ok_for_arm (unsorted_offsets[order[0]])
14393 || const_ok_for_arm (-unsorted_offsets[order[0]]))
14394 ldm_case = 5;
14395 else
14396 return 0;
14397
14398 if (!multiple_operation_profitable_p (false, nops,
14399 ldm_case == 5
14400 ? unsorted_offsets[order[0]] : 0))
14401 return 0;
14402
14403 return ldm_case;
14404 }
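/* Illustrative summary (not part of the original source) of the LDM_CASE
   values computed above: a lowest offset of 0 gives case 1 (ldmia); on ARM,
   a lowest offset of 4 gives case 2 (ldmib) and a highest offset of 0 gives
   case 3 (ldmda); on any 32-bit target a highest offset of -4 gives case 4
   (ldmdb); any other lowest offset that is a valid ARM immediate (or whose
   negation is) gives case 5, which requires adjusting the base register
   with an ADD before the ldmia.  */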
14405
14406 /* Used to determine in a peephole whether a sequence of store instructions can
14407 be changed into a store-multiple instruction.
14408 NOPS is the number of separate store instructions we are examining.
14409 NOPS_TOTAL is the total number of instructions recognized by the peephole
14410 pattern.
14411 The first NOPS entries in OPERANDS are the source registers, the next
14412 NOPS entries are memory operands. If this function is successful, *BASE is
14413 set to the common base register of the memory accesses; *LOAD_OFFSET is set
14414 to the first memory location's offset from that base register. REGS is an
14415 array filled in with the source register numbers, REG_RTXS (if nonnull) is
14416 likewise filled with the corresponding rtx's.
14417 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
14418 numbers to an ascending order of stores.
14419 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
14420 from ascending memory locations, and the function verifies that the register
14421 numbers are themselves ascending. If CHECK_REGS is false, the register
14422 numbers are stored in the order they are found in the operands. */
14423 static int
14424 store_multiple_sequence (rtx *operands, int nops, int nops_total,
14425 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
14426 HOST_WIDE_INT *load_offset, bool check_regs)
14427 {
14428 int unsorted_regs[MAX_LDM_STM_OPS];
14429 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
14430 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
14431 int order[MAX_LDM_STM_OPS];
14432 int base_reg = -1;
14433 rtx base_reg_rtx = NULL;
14434 int i, stm_case;
14435
14436 /* Write back of base register is currently only supported for Thumb 1. */
14437 int base_writeback = TARGET_THUMB1;
14438
14439 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14440 easily extended if required. */
14441 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14442
14443 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14444
14445 /* Loop over the operands and check that the memory references are
14446 suitable (i.e. immediate offsets from the same base register). At
14447 the same time, extract the target register, and the memory
14448 offsets. */
14449 for (i = 0; i < nops; i++)
14450 {
14451 rtx reg;
14452 rtx offset;
14453
14454 /* Convert a subreg of a mem into the mem itself. */
14455 if (GET_CODE (operands[nops + i]) == SUBREG)
14456 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14457
14458 gcc_assert (MEM_P (operands[nops + i]));
14459
14460 /* Don't reorder volatile memory references; it doesn't seem worth
14461 looking for the case where the order is ok anyway. */
14462 if (MEM_VOLATILE_P (operands[nops + i]))
14463 return 0;
14464
14465 offset = const0_rtx;
14466
14467 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14468 || (SUBREG_P (reg)
14469 && REG_P (reg = SUBREG_REG (reg))))
14470 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14471 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14472 || (SUBREG_P (reg)
14473 && REG_P (reg = SUBREG_REG (reg))))
14474 && (CONST_INT_P (offset
14475 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14476 {
14477 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14478 ? operands[i] : SUBREG_REG (operands[i]));
14479 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14480
14481 if (i == 0)
14482 {
14483 base_reg = REGNO (reg);
14484 base_reg_rtx = reg;
14485 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14486 return 0;
14487 }
14488 else if (base_reg != (int) REGNO (reg))
14489 /* Not addressed from the same base register. */
14490 return 0;
14491
14492 /* If it isn't an integer register, then we can't do this. */
14493 if (unsorted_regs[i] < 0
14494 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14495 /* The effects are unpredictable if the base register is
14496 both updated and stored. */
14497 || (base_writeback && unsorted_regs[i] == base_reg)
14498 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14499 || unsorted_regs[i] > 14)
14500 return 0;
14501
14502 unsorted_offsets[i] = INTVAL (offset);
14503 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14504 order[0] = i;
14505 }
14506 else
14507 /* Not a suitable memory address. */
14508 return 0;
14509 }
14510
14511 /* All the useful information has now been extracted from the
14512 operands into unsorted_regs and unsorted_offsets; additionally,
14513 order[0] has been set to the lowest offset in the list. Sort
14514 the offsets into order, verifying that they are adjacent, and
14515 check that the register numbers are ascending. */
14516 if (!compute_offset_order (nops, unsorted_offsets, order,
14517 check_regs ? unsorted_regs : NULL))
14518 return 0;
14519
14520 if (saved_order)
14521 memcpy (saved_order, order, sizeof order);
14522
14523 if (base)
14524 {
14525 *base = base_reg;
14526
14527 for (i = 0; i < nops; i++)
14528 {
14529 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14530 if (reg_rtxs)
14531 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14532 }
14533
14534 *load_offset = unsorted_offsets[order[0]];
14535 }
14536
14537 if (TARGET_THUMB1
14538 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14539 return 0;
14540
14541 if (unsorted_offsets[order[0]] == 0)
14542 stm_case = 1; /* stmia */
14543 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14544 stm_case = 2; /* stmib */
14545 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14546 stm_case = 3; /* stmda */
14547 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14548 stm_case = 4; /* stmdb */
14549 else
14550 return 0;
14551
14552 if (!multiple_operation_profitable_p (false, nops, 0))
14553 return 0;
14554
14555 return stm_case;
14556 }
14557 \f
14558 /* Routines for use in generating RTL. */
14559
14560 /* Generate a load-multiple instruction. COUNT is the number of loads in
14561 the instruction; REGS and MEMS are arrays containing the operands.
14562 BASEREG is the base register to be used in addressing the memory operands.
14563 WBACK_OFFSET is nonzero if the instruction should update the base
14564 register. */
14565
14566 static rtx
14567 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14568 HOST_WIDE_INT wback_offset)
14569 {
14570 int i = 0, j;
14571 rtx result;
14572
14573 if (!multiple_operation_profitable_p (false, count, 0))
14574 {
14575 rtx seq;
14576
14577 start_sequence ();
14578
14579 for (i = 0; i < count; i++)
14580 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14581
14582 if (wback_offset != 0)
14583 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14584
14585 seq = get_insns ();
14586 end_sequence ();
14587
14588 return seq;
14589 }
14590
14591 result = gen_rtx_PARALLEL (VOIDmode,
14592 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14593 if (wback_offset != 0)
14594 {
14595 XVECEXP (result, 0, 0)
14596 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14597 i = 1;
14598 count++;
14599 }
14600
14601 for (j = 0; i < count; i++, j++)
14602 XVECEXP (result, 0, i)
14603 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14604
14605 return result;
14606 }
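/* For illustration (an assumed example, not from the original source), a
   call with COUNT = 2, REGS = {4, 5} and WBACK_OFFSET = 8 builds roughly:

     (parallel [(set (reg:SI basereg) (plus:SI (reg:SI basereg) (const_int 8)))
                (set (reg:SI 4) mems[0])
                (set (reg:SI 5) mems[1])])

   whereas an unprofitable combination is emitted as a plain sequence of
   move insns instead.  */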
14607
14608 /* Generate a store-multiple instruction. COUNT is the number of stores in
14609 the instruction; REGS and MEMS are arrays containing the operands.
14610 BASEREG is the base register to be used in addressing the memory operands.
14611 WBACK_OFFSET is nonzero if the instruction should update the base
14612 register. */
14613
14614 static rtx
14615 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14616 HOST_WIDE_INT wback_offset)
14617 {
14618 int i = 0, j;
14619 rtx result;
14620
14621 if (GET_CODE (basereg) == PLUS)
14622 basereg = XEXP (basereg, 0);
14623
14624 if (!multiple_operation_profitable_p (false, count, 0))
14625 {
14626 rtx seq;
14627
14628 start_sequence ();
14629
14630 for (i = 0; i < count; i++)
14631 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14632
14633 if (wback_offset != 0)
14634 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14635
14636 seq = get_insns ();
14637 end_sequence ();
14638
14639 return seq;
14640 }
14641
14642 result = gen_rtx_PARALLEL (VOIDmode,
14643 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14644 if (wback_offset != 0)
14645 {
14646 XVECEXP (result, 0, 0)
14647 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14648 i = 1;
14649 count++;
14650 }
14651
14652 for (j = 0; i < count; i++, j++)
14653 XVECEXP (result, 0, i)
14654 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14655
14656 return result;
14657 }
14658
14659 /* Generate either a load-multiple or a store-multiple instruction. This
14660 function can be used in situations where we can start with a single MEM
14661 rtx and adjust its address upwards.
14662 COUNT is the number of operations in the instruction, not counting a
14663 possible update of the base register. REGS is an array containing the
14664 register operands.
14665 BASEREG is the base register to be used in addressing the memory operands,
14666 which are constructed from BASEMEM.
14667 WRITE_BACK specifies whether the generated instruction should include an
14668 update of the base register.
14669 OFFSETP is used to pass an offset to and from this function; this offset
14670 is not used when constructing the address (instead BASEMEM should have an
14671 appropriate offset in its address), it is used only for setting
14672 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14673
14674 static rtx
14675 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14676 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14677 {
14678 rtx mems[MAX_LDM_STM_OPS];
14679 HOST_WIDE_INT offset = *offsetp;
14680 int i;
14681
14682 gcc_assert (count <= MAX_LDM_STM_OPS);
14683
14684 if (GET_CODE (basereg) == PLUS)
14685 basereg = XEXP (basereg, 0);
14686
14687 for (i = 0; i < count; i++)
14688 {
14689 rtx addr = plus_constant (Pmode, basereg, i * 4);
14690 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14691 offset += 4;
14692 }
14693
14694 if (write_back)
14695 *offsetp = offset;
14696
14697 if (is_load)
14698 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14699 write_back ? 4 * count : 0);
14700 else
14701 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14702 write_back ? 4 * count : 0);
14703 }
14704
14705 rtx
14706 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14707 rtx basemem, HOST_WIDE_INT *offsetp)
14708 {
14709 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14710 offsetp);
14711 }
14712
14713 rtx
14714 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14715 rtx basemem, HOST_WIDE_INT *offsetp)
14716 {
14717 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14718 offsetp);
14719 }
14720
14721 /* Called from a peephole2 expander to turn a sequence of loads into an
14722 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14723 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14724 is true if we can reorder the registers because they are used commutatively
14725 subsequently.
14726 Returns true iff we could generate a new instruction. */
14727
14728 bool
14729 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14730 {
14731 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14732 rtx mems[MAX_LDM_STM_OPS];
14733 int i, j, base_reg;
14734 rtx base_reg_rtx;
14735 HOST_WIDE_INT offset;
14736 int write_back = FALSE;
14737 int ldm_case;
14738 rtx addr;
14739
14740 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14741 &base_reg, &offset, !sort_regs);
14742
14743 if (ldm_case == 0)
14744 return false;
14745
14746 if (sort_regs)
14747 for (i = 0; i < nops - 1; i++)
14748 for (j = i + 1; j < nops; j++)
14749 if (regs[i] > regs[j])
14750 {
14751 int t = regs[i];
14752 regs[i] = regs[j];
14753 regs[j] = t;
14754 }
14755 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14756
14757 if (TARGET_THUMB1)
14758 {
14759 gcc_assert (ldm_case == 1 || ldm_case == 5);
14760
14761 /* Thumb-1 ldm uses writeback except if the base is loaded. */
14762 write_back = true;
14763 for (i = 0; i < nops; i++)
14764 if (base_reg == regs[i])
14765 write_back = false;
14766
14767 /* Ensure the base is dead if it is updated. */
14768 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
14769 return false;
14770 }
14771
14772 if (ldm_case == 5)
14773 {
14774 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14775 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14776 offset = 0;
14777 base_reg_rtx = newbase;
14778 }
14779
14780 for (i = 0; i < nops; i++)
14781 {
14782 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14783 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14784 SImode, addr, 0);
14785 }
14786 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14787 write_back ? offset + i * 4 : 0));
14788 return true;
14789 }
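/* Illustrative peephole result (an assumed example, not from the original
   source): two loads such as

       ldr r0, [r4]
       ldr r1, [r4, #4]

   can be rewritten as "ldmia r4, {r0, r1}" when load_multiple_sequence
   classifies them as case 1 and multiple_operation_profitable_p agrees;
   for case 5 an ADD of the offset into the first destination register (or
   into the base register for Thumb-1) is emitted first.  */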
14790
14791 /* Called from a peephole2 expander to turn a sequence of stores into an
14792 STM instruction. OPERANDS are the operands found by the peephole matcher;
14793 NOPS indicates how many separate stores we are trying to combine.
14794 Returns true iff we could generate a new instruction. */
14795
14796 bool
14797 gen_stm_seq (rtx *operands, int nops)
14798 {
14799 int i;
14800 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14801 rtx mems[MAX_LDM_STM_OPS];
14802 int base_reg;
14803 rtx base_reg_rtx;
14804 HOST_WIDE_INT offset;
14805 int write_back = FALSE;
14806 int stm_case;
14807 rtx addr;
14808 bool base_reg_dies;
14809
14810 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14811 mem_order, &base_reg, &offset, true);
14812
14813 if (stm_case == 0)
14814 return false;
14815
14816 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14817
14818 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14819 if (TARGET_THUMB1)
14820 {
14821 gcc_assert (base_reg_dies);
14822 write_back = TRUE;
14823 }
14824
14825 if (stm_case == 5)
14826 {
14827 gcc_assert (base_reg_dies);
14828 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14829 offset = 0;
14830 }
14831
14832 addr = plus_constant (Pmode, base_reg_rtx, offset);
14833
14834 for (i = 0; i < nops; i++)
14835 {
14836 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14837 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14838 SImode, addr, 0);
14839 }
14840 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14841 write_back ? offset + i * 4 : 0));
14842 return true;
14843 }
14844
14845 /* Called from a peephole2 expander to turn a sequence of stores that are
14846 preceded by constant loads into an STM instruction. OPERANDS are the
14847 operands found by the peephole matcher; NOPS indicates how many
14848 separate stores we are trying to combine; there are 2 * NOPS
14849 instructions in the peephole.
14850 Returns true iff we could generate a new instruction. */
14851
14852 bool
14853 gen_const_stm_seq (rtx *operands, int nops)
14854 {
14855 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14856 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14857 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14858 rtx mems[MAX_LDM_STM_OPS];
14859 int base_reg;
14860 rtx base_reg_rtx;
14861 HOST_WIDE_INT offset;
14862 int write_back = FALSE;
14863 int stm_case;
14864 rtx addr;
14865 bool base_reg_dies;
14866 int i, j;
14867 HARD_REG_SET allocated;
14868
14869 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14870 mem_order, &base_reg, &offset, false);
14871
14872 if (stm_case == 0)
14873 return false;
14874
14875 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14876
14877 /* If the same register is used more than once, try to find a free
14878 register. */
14879 CLEAR_HARD_REG_SET (allocated);
14880 for (i = 0; i < nops; i++)
14881 {
14882 for (j = i + 1; j < nops; j++)
14883 if (regs[i] == regs[j])
14884 {
14885 rtx t = peep2_find_free_register (0, nops * 2,
14886 TARGET_THUMB1 ? "l" : "r",
14887 SImode, &allocated);
14888 if (t == NULL_RTX)
14889 return false;
14890 reg_rtxs[i] = t;
14891 regs[i] = REGNO (t);
14892 }
14893 }
14894
14895 /* Compute an ordering that maps the register numbers to an ascending
14896 sequence. */
14897 reg_order[0] = 0;
14898 for (i = 0; i < nops; i++)
14899 if (regs[i] < regs[reg_order[0]])
14900 reg_order[0] = i;
14901
14902 for (i = 1; i < nops; i++)
14903 {
14904 int this_order = reg_order[i - 1];
14905 for (j = 0; j < nops; j++)
14906 if (regs[j] > regs[reg_order[i - 1]]
14907 && (this_order == reg_order[i - 1]
14908 || regs[j] < regs[this_order]))
14909 this_order = j;
14910 reg_order[i] = this_order;
14911 }
14912
14913 /* Ensure that registers that must be live after the instruction end
14914 up with the correct value. */
14915 for (i = 0; i < nops; i++)
14916 {
14917 int this_order = reg_order[i];
14918 if ((this_order != mem_order[i]
14919 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14920 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14921 return false;
14922 }
14923
14924 /* Load the constants. */
14925 for (i = 0; i < nops; i++)
14926 {
14927 rtx op = operands[2 * nops + mem_order[i]];
14928 sorted_regs[i] = regs[reg_order[i]];
14929 emit_move_insn (reg_rtxs[reg_order[i]], op);
14930 }
14931
14932 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14933
14934 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14935 if (TARGET_THUMB1)
14936 {
14937 gcc_assert (base_reg_dies);
14938 write_back = TRUE;
14939 }
14940
14941 if (stm_case == 5)
14942 {
14943 gcc_assert (base_reg_dies);
14944 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14945 offset = 0;
14946 }
14947
14948 addr = plus_constant (Pmode, base_reg_rtx, offset);
14949
14950 for (i = 0; i < nops; i++)
14951 {
14952 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14953 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14954 SImode, addr, 0);
14955 }
14956 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14957 write_back ? offset + i * 4 : 0));
14958 return true;
14959 }
14960
14961 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14962 unaligned copies on processors which support unaligned semantics for those
14963 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14964 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14965 An interleave factor of 1 (the minimum) will perform no interleaving.
14966 Load/store multiple are used for aligned addresses where possible. */
14967
14968 static void
14969 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14970 HOST_WIDE_INT length,
14971 unsigned int interleave_factor)
14972 {
14973 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14974 int *regnos = XALLOCAVEC (int, interleave_factor);
14975 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14976 HOST_WIDE_INT i, j;
14977 HOST_WIDE_INT remaining = length, words;
14978 rtx halfword_tmp = NULL, byte_tmp = NULL;
14979 rtx dst, src;
14980 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14981 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14982 HOST_WIDE_INT srcoffset, dstoffset;
14983 HOST_WIDE_INT src_autoinc, dst_autoinc;
14984 rtx mem, addr;
14985
14986 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14987
14988 /* Use hard registers if we have aligned source or destination so we can use
14989 load/store multiple with contiguous registers. */
14990 if (dst_aligned || src_aligned)
14991 for (i = 0; i < interleave_factor; i++)
14992 regs[i] = gen_rtx_REG (SImode, i);
14993 else
14994 for (i = 0; i < interleave_factor; i++)
14995 regs[i] = gen_reg_rtx (SImode);
14996
14997 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14998 src = copy_addr_to_reg (XEXP (srcbase, 0));
14999
15000 srcoffset = dstoffset = 0;
15001
15002 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
15003 For copying the last bytes we want to subtract this offset again. */
15004 src_autoinc = dst_autoinc = 0;
15005
15006 for (i = 0; i < interleave_factor; i++)
15007 regnos[i] = i;
15008
15009 /* Copy BLOCK_SIZE_BYTES chunks. */
15010
15011 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
15012 {
15013 /* Load words. */
15014 if (src_aligned && interleave_factor > 1)
15015 {
15016 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
15017 TRUE, srcbase, &srcoffset));
15018 src_autoinc += UNITS_PER_WORD * interleave_factor;
15019 }
15020 else
15021 {
15022 for (j = 0; j < interleave_factor; j++)
15023 {
15024 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
15025 - src_autoinc));
15026 mem = adjust_automodify_address (srcbase, SImode, addr,
15027 srcoffset + j * UNITS_PER_WORD);
15028 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15029 }
15030 srcoffset += block_size_bytes;
15031 }
15032
15033 /* Store words. */
15034 if (dst_aligned && interleave_factor > 1)
15035 {
15036 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
15037 TRUE, dstbase, &dstoffset));
15038 dst_autoinc += UNITS_PER_WORD * interleave_factor;
15039 }
15040 else
15041 {
15042 for (j = 0; j < interleave_factor; j++)
15043 {
15044 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
15045 - dst_autoinc));
15046 mem = adjust_automodify_address (dstbase, SImode, addr,
15047 dstoffset + j * UNITS_PER_WORD);
15048 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15049 }
15050 dstoffset += block_size_bytes;
15051 }
15052
15053 remaining -= block_size_bytes;
15054 }
15055
15056 /* Copy any whole words left (note these aren't interleaved with any
15057 subsequent halfword/byte load/stores in the interests of simplicity). */
15058
15059 words = remaining / UNITS_PER_WORD;
15060
15061 gcc_assert (words < interleave_factor);
15062
15063 if (src_aligned && words > 1)
15064 {
15065 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
15066 &srcoffset));
15067 src_autoinc += UNITS_PER_WORD * words;
15068 }
15069 else
15070 {
15071 for (j = 0; j < words; j++)
15072 {
15073 addr = plus_constant (Pmode, src,
15074 srcoffset + j * UNITS_PER_WORD - src_autoinc);
15075 mem = adjust_automodify_address (srcbase, SImode, addr,
15076 srcoffset + j * UNITS_PER_WORD);
15077 if (src_aligned)
15078 emit_move_insn (regs[j], mem);
15079 else
15080 emit_insn (gen_unaligned_loadsi (regs[j], mem));
15081 }
15082 srcoffset += words * UNITS_PER_WORD;
15083 }
15084
15085 if (dst_aligned && words > 1)
15086 {
15087 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
15088 &dstoffset));
15089 dst_autoinc += words * UNITS_PER_WORD;
15090 }
15091 else
15092 {
15093 for (j = 0; j < words; j++)
15094 {
15095 addr = plus_constant (Pmode, dst,
15096 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
15097 mem = adjust_automodify_address (dstbase, SImode, addr,
15098 dstoffset + j * UNITS_PER_WORD);
15099 if (dst_aligned)
15100 emit_move_insn (mem, regs[j]);
15101 else
15102 emit_insn (gen_unaligned_storesi (mem, regs[j]));
15103 }
15104 dstoffset += words * UNITS_PER_WORD;
15105 }
15106
15107 remaining -= words * UNITS_PER_WORD;
15108
15109 gcc_assert (remaining < 4);
15110
15111 /* Copy a halfword if necessary. */
15112
15113 if (remaining >= 2)
15114 {
15115 halfword_tmp = gen_reg_rtx (SImode);
15116
15117 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15118 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
15119 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
15120
15121 /* Either write out immediately, or delay until we've loaded the last
15122 byte, depending on interleave factor. */
15123 if (interleave_factor == 1)
15124 {
15125 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15126 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15127 emit_insn (gen_unaligned_storehi (mem,
15128 gen_lowpart (HImode, halfword_tmp)));
15129 halfword_tmp = NULL;
15130 dstoffset += 2;
15131 }
15132
15133 remaining -= 2;
15134 srcoffset += 2;
15135 }
15136
15137 gcc_assert (remaining < 2);
15138
15139 /* Copy last byte. */
15140
15141 if ((remaining & 1) != 0)
15142 {
15143 byte_tmp = gen_reg_rtx (SImode);
15144
15145 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
15146 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
15147 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
15148
15149 if (interleave_factor == 1)
15150 {
15151 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15152 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15153 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15154 byte_tmp = NULL;
15155 dstoffset++;
15156 }
15157
15158 remaining--;
15159 srcoffset++;
15160 }
15161
15162 /* Store last halfword if we haven't done so already. */
15163
15164 if (halfword_tmp)
15165 {
15166 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15167 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
15168 emit_insn (gen_unaligned_storehi (mem,
15169 gen_lowpart (HImode, halfword_tmp)));
15170 dstoffset += 2;
15171 }
15172
15173 /* Likewise for last byte. */
15174
15175 if (byte_tmp)
15176 {
15177 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
15178 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
15179 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
15180 dstoffset++;
15181 }
15182
15183 gcc_assert (remaining == 0 && srcoffset == dstoffset);
15184 }
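/* Worked example (illustrative only, not from the original source): copying
   LENGTH = 7 bytes with INTERLEAVE_FACTOR = 1 and unaligned operands emits
   one unaligned SImode load/store pair for bytes 0-3, an unaligned HImode
   pair for bytes 4-5 and a QImode move for byte 6; with a larger interleave
   factor the trailing halfword and byte stores are delayed until after the
   corresponding loads, as coded above.  */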
15185
15186 /* From mips_adjust_block_mem:
15187
15188 Helper function for doing a loop-based block operation on memory
15189 reference MEM. Each iteration of the loop will operate on LENGTH
15190 bytes of MEM.
15191
15192 Create a new base register for use within the loop and point it to
15193 the start of MEM. Create a new memory reference that uses this
15194 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
15195
15196 static void
15197 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
15198 rtx *loop_mem)
15199 {
15200 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
15201
15202 /* Although the new mem does not refer to a known location,
15203 it does keep up to LENGTH bytes of alignment. */
15204 *loop_mem = change_address (mem, BLKmode, *loop_reg);
15205 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
15206 }
15207
15208 /* From mips_block_move_loop:
15209
15210 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
15211 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
15212 the memory regions do not overlap. */
15213
15214 static void
15215 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
15216 unsigned int interleave_factor,
15217 HOST_WIDE_INT bytes_per_iter)
15218 {
15219 rtx src_reg, dest_reg, final_src, test;
15220 HOST_WIDE_INT leftover;
15221
15222 leftover = length % bytes_per_iter;
15223 length -= leftover;
15224
15225 /* Create registers and memory references for use within the loop. */
15226 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
15227 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
15228
15229 /* Calculate the value that SRC_REG should have after the last iteration of
15230 the loop. */
15231 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
15232 0, 0, OPTAB_WIDEN);
15233
15234 /* Emit the start of the loop. */
15235 rtx_code_label *label = gen_label_rtx ();
15236 emit_label (label);
15237
15238 /* Emit the loop body. */
15239 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
15240 interleave_factor);
15241
15242 /* Move on to the next block. */
15243 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
15244 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
15245
15246 /* Emit the loop condition. */
15247 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
15248 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
15249
15250 /* Mop up any left-over bytes. */
15251 if (leftover)
15252 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
15253 }
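/* Worked example (illustrative only, not from the original source): for
   LENGTH = 40 and BYTES_PER_ITER = 16, LEFTOVER is 8, the loop body copies
   16 bytes per iteration and runs until SRC_REG reaches src + 32 (two
   iterations), and the remaining 8 bytes are then copied by a straight-line
   tail.  */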
15254
15255 /* Emit a block move when either the source or destination is unaligned (not
15256 aligned to a four-byte boundary). This may need further tuning depending on
15257 core type, optimize_size setting, etc. */
15258
15259 static int
15260 arm_cpymemqi_unaligned (rtx *operands)
15261 {
15262 HOST_WIDE_INT length = INTVAL (operands[2]);
15263
15264 if (optimize_size)
15265 {
15266 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
15267 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
15268 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
15269 size of code if optimizing for size. We'll use ldm/stm if src_aligned
15270 or dst_aligned though: allow more interleaving in those cases since the
15271 resulting code can be smaller. */
15272 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
15273 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
15274
15275 if (length > 12)
15276 arm_block_move_unaligned_loop (operands[0], operands[1], length,
15277 interleave_factor, bytes_per_iter);
15278 else
15279 arm_block_move_unaligned_straight (operands[0], operands[1], length,
15280 interleave_factor);
15281 }
15282 else
15283 {
15284 /* Note that the loop created by arm_block_move_unaligned_loop may be
15285 subject to loop unrolling, which makes tuning this condition a little
15286 redundant. */
15287 if (length > 32)
15288 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
15289 else
15290 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
15291 }
15292
15293 return 1;
15294 }
15295
15296 int
15297 arm_gen_cpymemqi (rtx *operands)
15298 {
15299 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
15300 HOST_WIDE_INT srcoffset, dstoffset;
15301 rtx src, dst, srcbase, dstbase;
15302 rtx part_bytes_reg = NULL;
15303 rtx mem;
15304
15305 if (!CONST_INT_P (operands[2])
15306 || !CONST_INT_P (operands[3])
15307 || INTVAL (operands[2]) > 64)
15308 return 0;
15309
15310 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
15311 return arm_cpymemqi_unaligned (operands);
15312
15313 if (INTVAL (operands[3]) & 3)
15314 return 0;
15315
15316 dstbase = operands[0];
15317 srcbase = operands[1];
15318
15319 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
15320 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
15321
15322 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
15323 out_words_to_go = INTVAL (operands[2]) / 4;
15324 last_bytes = INTVAL (operands[2]) & 3;
15325 dstoffset = srcoffset = 0;
15326
15327 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
15328 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
15329
15330 while (in_words_to_go >= 2)
15331 {
15332 if (in_words_to_go > 4)
15333 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
15334 TRUE, srcbase, &srcoffset));
15335 else
15336 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
15337 src, FALSE, srcbase,
15338 &srcoffset));
15339
15340 if (out_words_to_go)
15341 {
15342 if (out_words_to_go > 4)
15343 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
15344 TRUE, dstbase, &dstoffset));
15345 else if (out_words_to_go != 1)
15346 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
15347 out_words_to_go, dst,
15348 (last_bytes == 0
15349 ? FALSE : TRUE),
15350 dstbase, &dstoffset));
15351 else
15352 {
15353 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15354 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
15355 if (last_bytes != 0)
15356 {
15357 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
15358 dstoffset += 4;
15359 }
15360 }
15361 }
15362
15363 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
15364 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
15365 }
15366
15367 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
15368 if (out_words_to_go)
15369 {
15370 rtx sreg;
15371
15372 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15373 sreg = copy_to_reg (mem);
15374
15375 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
15376 emit_move_insn (mem, sreg);
15377 in_words_to_go--;
15378
15379 gcc_assert (!in_words_to_go); /* Sanity check */
15380 }
15381
15382 if (in_words_to_go)
15383 {
15384 gcc_assert (in_words_to_go > 0);
15385
15386 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
15387 part_bytes_reg = copy_to_mode_reg (SImode, mem);
15388 }
15389
15390 gcc_assert (!last_bytes || part_bytes_reg);
15391
15392 if (BYTES_BIG_ENDIAN && last_bytes)
15393 {
15394 rtx tmp = gen_reg_rtx (SImode);
15395
15396 /* The bytes we want are in the top end of the word. */
15397 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
15398 GEN_INT (8 * (4 - last_bytes))));
15399 part_bytes_reg = tmp;
15400
15401 while (last_bytes)
15402 {
15403 mem = adjust_automodify_address (dstbase, QImode,
15404 plus_constant (Pmode, dst,
15405 last_bytes - 1),
15406 dstoffset + last_bytes - 1);
15407 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15408
15409 if (--last_bytes)
15410 {
15411 tmp = gen_reg_rtx (SImode);
15412 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
15413 part_bytes_reg = tmp;
15414 }
15415 }
15416
15417 }
15418 else
15419 {
15420 if (last_bytes > 1)
15421 {
15422 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
15423 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
15424 last_bytes -= 2;
15425 if (last_bytes)
15426 {
15427 rtx tmp = gen_reg_rtx (SImode);
15428 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
15429 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
15430 part_bytes_reg = tmp;
15431 dstoffset += 2;
15432 }
15433 }
15434
15435 if (last_bytes)
15436 {
15437 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
15438 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15439 }
15440 }
15441
15442 return 1;
15443 }
15444
15445 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
15446 by mode size. */
15447 inline static rtx
15448 next_consecutive_mem (rtx mem)
15449 {
15450 machine_mode mode = GET_MODE (mem);
15451 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15452 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15453
15454 return adjust_automodify_address (mem, mode, addr, offset);
15455 }
15456
15457 /* Copy using LDRD/STRD instructions whenever possible.
15458 Returns true upon success. */
15459 bool
15460 gen_cpymem_ldrd_strd (rtx *operands)
15461 {
15462 unsigned HOST_WIDE_INT len;
15463 HOST_WIDE_INT align;
15464 rtx src, dst, base;
15465 rtx reg0;
15466 bool src_aligned, dst_aligned;
15467 bool src_volatile, dst_volatile;
15468
15469 gcc_assert (CONST_INT_P (operands[2]));
15470 gcc_assert (CONST_INT_P (operands[3]));
15471
15472 len = UINTVAL (operands[2]);
15473 if (len > 64)
15474 return false;
15475
15476 /* Maximum alignment we can assume for both src and dst buffers. */
15477 align = INTVAL (operands[3]);
15478
15479 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15480 return false;
15481
15482 /* Place src and dst addresses in registers
15483 and update the corresponding mem rtx. */
15484 dst = operands[0];
15485 dst_volatile = MEM_VOLATILE_P (dst);
15486 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15487 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15488 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15489
15490 src = operands[1];
15491 src_volatile = MEM_VOLATILE_P (src);
15492 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15493 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15494 src = adjust_automodify_address (src, VOIDmode, base, 0);
15495
15496 if (!unaligned_access && !(src_aligned && dst_aligned))
15497 return false;
15498
15499 if (src_volatile || dst_volatile)
15500 return false;
15501
15502 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15503 if (!(dst_aligned || src_aligned))
15504 return arm_gen_cpymemqi (operands);
15505
15506 /* If either the src or dst is unaligned we'll be accessing it as pairs
15507 of unaligned SImode accesses. Otherwise we can generate DImode
15508 ldrd/strd instructions. */
15509 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15510 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15511
15512 while (len >= 8)
15513 {
15514 len -= 8;
15515 reg0 = gen_reg_rtx (DImode);
15516 rtx low_reg = NULL_RTX;
15517 rtx hi_reg = NULL_RTX;
15518
15519 if (!src_aligned || !dst_aligned)
15520 {
15521 low_reg = gen_lowpart (SImode, reg0);
15522 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
15523 }
15524 if (MEM_ALIGN (src) >= 2 * BITS_PER_WORD)
15525 emit_move_insn (reg0, src);
15526 else if (src_aligned)
15527 emit_insn (gen_unaligned_loaddi (reg0, src));
15528 else
15529 {
15530 emit_insn (gen_unaligned_loadsi (low_reg, src));
15531 src = next_consecutive_mem (src);
15532 emit_insn (gen_unaligned_loadsi (hi_reg, src));
15533 }
15534
15535 if (MEM_ALIGN (dst) >= 2 * BITS_PER_WORD)
15536 emit_move_insn (dst, reg0);
15537 else if (dst_aligned)
15538 emit_insn (gen_unaligned_storedi (dst, reg0));
15539 else
15540 {
15541 emit_insn (gen_unaligned_storesi (dst, low_reg));
15542 dst = next_consecutive_mem (dst);
15543 emit_insn (gen_unaligned_storesi (dst, hi_reg));
15544 }
15545
15546 src = next_consecutive_mem (src);
15547 dst = next_consecutive_mem (dst);
15548 }
15549
15550 gcc_assert (len < 8);
15551 if (len >= 4)
15552 {
15553 /* More than a word but less than a double-word to copy. Copy a word. */
15554 reg0 = gen_reg_rtx (SImode);
15555 src = adjust_address (src, SImode, 0);
15556 dst = adjust_address (dst, SImode, 0);
15557 if (src_aligned)
15558 emit_move_insn (reg0, src);
15559 else
15560 emit_insn (gen_unaligned_loadsi (reg0, src));
15561
15562 if (dst_aligned)
15563 emit_move_insn (dst, reg0);
15564 else
15565 emit_insn (gen_unaligned_storesi (dst, reg0));
15566
15567 src = next_consecutive_mem (src);
15568 dst = next_consecutive_mem (dst);
15569 len -= 4;
15570 }
15571
15572 if (len == 0)
15573 return true;
15574
15575 /* Copy the remaining bytes. */
15576 if (len >= 2)
15577 {
15578 dst = adjust_address (dst, HImode, 0);
15579 src = adjust_address (src, HImode, 0);
15580 reg0 = gen_reg_rtx (SImode);
15581 if (src_aligned)
15582 emit_insn (gen_zero_extendhisi2 (reg0, src));
15583 else
15584 emit_insn (gen_unaligned_loadhiu (reg0, src));
15585
15586 if (dst_aligned)
15587 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15588 else
15589 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15590
15591 src = next_consecutive_mem (src);
15592 dst = next_consecutive_mem (dst);
15593 if (len == 2)
15594 return true;
15595 }
15596
15597 dst = adjust_address (dst, QImode, 0);
15598 src = adjust_address (src, QImode, 0);
15599 reg0 = gen_reg_rtx (QImode);
15600 emit_move_insn (reg0, src);
15601 emit_move_insn (dst, reg0);
15602 return true;
15603 }
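/* Worked example (illustrative only, not from the original source): a
   14-byte copy with both operands word-aligned is expanded as one DImode
   load/store (bytes 0-7, using LDRD/STRD, or a plain 64-bit move when the
   alignment allows), one SImode word copy (bytes 8-11) and one HImode copy
   (bytes 12-13); a trailing odd byte, had there been one, would be copied
   by the final QImode move.  */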
15604
15605 /* Decompose operands for a 64-bit binary operation in OP1 and OP2
15606 into their component 32-bit subregs. OP2 may be an immediate
15607 constant and we want to simplify it in that case. */
15608 void
15609 arm_decompose_di_binop (rtx op1, rtx op2, rtx *lo_op1, rtx *hi_op1,
15610 rtx *lo_op2, rtx *hi_op2)
15611 {
15612 *lo_op1 = gen_lowpart (SImode, op1);
15613 *hi_op1 = gen_highpart (SImode, op1);
15614 *lo_op2 = simplify_gen_subreg (SImode, op2, DImode,
15615 subreg_lowpart_offset (SImode, DImode));
15616 *hi_op2 = simplify_gen_subreg (SImode, op2, DImode,
15617 subreg_highpart_offset (SImode, DImode));
15618 }
15619
15620 /* Select a dominance comparison mode if possible for a test of the general
15621 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15622 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15623 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15624 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15625 In all cases OP will be either EQ or NE, but we don't need to know which
15626 here. If we are unable to support a dominance comparison we return
15627 CC mode. This will then fail to match for the RTL expressions that
15628 generate this call. */
15629 machine_mode
15630 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15631 {
15632 enum rtx_code cond1, cond2;
15633 int swapped = 0;
15634
15635 /* Currently we will probably get the wrong result if the individual
15636 comparisons are not simple. This also ensures that it is safe to
15637 reverse a comparison if necessary. */
15638 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15639 != CCmode)
15640 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15641 != CCmode))
15642 return CCmode;
15643
15644 /* The if_then_else variant of this tests the second condition if the
15645 first passes, but is true if the first fails. Reverse the first
15646 condition to get a true "inclusive-or" expression. */
15647 if (cond_or == DOM_CC_NX_OR_Y)
15648 cond1 = reverse_condition (cond1);
15649
15650 /* If the comparisons are not equal, and one doesn't dominate the other,
15651 then we can't do this. */
15652 if (cond1 != cond2
15653 && !comparison_dominates_p (cond1, cond2)
15654 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15655 return CCmode;
15656
15657 if (swapped)
15658 std::swap (cond1, cond2);
15659
15660 switch (cond1)
15661 {
15662 case EQ:
15663 if (cond_or == DOM_CC_X_AND_Y)
15664 return CC_DEQmode;
15665
15666 switch (cond2)
15667 {
15668 case EQ: return CC_DEQmode;
15669 case LE: return CC_DLEmode;
15670 case LEU: return CC_DLEUmode;
15671 case GE: return CC_DGEmode;
15672 case GEU: return CC_DGEUmode;
15673 default: gcc_unreachable ();
15674 }
15675
15676 case LT:
15677 if (cond_or == DOM_CC_X_AND_Y)
15678 return CC_DLTmode;
15679
15680 switch (cond2)
15681 {
15682 case LT:
15683 return CC_DLTmode;
15684 case LE:
15685 return CC_DLEmode;
15686 case NE:
15687 return CC_DNEmode;
15688 default:
15689 gcc_unreachable ();
15690 }
15691
15692 case GT:
15693 if (cond_or == DOM_CC_X_AND_Y)
15694 return CC_DGTmode;
15695
15696 switch (cond2)
15697 {
15698 case GT:
15699 return CC_DGTmode;
15700 case GE:
15701 return CC_DGEmode;
15702 case NE:
15703 return CC_DNEmode;
15704 default:
15705 gcc_unreachable ();
15706 }
15707
15708 case LTU:
15709 if (cond_or == DOM_CC_X_AND_Y)
15710 return CC_DLTUmode;
15711
15712 switch (cond2)
15713 {
15714 case LTU:
15715 return CC_DLTUmode;
15716 case LEU:
15717 return CC_DLEUmode;
15718 case NE:
15719 return CC_DNEmode;
15720 default:
15721 gcc_unreachable ();
15722 }
15723
15724 case GTU:
15725 if (cond_or == DOM_CC_X_AND_Y)
15726 return CC_DGTUmode;
15727
15728 switch (cond2)
15729 {
15730 case GTU:
15731 return CC_DGTUmode;
15732 case GEU:
15733 return CC_DGEUmode;
15734 case NE:
15735 return CC_DNEmode;
15736 default:
15737 gcc_unreachable ();
15738 }
15739
15740 /* The remaining cases only occur when both comparisons are the
15741 same. */
15742 case NE:
15743 gcc_assert (cond1 == cond2);
15744 return CC_DNEmode;
15745
15746 case LE:
15747 gcc_assert (cond1 == cond2);
15748 return CC_DLEmode;
15749
15750 case GE:
15751 gcc_assert (cond1 == cond2);
15752 return CC_DGEmode;
15753
15754 case LEU:
15755 gcc_assert (cond1 == cond2);
15756 return CC_DLEUmode;
15757
15758 case GEU:
15759 gcc_assert (cond1 == cond2);
15760 return CC_DGEUmode;
15761
15762 default:
15763 gcc_unreachable ();
15764 }
15765 }
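/* Illustrative examples (not from the original source): for
   x = (eq r0 0), y = (eq r1 0) and COND_OR == DOM_CC_X_AND_Y the function
   returns CC_DEQmode, while x = (lt r0 0), y = (ne r1 0) with
   COND_OR == DOM_CC_X_OR_Y returns CC_DNEmode because LT dominates NE;
   comparisons that neither match nor dominate one another fall back to
   CCmode, so the calling pattern fails to match.  */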
15766
15767 machine_mode
15768 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15769 {
15770 /* All floating point compares return CCFP if it is an equality
15771 comparison, and CCFPE otherwise. */
15772 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15773 {
15774 switch (op)
15775 {
15776 case EQ:
15777 case NE:
15778 case UNORDERED:
15779 case ORDERED:
15780 case UNLT:
15781 case UNLE:
15782 case UNGT:
15783 case UNGE:
15784 case UNEQ:
15785 case LTGT:
15786 return CCFPmode;
15787
15788 case LT:
15789 case LE:
15790 case GT:
15791 case GE:
15792 return CCFPEmode;
15793
15794 default:
15795 gcc_unreachable ();
15796 }
15797 }
15798
15799 /* A compare with a shifted operand. Because of canonicalization, the
15800 comparison will have to be swapped when we emit the assembler. */
15801 if (GET_MODE (y) == SImode
15802 && (REG_P (y) || (SUBREG_P (y)))
15803 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15804 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15805 || GET_CODE (x) == ROTATERT))
15806 return CC_SWPmode;
15807
15808 /* A widened compare of the sum of a value plus a carry against a
15809 constant. This is a representation of RSC. We want to swap the
15810 result of the comparison at output. Not valid if the Z bit is
15811 needed. */
15812 if (GET_MODE (x) == DImode
15813 && GET_CODE (x) == PLUS
15814 && arm_borrow_operation (XEXP (x, 1), DImode)
15815 && CONST_INT_P (y)
15816 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15817 && (op == LE || op == GT))
15818 || (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
15819 && (op == LEU || op == GTU))))
15820 return CC_SWPmode;
15821
15822 /* If X is a constant we want to use CC_RSBmode. This is
15823 non-canonical, but arm_gen_compare_reg uses this to generate the
15824 correct canonical form. */
15825 if (GET_MODE (y) == SImode
15826 && (REG_P (y) || SUBREG_P (y))
15827 && CONST_INT_P (x))
15828 return CC_RSBmode;
15829
15830 /* This operation is performed swapped, but since we only rely on the Z
15831 flag we don't need an additional mode. */
15832 if (GET_MODE (y) == SImode
15833 && (REG_P (y) || (SUBREG_P (y)))
15834 && GET_CODE (x) == NEG
15835 && (op == EQ || op == NE))
15836 return CC_Zmode;
15837
15838 /* This is a special case that is used by combine to allow a
15839 comparison of a shifted byte load to be split into a zero-extend
15840 followed by a comparison of the shifted integer (only valid for
15841 equalities and unsigned inequalities). */
15842 if (GET_MODE (x) == SImode
15843 && GET_CODE (x) == ASHIFT
15844 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15845 && GET_CODE (XEXP (x, 0)) == SUBREG
15846 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15847 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15848 && (op == EQ || op == NE
15849 || op == GEU || op == GTU || op == LTU || op == LEU)
15850 && CONST_INT_P (y))
15851 return CC_Zmode;
15852
15853 /* A construct for a conditional compare. If the false arm contains
15854 0, then both conditions must be true; otherwise either condition
15855 must be true. Not all conditions are possible, so CCmode is
15856 returned if it can't be done. */
15857 if (GET_CODE (x) == IF_THEN_ELSE
15858 && (XEXP (x, 2) == const0_rtx
15859 || XEXP (x, 2) == const1_rtx)
15860 && COMPARISON_P (XEXP (x, 0))
15861 && COMPARISON_P (XEXP (x, 1)))
15862 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15863 INTVAL (XEXP (x, 2)));
15864
15865 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15866 if (GET_CODE (x) == AND
15867 && (op == EQ || op == NE)
15868 && COMPARISON_P (XEXP (x, 0))
15869 && COMPARISON_P (XEXP (x, 1)))
15870 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15871 DOM_CC_X_AND_Y);
15872
15873 if (GET_CODE (x) == IOR
15874 && (op == EQ || op == NE)
15875 && COMPARISON_P (XEXP (x, 0))
15876 && COMPARISON_P (XEXP (x, 1)))
15877 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15878 DOM_CC_X_OR_Y);
15879
15880 /* An operation (on Thumb) where we want to test for a single bit.
15881 This is done by shifting that bit up into the top bit of a
15882 scratch register; we can then branch on the sign bit. */
15883 if (TARGET_THUMB1
15884 && GET_MODE (x) == SImode
15885 && (op == EQ || op == NE)
15886 && GET_CODE (x) == ZERO_EXTRACT
15887 && XEXP (x, 1) == const1_rtx)
15888 return CC_Nmode;
15889
15890 /* An operation that sets the condition codes as a side-effect; the
15891 V flag is not set correctly, so we can only use comparisons where
15892 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15893 instead.) */
15894 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15895 if (GET_MODE (x) == SImode
15896 && y == const0_rtx
15897 && (op == EQ || op == NE || op == LT || op == GE)
15898 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15899 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15900 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15901 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15902 || GET_CODE (x) == LSHIFTRT
15903 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15904 || GET_CODE (x) == ROTATERT
15905 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15906 return CC_NZmode;
15907
15908 /* A comparison of ~reg with a const is really a special
15909      canonicalization of compare (~const, reg), which is a reverse
15910 subtract operation. We may not get here if CONST is 0, but that
15911 doesn't matter because ~0 isn't a valid immediate for RSB. */
15912 if (GET_MODE (x) == SImode
15913 && GET_CODE (x) == NOT
15914 && CONST_INT_P (y))
15915 return CC_RSBmode;
15916
15917 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15918 return CC_Zmode;
15919
15920 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15921 && GET_CODE (x) == PLUS
15922 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15923 return CC_Cmode;
15924
15925 if (GET_MODE (x) == DImode
15926 && GET_CODE (x) == PLUS
15927 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
15928 && CONST_INT_P (y)
15929 && UINTVAL (y) == 0x800000000
15930 && (op == GEU || op == LTU))
15931 return CC_ADCmode;
15932
15933 if (GET_MODE (x) == DImode
15934 && (op == GE || op == LT)
15935 && GET_CODE (x) == SIGN_EXTEND
15936 && ((GET_CODE (y) == PLUS
15937 && arm_borrow_operation (XEXP (y, 0), DImode))
15938 || arm_borrow_operation (y, DImode)))
15939 return CC_NVmode;
15940
15941 if (GET_MODE (x) == DImode
15942 && (op == GEU || op == LTU)
15943 && GET_CODE (x) == ZERO_EXTEND
15944 && ((GET_CODE (y) == PLUS
15945 && arm_borrow_operation (XEXP (y, 0), DImode))
15946 || arm_borrow_operation (y, DImode)))
15947 return CC_Bmode;
15948
15949 if (GET_MODE (x) == DImode
15950 && (op == EQ || op == NE)
15951 && (GET_CODE (x) == PLUS
15952 || GET_CODE (x) == MINUS)
15953 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
15954 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
15955 && GET_CODE (y) == SIGN_EXTEND
15956 && GET_CODE (XEXP (y, 0)) == GET_CODE (x))
15957 return CC_Vmode;
15958
15959 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15960 return GET_MODE (x);
15961
15962 return CCmode;
15963 }
15964
15965 /* X and Y are two (DImode) things to compare for the condition CODE. Emit
15966 the sequence of instructions needed to generate a suitable condition
15967 code register. Return the CC register result. */
15968 static rtx
15969 arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
15970 {
15971 machine_mode mode;
15972 rtx cc_reg;
15973
15974 /* We don't currently handle DImode in thumb1, but rely on libgcc. */
15975 gcc_assert (TARGET_32BIT);
15976 gcc_assert (!CONST_INT_P (x));
15977
15978 rtx x_lo = simplify_gen_subreg (SImode, x, DImode,
15979 subreg_lowpart_offset (SImode, DImode));
15980 rtx x_hi = simplify_gen_subreg (SImode, x, DImode,
15981 subreg_highpart_offset (SImode, DImode));
15982 rtx y_lo = simplify_gen_subreg (SImode, y, DImode,
15983 subreg_lowpart_offset (SImode, DImode));
15984 rtx y_hi = simplify_gen_subreg (SImode, y, DImode,
15985 subreg_highpart_offset (SImode, DImode));
15986 switch (code)
15987 {
15988 case EQ:
15989 case NE:
15990 {
15991 if (y_lo == const0_rtx || y_hi == const0_rtx)
15992 {
15993 if (y_lo != const0_rtx)
15994 {
15995 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
15996
15997 gcc_assert (y_hi == const0_rtx);
15998 y_lo = gen_int_mode (-INTVAL (y_lo), SImode);
15999 if (!arm_add_operand (y_lo, SImode))
16000 y_lo = force_reg (SImode, y_lo);
16001 emit_insn (gen_addsi3 (scratch2, x_lo, y_lo));
16002 x_lo = scratch2;
16003 }
16004 else if (y_hi != const0_rtx)
16005 {
16006 rtx scratch2 = scratch ? scratch : gen_reg_rtx (SImode);
16007
16008 y_hi = gen_int_mode (-INTVAL (y_hi), SImode);
16009 if (!arm_add_operand (y_hi, SImode))
16010 y_hi = force_reg (SImode, y_hi);
16011 emit_insn (gen_addsi3 (scratch2, x_hi, y_hi));
16012 x_hi = scratch2;
16013 }
16014
16015 if (!scratch)
16016 {
16017 gcc_assert (!reload_completed);
16018 scratch = gen_rtx_SCRATCH (SImode);
16019 }
16020
16021 rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16022 cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
16023
16024 rtx set
16025 = gen_rtx_SET (cc_reg,
16026 gen_rtx_COMPARE (CC_NZmode,
16027 gen_rtx_IOR (SImode, x_lo, x_hi),
16028 const0_rtx));
16029 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
16030 clobber)));
16031 return cc_reg;
16032 }
16033
16034 if (!arm_add_operand (y_lo, SImode))
16035 y_lo = force_reg (SImode, y_lo);
16036
16037 if (!arm_add_operand (y_hi, SImode))
16038 y_hi = force_reg (SImode, y_hi);
16039
16040 rtx cmp1 = gen_rtx_NE (SImode, x_lo, y_lo);
16041 rtx cmp2 = gen_rtx_NE (SImode, x_hi, y_hi);
16042 rtx conjunction = gen_rtx_IOR (SImode, cmp1, cmp2);
16043 mode = SELECT_CC_MODE (code, conjunction, const0_rtx);
16044 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16045
16046 emit_insn (gen_rtx_SET (cc_reg,
16047 gen_rtx_COMPARE (mode, conjunction,
16048 const0_rtx)));
16049 return cc_reg;
16050 }
16051
16052 case LT:
16053 case GE:
16054 {
16055 if (y_lo == const0_rtx)
16056 {
16057 /* If the low word of y is 0, then this is simply a normal
16058 compare of the upper words. */
16059 if (!arm_add_operand (y_hi, SImode))
16060 y_hi = force_reg (SImode, y_hi);
16061
16062 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16063 }
16064
16065 if (!arm_add_operand (y_lo, SImode))
16066 y_lo = force_reg (SImode, y_lo);
16067
16068 rtx cmp1
16069 = gen_rtx_LTU (DImode,
16070 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16071 const0_rtx);
16072
16073 if (!scratch)
16074 scratch = gen_rtx_SCRATCH (SImode);
16075
16076 if (!arm_not_operand (y_hi, SImode))
16077 y_hi = force_reg (SImode, y_hi);
16078
16079 rtx_insn *insn;
16080 if (y_hi == const0_rtx)
16081 insn = emit_insn (gen_cmpsi3_0_carryin_CC_NVout (scratch, x_hi,
16082 cmp1));
16083 else if (CONST_INT_P (y_hi))
16084 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_NVout (scratch, x_hi,
16085 y_hi, cmp1));
16086 else
16087 insn = emit_insn (gen_cmpsi3_carryin_CC_NVout (scratch, x_hi, y_hi,
16088 cmp1));
16089 return SET_DEST (single_set (insn));
16090 }
16091
16092 case LE:
16093 case GT:
16094 {
16095 /* During expansion, we only expect to get here if y is a
16096 	   constant that we want to handle; otherwise we should have
16097 swapped the operands already. */
16098 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16099
16100 if (!const_ok_for_arm (INTVAL (y_lo)))
16101 y_lo = force_reg (SImode, y_lo);
16102
16103 /* Perform a reverse subtract and compare. */
16104 rtx cmp1
16105 = gen_rtx_LTU (DImode,
16106 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16107 const0_rtx);
16108 rtx_insn *insn = emit_insn (gen_rscsi3_CC_NVout_scratch (scratch, y_hi,
16109 x_hi, cmp1));
16110 return SET_DEST (single_set (insn));
16111 }
16112
16113 case LTU:
16114 case GEU:
16115 {
16116 if (y_lo == const0_rtx)
16117 {
16118 /* If the low word of y is 0, then this is simply a normal
16119 compare of the upper words. */
16120 if (!arm_add_operand (y_hi, SImode))
16121 y_hi = force_reg (SImode, y_hi);
16122
16123 return arm_gen_compare_reg (code, x_hi, y_hi, NULL_RTX);
16124 }
16125
16126 if (!arm_add_operand (y_lo, SImode))
16127 y_lo = force_reg (SImode, y_lo);
16128
16129 rtx cmp1
16130 = gen_rtx_LTU (DImode,
16131 arm_gen_compare_reg (LTU, x_lo, y_lo, NULL_RTX),
16132 const0_rtx);
16133
16134 if (!scratch)
16135 scratch = gen_rtx_SCRATCH (SImode);
16136 if (!arm_not_operand (y_hi, SImode))
16137 y_hi = force_reg (SImode, y_hi);
16138
16139 rtx_insn *insn;
16140 if (y_hi == const0_rtx)
16141 insn = emit_insn (gen_cmpsi3_0_carryin_CC_Bout (scratch, x_hi,
16142 cmp1));
16143 else if (CONST_INT_P (y_hi))
16144 {
16145 /* Constant is viewed as unsigned when zero-extended. */
16146 y_hi = GEN_INT (UINTVAL (y_hi) & 0xffffffffULL);
16147 insn = emit_insn (gen_cmpsi3_imm_carryin_CC_Bout (scratch, x_hi,
16148 y_hi, cmp1));
16149 }
16150 else
16151 insn = emit_insn (gen_cmpsi3_carryin_CC_Bout (scratch, x_hi, y_hi,
16152 cmp1));
16153 return SET_DEST (single_set (insn));
16154 }
16155
16156 case LEU:
16157 case GTU:
16158 {
16159 /* During expansion, we only expect to get here if y is a
16160 	   constant that we want to handle; otherwise we should have
16161 swapped the operands already. */
16162 gcc_assert (arm_const_double_prefer_rsbs_rsc (y));
16163
16164 if (!const_ok_for_arm (INTVAL (y_lo)))
16165 y_lo = force_reg (SImode, y_lo);
16166
16167 /* Perform a reverse subtract and compare. */
16168 rtx cmp1
16169 = gen_rtx_LTU (DImode,
16170 arm_gen_compare_reg (LTU, y_lo, x_lo, scratch),
16171 const0_rtx);
16172 y_hi = GEN_INT (0xffffffff & UINTVAL (y_hi));
16173 rtx_insn *insn = emit_insn (gen_rscsi3_CC_Bout_scratch (scratch, y_hi,
16174 x_hi, cmp1));
16175 return SET_DEST (single_set (insn));
16176 }
16177
16178 default:
16179 gcc_unreachable ();
16180 }
16181 }
16182
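/* Editorial sketch of the code generated by the EQ/NE arm above: a
   DImode equality test against zero collapses into a single
   flag-setting ORR of the two halves, roughly

       orrs    tmp, x_lo, x_hi    @ Z is set iff the 64-bit value is 0

   while an equality against another register pair is expressed as an
   IOR of two NE comparisons and handed to arm_select_dominance_cc_mode,
   leaving the final compare/conditional-compare selection to the
   machine description.  */
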
16183 /* X and Y are two things to compare using CODE. Emit the compare insn and
16184 return the rtx for register 0 in the proper mode. */
16185 rtx
16186 arm_gen_compare_reg (rtx_code code, rtx x, rtx y, rtx scratch)
16187 {
16188 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
16189 return arm_gen_dicompare_reg (code, x, y, scratch);
16190
16191 machine_mode mode = SELECT_CC_MODE (code, x, y);
16192 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
16193 if (mode == CC_RSBmode)
16194 {
16195 if (!scratch)
16196 scratch = gen_rtx_SCRATCH (SImode);
16197 emit_insn (gen_rsb_imm_compare_scratch (scratch,
16198 GEN_INT (~UINTVAL (x)), y));
16199 }
16200 else
16201 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
16202
16203 return cc_reg;
16204 }
16205
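/* Editorial usage sketch (the caller shown is illustrative, not taken
   from this file): expanders that need a condition typically call the
   function above and then wrap the returned CC register in a fresh
   comparison against zero, e.g.

       rtx cc = arm_gen_compare_reg (code, op0, op1, NULL_RTX);
       rtx cond = gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);

   COND can then be used as the test of an IF_THEN_ELSE or a branch.
   The CC mode chosen by SELECT_CC_MODE travels with CC and limits which
   condition tests remain meaningful.  */
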
16206 /* Generate a sequence of insns that will generate the correct return
16207 address mask depending on the physical architecture that the program
16208 is running on. */
16209 rtx
16210 arm_gen_return_addr_mask (void)
16211 {
16212 rtx reg = gen_reg_rtx (Pmode);
16213
16214 emit_insn (gen_return_addr_mask (reg));
16215 return reg;
16216 }
16217
16218 void
16219 arm_reload_in_hi (rtx *operands)
16220 {
16221 rtx ref = operands[1];
16222 rtx base, scratch;
16223 HOST_WIDE_INT offset = 0;
16224
16225 if (SUBREG_P (ref))
16226 {
16227 offset = SUBREG_BYTE (ref);
16228 ref = SUBREG_REG (ref);
16229 }
16230
16231 if (REG_P (ref))
16232 {
16233 /* We have a pseudo which has been spilt onto the stack; there
16234 are two cases here: the first where there is a simple
16235 stack-slot replacement and a second where the stack-slot is
16236 out of range, or is used as a subreg. */
16237 if (reg_equiv_mem (REGNO (ref)))
16238 {
16239 ref = reg_equiv_mem (REGNO (ref));
16240 base = find_replacement (&XEXP (ref, 0));
16241 }
16242 else
16243 /* The slot is out of range, or was dressed up in a SUBREG. */
16244 base = reg_equiv_address (REGNO (ref));
16245
16246 /* PR 62554: If there is no equivalent memory location then just move
16247 the value as an SImode register move. This happens when the target
16248 architecture variant does not have an HImode register move. */
16249 if (base == NULL)
16250 {
16251 gcc_assert (REG_P (operands[0]));
16252 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
16253 gen_rtx_SUBREG (SImode, ref, 0)));
16254 return;
16255 }
16256 }
16257 else
16258 base = find_replacement (&XEXP (ref, 0));
16259
16260 /* Handle the case where the address is too complex to be offset by 1. */
16261 if (GET_CODE (base) == MINUS
16262 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16263 {
16264 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16265
16266 emit_set_insn (base_plus, base);
16267 base = base_plus;
16268 }
16269 else if (GET_CODE (base) == PLUS)
16270 {
16271 /* The addend must be CONST_INT, or we would have dealt with it above. */
16272 HOST_WIDE_INT hi, lo;
16273
16274 offset += INTVAL (XEXP (base, 1));
16275 base = XEXP (base, 0);
16276
16277 /* Rework the address into a legal sequence of insns. */
16278 /* Valid range for lo is -4095 -> 4095 */
16279 lo = (offset >= 0
16280 ? (offset & 0xfff)
16281 : -((-offset) & 0xfff));
16282
16283       /* Corner case: if lo is the max offset then we would be out of range
16284 once we have added the additional 1 below, so bump the msb into the
16285 pre-loading insn(s). */
16286 if (lo == 4095)
16287 lo &= 0x7ff;
16288
16289 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16290 ^ (HOST_WIDE_INT) 0x80000000)
16291 - (HOST_WIDE_INT) 0x80000000);
16292
16293 gcc_assert (hi + lo == offset);
16294
16295 if (hi != 0)
16296 {
16297 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16298
16299 /* Get the base address; addsi3 knows how to handle constants
16300 that require more than one insn. */
16301 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16302 base = base_plus;
16303 offset = lo;
16304 }
16305 }
16306
16307 /* Operands[2] may overlap operands[0] (though it won't overlap
16308 operands[1]), that's why we asked for a DImode reg -- so we can
16309 use the bit that does not overlap. */
16310 if (REGNO (operands[2]) == REGNO (operands[0]))
16311 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16312 else
16313 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16314
16315 emit_insn (gen_zero_extendqisi2 (scratch,
16316 gen_rtx_MEM (QImode,
16317 plus_constant (Pmode, base,
16318 offset))));
16319 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
16320 gen_rtx_MEM (QImode,
16321 plus_constant (Pmode, base,
16322 offset + 1))));
16323 if (!BYTES_BIG_ENDIAN)
16324 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16325 gen_rtx_IOR (SImode,
16326 gen_rtx_ASHIFT
16327 (SImode,
16328 gen_rtx_SUBREG (SImode, operands[0], 0),
16329 GEN_INT (8)),
16330 scratch));
16331 else
16332 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
16333 gen_rtx_IOR (SImode,
16334 gen_rtx_ASHIFT (SImode, scratch,
16335 GEN_INT (8)),
16336 gen_rtx_SUBREG (SImode, operands[0], 0)));
16337 }
16338
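/* Worked example (editorial note, not from the original sources) of the
   offset splitting used by arm_reload_in_hi and arm_reload_out_hi.  The
   low part keeps the bits that fit the +/-4095 addressing range and the
   high part is what addsi3 must materialise first:

       offset = 4100:   lo = 4100 & 0xfff        =     4
                        hi = 4100 - 4            =  4096
       offset = -4100:  lo = -((4100) & 0xfff)   =    -4
                        hi = -4100 - (-4)        = -4096

   The xor/subtract dance with 0x80000000 merely sign-extends
   (offset - lo) from 32 bits into a HOST_WIDE_INT, and the special case
   lo == 4095 trims LO so that the "offset + 1" used for the second byte
   access still fits in range.  */
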
16339 /* Handle storing a half-word to memory during reload by synthesizing as two
16340 byte stores. Take care not to clobber the input values until after we
16341 have moved them somewhere safe. This code assumes that if the DImode
16342 scratch in operands[2] overlaps either the input value or output address
16343 in some way, then that value must die in this insn (we absolutely need
16344 two scratch registers for some corner cases). */
16345 void
16346 arm_reload_out_hi (rtx *operands)
16347 {
16348 rtx ref = operands[0];
16349 rtx outval = operands[1];
16350 rtx base, scratch;
16351 HOST_WIDE_INT offset = 0;
16352
16353 if (SUBREG_P (ref))
16354 {
16355 offset = SUBREG_BYTE (ref);
16356 ref = SUBREG_REG (ref);
16357 }
16358
16359 if (REG_P (ref))
16360 {
16361 /* We have a pseudo which has been spilt onto the stack; there
16362 are two cases here: the first where there is a simple
16363 stack-slot replacement and a second where the stack-slot is
16364 out of range, or is used as a subreg. */
16365 if (reg_equiv_mem (REGNO (ref)))
16366 {
16367 ref = reg_equiv_mem (REGNO (ref));
16368 base = find_replacement (&XEXP (ref, 0));
16369 }
16370 else
16371 /* The slot is out of range, or was dressed up in a SUBREG. */
16372 base = reg_equiv_address (REGNO (ref));
16373
16374 /* PR 62254: If there is no equivalent memory location then just move
16375 the value as an SImode register move. This happens when the target
16376 architecture variant does not have an HImode register move. */
16377 if (base == NULL)
16378 {
16379 gcc_assert (REG_P (outval) || SUBREG_P (outval));
16380
16381 if (REG_P (outval))
16382 {
16383 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16384 gen_rtx_SUBREG (SImode, outval, 0)));
16385 }
16386 else /* SUBREG_P (outval) */
16387 {
16388 if (GET_MODE (SUBREG_REG (outval)) == SImode)
16389 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
16390 SUBREG_REG (outval)));
16391 else
16392 /* FIXME: Handle other cases ? */
16393 gcc_unreachable ();
16394 }
16395 return;
16396 }
16397 }
16398 else
16399 base = find_replacement (&XEXP (ref, 0));
16400
16401 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
16402
16403 /* Handle the case where the address is too complex to be offset by 1. */
16404 if (GET_CODE (base) == MINUS
16405 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
16406 {
16407 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16408
16409 /* Be careful not to destroy OUTVAL. */
16410 if (reg_overlap_mentioned_p (base_plus, outval))
16411 {
16412 /* Updating base_plus might destroy outval, see if we can
16413 swap the scratch and base_plus. */
16414 if (!reg_overlap_mentioned_p (scratch, outval))
16415 std::swap (scratch, base_plus);
16416 else
16417 {
16418 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16419
16420 /* Be conservative and copy OUTVAL into the scratch now,
16421 this should only be necessary if outval is a subreg
16422 of something larger than a word. */
16423 /* XXX Might this clobber base? I can't see how it can,
16424 since scratch is known to overlap with OUTVAL, and
16425 must be wider than a word. */
16426 emit_insn (gen_movhi (scratch_hi, outval));
16427 outval = scratch_hi;
16428 }
16429 }
16430
16431 emit_set_insn (base_plus, base);
16432 base = base_plus;
16433 }
16434 else if (GET_CODE (base) == PLUS)
16435 {
16436 /* The addend must be CONST_INT, or we would have dealt with it above. */
16437 HOST_WIDE_INT hi, lo;
16438
16439 offset += INTVAL (XEXP (base, 1));
16440 base = XEXP (base, 0);
16441
16442 /* Rework the address into a legal sequence of insns. */
16443 /* Valid range for lo is -4095 -> 4095 */
16444 lo = (offset >= 0
16445 ? (offset & 0xfff)
16446 : -((-offset) & 0xfff));
16447
16448       /* Corner case: if lo is the max offset then we would be out of range
16449 once we have added the additional 1 below, so bump the msb into the
16450 pre-loading insn(s). */
16451 if (lo == 4095)
16452 lo &= 0x7ff;
16453
16454 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
16455 ^ (HOST_WIDE_INT) 0x80000000)
16456 - (HOST_WIDE_INT) 0x80000000);
16457
16458 gcc_assert (hi + lo == offset);
16459
16460 if (hi != 0)
16461 {
16462 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16463
16464 /* Be careful not to destroy OUTVAL. */
16465 if (reg_overlap_mentioned_p (base_plus, outval))
16466 {
16467 /* Updating base_plus might destroy outval, see if we
16468 can swap the scratch and base_plus. */
16469 if (!reg_overlap_mentioned_p (scratch, outval))
16470 std::swap (scratch, base_plus);
16471 else
16472 {
16473 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
16474
16475 /* Be conservative and copy outval into scratch now,
16476 this should only be necessary if outval is a
16477 subreg of something larger than a word. */
16478 /* XXX Might this clobber base? I can't see how it
16479 can, since scratch is known to overlap with
16480 outval. */
16481 emit_insn (gen_movhi (scratch_hi, outval));
16482 outval = scratch_hi;
16483 }
16484 }
16485
16486 /* Get the base address; addsi3 knows how to handle constants
16487 that require more than one insn. */
16488 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
16489 base = base_plus;
16490 offset = lo;
16491 }
16492 }
16493
16494 if (BYTES_BIG_ENDIAN)
16495 {
16496 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16497 plus_constant (Pmode, base,
16498 offset + 1)),
16499 gen_lowpart (QImode, outval)));
16500 emit_insn (gen_lshrsi3 (scratch,
16501 gen_rtx_SUBREG (SImode, outval, 0),
16502 GEN_INT (8)));
16503 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16504 offset)),
16505 gen_lowpart (QImode, scratch)));
16506 }
16507 else
16508 {
16509 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
16510 offset)),
16511 gen_lowpart (QImode, outval)));
16512 emit_insn (gen_lshrsi3 (scratch,
16513 gen_rtx_SUBREG (SImode, outval, 0),
16514 GEN_INT (8)));
16515 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
16516 plus_constant (Pmode, base,
16517 offset + 1)),
16518 gen_lowpart (QImode, scratch)));
16519 }
16520 }
16521
16522 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
16523 (padded to the size of a word) should be passed in a register. */
16524
16525 static bool
16526 arm_must_pass_in_stack (const function_arg_info &arg)
16527 {
16528 if (TARGET_AAPCS_BASED)
16529 return must_pass_in_stack_var_size (arg);
16530 else
16531 return must_pass_in_stack_var_size_or_pad (arg);
16532 }
16533
16534
16535 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
16536 byte of a stack argument has useful data. For legacy APCS ABIs we use
16537 the default. For AAPCS based ABIs small aggregate types are placed
16538 in the lowest memory address. */
16539
16540 static pad_direction
16541 arm_function_arg_padding (machine_mode mode, const_tree type)
16542 {
16543 if (!TARGET_AAPCS_BASED)
16544 return default_function_arg_padding (mode, type);
16545
16546 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
16547 return PAD_DOWNWARD;
16548
16549 return PAD_UPWARD;
16550 }
16551
16552
16553 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
16554 Return !BYTES_BIG_ENDIAN if the least significant byte of the
16555 register has useful data, and return the opposite if the most
16556 significant byte does. */
16557
16558 bool
16559 arm_pad_reg_upward (machine_mode mode,
16560 tree type, int first ATTRIBUTE_UNUSED)
16561 {
16562 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
16563 {
16564 /* For AAPCS, small aggregates, small fixed-point types,
16565 and small complex types are always padded upwards. */
16566 if (type)
16567 {
16568 if ((AGGREGATE_TYPE_P (type)
16569 || TREE_CODE (type) == COMPLEX_TYPE
16570 || FIXED_POINT_TYPE_P (type))
16571 && int_size_in_bytes (type) <= 4)
16572 return true;
16573 }
16574 else
16575 {
16576 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
16577 && GET_MODE_SIZE (mode) <= 4)
16578 return true;
16579 }
16580 }
16581
16582 /* Otherwise, use default padding. */
16583 return !BYTES_BIG_ENDIAN;
16584 }
16585
16586 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
16587 assuming that the address in the base register is word aligned. */
16588 bool
16589 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
16590 {
16591 HOST_WIDE_INT max_offset;
16592
16593 /* Offset must be a multiple of 4 in Thumb mode. */
16594 if (TARGET_THUMB2 && ((offset & 3) != 0))
16595 return false;
16596
16597 if (TARGET_THUMB2)
16598 max_offset = 1020;
16599 else if (TARGET_ARM)
16600 max_offset = 255;
16601 else
16602 return false;
16603
16604 return ((offset <= max_offset) && (offset >= -max_offset));
16605 }
16606
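/* Editorial examples of what the check above accepts (derived from the
   code, not additional requirements):

       offset_ok_for_ldrd_strd (1020)   -> true in Thumb-2, false in ARM
                                           (ARM LDRD/STRD is limited to +/-255)
       offset_ok_for_ldrd_strd (255)    -> true in ARM, false in Thumb-2
                                           (255 is not a multiple of 4)
       offset_ok_for_ldrd_strd (-1024)  -> false for both encodings  */
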
16607 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
16608 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
16609 Assumes that the address in the base register RN is word aligned. Pattern
16610 guarantees that both memory accesses use the same base register,
16611    the offsets are constants within range, and the gap between the offsets is 4.
16612    If reload has completed then also check that the registers are legal.  WBACK
16613    indicates whether the address is updated.  LOAD indicates whether the memory access is a load or a store.  */
16614 bool
16615 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
16616 bool wback, bool load)
16617 {
16618 unsigned int t, t2, n;
16619
16620 if (!reload_completed)
16621 return true;
16622
16623 if (!offset_ok_for_ldrd_strd (offset))
16624 return false;
16625
16626 t = REGNO (rt);
16627 t2 = REGNO (rt2);
16628 n = REGNO (rn);
16629
16630 if ((TARGET_THUMB2)
16631 && ((wback && (n == t || n == t2))
16632 || (t == SP_REGNUM)
16633 || (t == PC_REGNUM)
16634 || (t2 == SP_REGNUM)
16635 || (t2 == PC_REGNUM)
16636 || (!load && (n == PC_REGNUM))
16637 || (load && (t == t2))
16638 /* Triggers Cortex-M3 LDRD errata. */
16639 || (!wback && load && fix_cm3_ldrd && (n == t))))
16640 return false;
16641
16642 if ((TARGET_ARM)
16643 && ((wback && (n == t || n == t2))
16644 || (t2 == PC_REGNUM)
16645 || (t % 2 != 0) /* First destination register is not even. */
16646 || (t2 != t + 1)
16647 /* PC can be used as base register (for offset addressing only),
16648          but it is deprecated. */
16649 || (n == PC_REGNUM)))
16650 return false;
16651
16652 return true;
16653 }
16654
16655 /* Return true if a 64-bit access with alignment ALIGN and with a
16656 constant offset OFFSET from the base pointer is permitted on this
16657 architecture. */
16658 static bool
16659 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
16660 {
16661 return (unaligned_access
16662 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
16663 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
16664 }
16665
16666 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
16667 operand MEM's address contains an immediate offset from the base
16668 register and has no side effects, in which case it sets BASE,
16669 OFFSET and ALIGN accordingly. */
16670 static bool
16671 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
16672 {
16673 rtx addr;
16674
16675 gcc_assert (base != NULL && offset != NULL);
16676
16677 /* TODO: Handle more general memory operand patterns, such as
16678 PRE_DEC and PRE_INC. */
16679
16680 if (side_effects_p (mem))
16681 return false;
16682
16683 /* Can't deal with subregs. */
16684 if (SUBREG_P (mem))
16685 return false;
16686
16687 gcc_assert (MEM_P (mem));
16688
16689 *offset = const0_rtx;
16690 *align = MEM_ALIGN (mem);
16691
16692 addr = XEXP (mem, 0);
16693
16694 /* If addr isn't valid for DImode, then we can't handle it. */
16695 if (!arm_legitimate_address_p (DImode, addr,
16696 reload_in_progress || reload_completed))
16697 return false;
16698
16699 if (REG_P (addr))
16700 {
16701 *base = addr;
16702 return true;
16703 }
16704 else if (GET_CODE (addr) == PLUS)
16705 {
16706 *base = XEXP (addr, 0);
16707 *offset = XEXP (addr, 1);
16708 return (REG_P (*base) && CONST_INT_P (*offset));
16709 }
16710
16711 return false;
16712 }
16713
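/* For illustration only, the two address shapes the helper above
   accepts, written as RTL:

       (mem:SI (reg:SI r4))                          base = r4, offset = 0
       (mem:SI (plus:SI (reg:SI r4) (const_int 8)))  base = r4, offset = 8

   Anything with side effects (PRE_DEC, POST_INC, ...) or with a
   non-constant index is rejected.  */
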
16714 /* Called from a peephole2 to replace two word-size accesses with a
16715 single LDRD/STRD instruction. Returns true iff we can generate a
16716 new instruction sequence. That is, both accesses use the same base
16717 register and the gap between constant offsets is 4. This function
16718 may reorder its operands to match ldrd/strd RTL templates.
16719 OPERANDS are the operands found by the peephole matcher;
16720 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16721    corresponding memory operands.  LOAD indicates whether the access
16722    is a load or a store.  CONST_STORE indicates a store of constant
16723    integer values held in OPERANDS[4,5] and assumes that the pattern
16724    is four insns long, for the purpose of checking dead registers.
16725 COMMUTE indicates that register operands may be reordered. */
16726 bool
16727 gen_operands_ldrd_strd (rtx *operands, bool load,
16728 bool const_store, bool commute)
16729 {
16730 int nops = 2;
16731 HOST_WIDE_INT offsets[2], offset, align[2];
16732 rtx base = NULL_RTX;
16733 rtx cur_base, cur_offset, tmp;
16734 int i, gap;
16735 HARD_REG_SET regset;
16736
16737 gcc_assert (!const_store || !load);
16738 /* Check that the memory references are immediate offsets from the
16739 same base register. Extract the base register, the destination
16740 registers, and the corresponding memory offsets. */
16741 for (i = 0; i < nops; i++)
16742 {
16743 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16744 &align[i]))
16745 return false;
16746
16747 if (i == 0)
16748 base = cur_base;
16749 else if (REGNO (base) != REGNO (cur_base))
16750 return false;
16751
16752 offsets[i] = INTVAL (cur_offset);
16753 if (GET_CODE (operands[i]) == SUBREG)
16754 {
16755 tmp = SUBREG_REG (operands[i]);
16756 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16757 operands[i] = tmp;
16758 }
16759 }
16760
16761 /* Make sure there is no dependency between the individual loads. */
16762 if (load && REGNO (operands[0]) == REGNO (base))
16763 return false; /* RAW */
16764
16765 if (load && REGNO (operands[0]) == REGNO (operands[1]))
16766 return false; /* WAW */
16767
16768 /* If the same input register is used in both stores
16769 when storing different constants, try to find a free register.
16770 For example, the code
16771 mov r0, 0
16772 str r0, [r2]
16773 mov r0, 1
16774 str r0, [r2, #4]
16775 can be transformed into
16776 mov r1, 0
16777 mov r0, 1
16778 strd r1, r0, [r2]
16779 in Thumb mode assuming that r1 is free.
16780 For ARM mode do the same but only if the starting register
16781 can be made to be even. */
16782 if (const_store
16783 && REGNO (operands[0]) == REGNO (operands[1])
16784 && INTVAL (operands[4]) != INTVAL (operands[5]))
16785 {
16786 if (TARGET_THUMB2)
16787 {
16788 CLEAR_HARD_REG_SET (regset);
16789 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16790 if (tmp == NULL_RTX)
16791 return false;
16792
16793 /* Use the new register in the first load to ensure that
16794 if the original input register is not dead after peephole,
16795 then it will have the correct constant value. */
16796 operands[0] = tmp;
16797 }
16798 else if (TARGET_ARM)
16799 {
16800 int regno = REGNO (operands[0]);
16801 if (!peep2_reg_dead_p (4, operands[0]))
16802 {
16803 /* When the input register is even and is not dead after the
16804 pattern, it has to hold the second constant but we cannot
16805 form a legal STRD in ARM mode with this register as the second
16806 register. */
16807 if (regno % 2 == 0)
16808 return false;
16809
16810 /* Is regno-1 free? */
16811 SET_HARD_REG_SET (regset);
16812 CLEAR_HARD_REG_BIT(regset, regno - 1);
16813 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16814 if (tmp == NULL_RTX)
16815 return false;
16816
16817 operands[0] = tmp;
16818 }
16819 else
16820 {
16821 /* Find a DImode register. */
16822 CLEAR_HARD_REG_SET (regset);
16823 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16824 if (tmp != NULL_RTX)
16825 {
16826 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16827 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16828 }
16829 else
16830 {
16831 /* Can we use the input register to form a DI register? */
16832 SET_HARD_REG_SET (regset);
16833 CLEAR_HARD_REG_BIT(regset,
16834 regno % 2 == 0 ? regno + 1 : regno - 1);
16835 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16836 if (tmp == NULL_RTX)
16837 return false;
16838 operands[regno % 2 == 1 ? 0 : 1] = tmp;
16839 }
16840 }
16841
16842 gcc_assert (operands[0] != NULL_RTX);
16843 gcc_assert (operands[1] != NULL_RTX);
16844 gcc_assert (REGNO (operands[0]) % 2 == 0);
16845 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
16846 }
16847 }
16848
16849 /* Make sure the instructions are ordered with lower memory access first. */
16850 if (offsets[0] > offsets[1])
16851 {
16852 gap = offsets[0] - offsets[1];
16853 offset = offsets[1];
16854
16855 /* Swap the instructions such that lower memory is accessed first. */
16856 std::swap (operands[0], operands[1]);
16857 std::swap (operands[2], operands[3]);
16858 std::swap (align[0], align[1]);
16859 if (const_store)
16860 std::swap (operands[4], operands[5]);
16861 }
16862 else
16863 {
16864 gap = offsets[1] - offsets[0];
16865 offset = offsets[0];
16866 }
16867
16868 /* Make sure accesses are to consecutive memory locations. */
16869 if (gap != GET_MODE_SIZE (SImode))
16870 return false;
16871
16872 if (!align_ok_ldrd_strd (align[0], offset))
16873 return false;
16874
16875 /* Make sure we generate legal instructions. */
16876 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16877 false, load))
16878 return true;
16879
16880 /* In Thumb state, where registers are almost unconstrained, there
16881      is little hope of fixing it. */
16882 if (TARGET_THUMB2)
16883 return false;
16884
16885 if (load && commute)
16886 {
16887 /* Try reordering registers. */
16888 std::swap (operands[0], operands[1]);
16889 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16890 false, load))
16891 return true;
16892 }
16893
16894 if (const_store)
16895 {
16896 /* If input registers are dead after this pattern, they can be
16897 reordered or replaced by other registers that are free in the
16898 current pattern. */
16899 if (!peep2_reg_dead_p (4, operands[0])
16900 || !peep2_reg_dead_p (4, operands[1]))
16901 return false;
16902
16903 /* Try to reorder the input registers. */
16904 /* For example, the code
16905 mov r0, 0
16906 mov r1, 1
16907 str r1, [r2]
16908 str r0, [r2, #4]
16909 can be transformed into
16910 mov r1, 0
16911 mov r0, 1
16912 strd r0, [r2]
16913 */
16914 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16915 false, false))
16916 {
16917 std::swap (operands[0], operands[1]);
16918 return true;
16919 }
16920
16921 /* Try to find a free DI register. */
16922 CLEAR_HARD_REG_SET (regset);
16923 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16924 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16925 while (true)
16926 {
16927 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16928 if (tmp == NULL_RTX)
16929 return false;
16930
16931 /* DREG must be an even-numbered register in DImode.
16932 Split it into SI registers. */
16933 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16934 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16935 gcc_assert (operands[0] != NULL_RTX);
16936 gcc_assert (operands[1] != NULL_RTX);
16937 gcc_assert (REGNO (operands[0]) % 2 == 0);
16938 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16939
16940 return (operands_ok_ldrd_strd (operands[0], operands[1],
16941 base, offset,
16942 false, load));
16943 }
16944 }
16945
16946 return false;
16947 }
16948
16949
16950 /* Return true if parallel execution of the two word-size accesses provided
16951 could be satisfied with a single LDRD/STRD instruction. Two word-size
16952 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
16953 register operands and OPERANDS[2,3] are the corresponding memory operands.
16954 */
16955 bool
16956 valid_operands_ldrd_strd (rtx *operands, bool load)
16957 {
16958 int nops = 2;
16959 HOST_WIDE_INT offsets[2], offset, align[2];
16960 rtx base = NULL_RTX;
16961 rtx cur_base, cur_offset;
16962 int i, gap;
16963
16964 /* Check that the memory references are immediate offsets from the
16965 same base register. Extract the base register, the destination
16966 registers, and the corresponding memory offsets. */
16967 for (i = 0; i < nops; i++)
16968 {
16969 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
16970 &align[i]))
16971 return false;
16972
16973 if (i == 0)
16974 base = cur_base;
16975 else if (REGNO (base) != REGNO (cur_base))
16976 return false;
16977
16978 offsets[i] = INTVAL (cur_offset);
16979 if (GET_CODE (operands[i]) == SUBREG)
16980 return false;
16981 }
16982
16983 if (offsets[0] > offsets[1])
16984 return false;
16985
16986 gap = offsets[1] - offsets[0];
16987 offset = offsets[0];
16988
16989 /* Make sure accesses are to consecutive memory locations. */
16990 if (gap != GET_MODE_SIZE (SImode))
16991 return false;
16992
16993 if (!align_ok_ldrd_strd (align[0], offset))
16994 return false;
16995
16996 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16997 false, load);
16998 }
16999
17000 \f
17001 /* Print a symbolic form of X to the debug file, F. */
17002 static void
17003 arm_print_value (FILE *f, rtx x)
17004 {
17005 switch (GET_CODE (x))
17006 {
17007 case CONST_INT:
17008 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17009 return;
17010
17011 case CONST_DOUBLE:
17012 {
17013 char fpstr[20];
17014 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17015 sizeof (fpstr), 0, 1);
17016 fputs (fpstr, f);
17017 }
17018 return;
17019
17020 case CONST_VECTOR:
17021 {
17022 int i;
17023
17024 fprintf (f, "<");
17025 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
17026 {
17027 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
17028 if (i < (CONST_VECTOR_NUNITS (x) - 1))
17029 fputc (',', f);
17030 }
17031 fprintf (f, ">");
17032 }
17033 return;
17034
17035 case CONST_STRING:
17036 fprintf (f, "\"%s\"", XSTR (x, 0));
17037 return;
17038
17039 case SYMBOL_REF:
17040 fprintf (f, "`%s'", XSTR (x, 0));
17041 return;
17042
17043 case LABEL_REF:
17044 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
17045 return;
17046
17047 case CONST:
17048 arm_print_value (f, XEXP (x, 0));
17049 return;
17050
17051 case PLUS:
17052 arm_print_value (f, XEXP (x, 0));
17053 fprintf (f, "+");
17054 arm_print_value (f, XEXP (x, 1));
17055 return;
17056
17057 case PC:
17058 fprintf (f, "pc");
17059 return;
17060
17061 default:
17062 fprintf (f, "????");
17063 return;
17064 }
17065 }
17066 \f
17067 /* Routines for manipulation of the constant pool. */
17068
17069 /* Arm instructions cannot load a large constant directly into a
17070 register; they have to come from a pc relative load. The constant
17071 must therefore be placed in the addressable range of the pc
17072 relative load. Depending on the precise pc relative load
17073 instruction the range is somewhere between 256 bytes and 4k. This
17074 means that we often have to dump a constant inside a function, and
17075 generate code to branch around it.
17076
17077 It is important to minimize this, since the branches will slow
17078 things down and make the code larger.
17079
17080 Normally we can hide the table after an existing unconditional
17081 branch so that there is no interruption of the flow, but in the
17082 worst case the code looks like this:
17083
17084 ldr rn, L1
17085 ...
17086 b L2
17087 align
17088 L1: .long value
17089 L2:
17090 ...
17091
17092 ldr rn, L3
17093 ...
17094 b L4
17095 align
17096 L3: .long value
17097 L4:
17098 ...
17099
17100 We fix this by performing a scan after scheduling, which notices
17101 which instructions need to have their operands fetched from the
17102 constant table and builds the table.
17103
17104 The algorithm starts by building a table of all the constants that
17105 need fixing up and all the natural barriers in the function (places
17106 where a constant table can be dropped without breaking the flow).
17107 For each fixup we note how far the pc-relative replacement will be
17108 able to reach and the offset of the instruction into the function.
17109
17110 Having built the table we then group the fixes together to form
17111 tables that are as large as possible (subject to addressing
17112 constraints) and emit each table of constants after the last
17113 barrier that is within range of all the instructions in the group.
17114 If a group does not contain a barrier, then we forcibly create one
17115 by inserting a jump instruction into the flow. Once the table has
17116 been inserted, the insns are then modified to reference the
17117 relevant entry in the pool.
17118
17119 Possible enhancements to the algorithm (not implemented) are:
17120
17121 1) For some processors and object formats, there may be benefit in
17122 aligning the pools to the start of cache lines; this alignment
17123 would need to be taken into account when calculating addressability
17124 of a pool. */
17125
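/* A small editorial example of the grouping described above: if two
   loads in the same function both need the constant 0x12345678 and one
   pool can be placed within range of both, add_minipool_forward_ref
   records a single minipool_node for it with refcount == 2, and both
   insns end up referencing the same entry of the dumped table.  Only
   when no natural barrier is in range does create_fix_barrier insert an
   extra jump so that the table can be placed behind it.  */
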
17126 /* These typedefs are located at the start of this file, so that
17127 they can be used in the prototypes there. This comment is to
17128 remind readers of that fact so that the following structures
17129 can be understood more easily.
17130
17131 typedef struct minipool_node Mnode;
17132 typedef struct minipool_fixup Mfix; */
17133
17134 struct minipool_node
17135 {
17136 /* Doubly linked chain of entries. */
17137 Mnode * next;
17138 Mnode * prev;
17139   /* The maximum offset into the code at which this entry can be placed.  While
17140 pushing fixes for forward references, all entries are sorted in order
17141 of increasing max_address. */
17142 HOST_WIDE_INT max_address;
17143 /* Similarly for an entry inserted for a backwards ref. */
17144 HOST_WIDE_INT min_address;
17145 /* The number of fixes referencing this entry. This can become zero
17146 if we "unpush" an entry. In this case we ignore the entry when we
17147 come to emit the code. */
17148 int refcount;
17149 /* The offset from the start of the minipool. */
17150 HOST_WIDE_INT offset;
17151   /* The value in the table. */
17152 rtx value;
17153 /* The mode of value. */
17154 machine_mode mode;
17155 /* The size of the value. With iWMMXt enabled
17156 sizes > 4 also imply an alignment of 8-bytes. */
17157 int fix_size;
17158 };
17159
17160 struct minipool_fixup
17161 {
17162 Mfix * next;
17163 rtx_insn * insn;
17164 HOST_WIDE_INT address;
17165 rtx * loc;
17166 machine_mode mode;
17167 int fix_size;
17168 rtx value;
17169 Mnode * minipool;
17170 HOST_WIDE_INT forwards;
17171 HOST_WIDE_INT backwards;
17172 };
17173
17174 /* Fixes less than a word need padding out to a word boundary. */
17175 #define MINIPOOL_FIX_SIZE(mode) \
17176 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
17177
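/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   are both 4, while MINIPOOL_FIX_SIZE (DImode) evaluates to 8.  */
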
17178 static Mnode * minipool_vector_head;
17179 static Mnode * minipool_vector_tail;
17180 static rtx_code_label *minipool_vector_label;
17181 static int minipool_pad;
17182
17183 /* The linked list of all minipool fixes required for this function. */
17184 Mfix * minipool_fix_head;
17185 Mfix * minipool_fix_tail;
17186 /* The fix entry for the current minipool, once it has been placed. */
17187 Mfix * minipool_barrier;
17188
17189 #ifndef JUMP_TABLES_IN_TEXT_SECTION
17190 #define JUMP_TABLES_IN_TEXT_SECTION 0
17191 #endif
17192
17193 static HOST_WIDE_INT
17194 get_jump_table_size (rtx_jump_table_data *insn)
17195 {
17196   /* ADDR_VECs only take room if read-only data goes into the text
17197 section. */
17198 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
17199 {
17200 rtx body = PATTERN (insn);
17201 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
17202 HOST_WIDE_INT size;
17203 HOST_WIDE_INT modesize;
17204
17205 modesize = GET_MODE_SIZE (GET_MODE (body));
17206 size = modesize * XVECLEN (body, elt);
17207 switch (modesize)
17208 {
17209 case 1:
17210 /* Round up size of TBB table to a halfword boundary. */
17211 size = (size + 1) & ~HOST_WIDE_INT_1;
17212 break;
17213 case 2:
17214 /* No padding necessary for TBH. */
17215 break;
17216 case 4:
17217 /* Add two bytes for alignment on Thumb. */
17218 if (TARGET_THUMB)
17219 size += 2;
17220 break;
17221 default:
17222 gcc_unreachable ();
17223 }
17224 return size;
17225 }
17226
17227 return 0;
17228 }
17229
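/* Worked example (editorial): a Thumb-2 TBB table (QImode body) with
   five entries has a raw size of 5 bytes, rounded up to 6 so that the
   following code stays halfword aligned; a TBH table (HImode) with five
   entries is exactly 10 bytes; a word table (SImode) with five entries
   on Thumb is 20 + 2 = 22 bytes to allow for alignment padding.  */
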
17230 /* Emit insns to load the function address from FUNCDESC (an FDPIC
17231 function descriptor) into a register and the GOT address into the
17232 FDPIC register, returning an rtx for the register holding the
17233 function address. */
17234
17235 rtx
17236 arm_load_function_descriptor (rtx funcdesc)
17237 {
17238 rtx fnaddr_reg = gen_reg_rtx (Pmode);
17239 rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
17240 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
17241 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
17242
17243 emit_move_insn (fnaddr_reg, fnaddr);
17244
17245 /* The ABI requires the entry point address to be loaded first, but
17246 since we cannot support lazy binding for lack of atomic load of
17247 two 32-bits values, we do not need to bother to prevent the
17248 previous load from being moved after that of the GOT address. */
17249 emit_insn (gen_restore_pic_register_after_call (pic_reg, gotaddr));
17250
17251 return fnaddr_reg;
17252 }
17253
17254 /* Return the maximum amount of padding that will be inserted before
17255 label LABEL. */
17256 static HOST_WIDE_INT
17257 get_label_padding (rtx label)
17258 {
17259 HOST_WIDE_INT align, min_insn_size;
17260
17261 align = 1 << label_to_alignment (label).levels[0].log;
17262 min_insn_size = TARGET_THUMB ? 2 : 4;
17263 return align > min_insn_size ? align - min_insn_size : 0;
17264 }
17265
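/* For example, a label aligned to 8 bytes may be preceded by up to
   8 - 2 = 6 bytes of padding on Thumb (minimum insn size 2), and by up
   to 8 - 4 = 4 bytes on ARM.  */
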
17266 /* Move a minipool fix MP from its current location to before MAX_MP.
17267 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
17268 constraints may need updating. */
17269 static Mnode *
17270 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
17271 HOST_WIDE_INT max_address)
17272 {
17273 /* The code below assumes these are different. */
17274 gcc_assert (mp != max_mp);
17275
17276 if (max_mp == NULL)
17277 {
17278 if (max_address < mp->max_address)
17279 mp->max_address = max_address;
17280 }
17281 else
17282 {
17283 if (max_address > max_mp->max_address - mp->fix_size)
17284 mp->max_address = max_mp->max_address - mp->fix_size;
17285 else
17286 mp->max_address = max_address;
17287
17288 /* Unlink MP from its current position. Since max_mp is non-null,
17289 mp->prev must be non-null. */
17290 mp->prev->next = mp->next;
17291 if (mp->next != NULL)
17292 mp->next->prev = mp->prev;
17293 else
17294 minipool_vector_tail = mp->prev;
17295
17296 /* Re-insert it before MAX_MP. */
17297 mp->next = max_mp;
17298 mp->prev = max_mp->prev;
17299 max_mp->prev = mp;
17300
17301 if (mp->prev != NULL)
17302 mp->prev->next = mp;
17303 else
17304 minipool_vector_head = mp;
17305 }
17306
17307 /* Save the new entry. */
17308 max_mp = mp;
17309
17310 /* Scan over the preceding entries and adjust their addresses as
17311 required. */
17312 while (mp->prev != NULL
17313 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17314 {
17315 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17316 mp = mp->prev;
17317 }
17318
17319 return max_mp;
17320 }
17321
17322 /* Add a constant to the minipool for a forward reference. Returns the
17323 node added or NULL if the constant will not fit in this pool. */
17324 static Mnode *
17325 add_minipool_forward_ref (Mfix *fix)
17326 {
17327 /* If set, max_mp is the first pool_entry that has a lower
17328 constraint than the one we are trying to add. */
17329 Mnode * max_mp = NULL;
17330 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
17331 Mnode * mp;
17332
17333 /* If the minipool starts before the end of FIX->INSN then this FIX
17334 cannot be placed into the current pool. Furthermore, adding the
17335 new constant pool entry may cause the pool to start FIX_SIZE bytes
17336 earlier. */
17337 if (minipool_vector_head &&
17338 (fix->address + get_attr_length (fix->insn)
17339 >= minipool_vector_head->max_address - fix->fix_size))
17340 return NULL;
17341
17342 /* Scan the pool to see if a constant with the same value has
17343 already been added. While we are doing this, also note the
17344 location where we must insert the constant if it doesn't already
17345 exist. */
17346 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17347 {
17348 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17349 && fix->mode == mp->mode
17350 && (!LABEL_P (fix->value)
17351 || (CODE_LABEL_NUMBER (fix->value)
17352 == CODE_LABEL_NUMBER (mp->value)))
17353 && rtx_equal_p (fix->value, mp->value))
17354 {
17355 /* More than one fix references this entry. */
17356 mp->refcount++;
17357 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
17358 }
17359
17360 /* Note the insertion point if necessary. */
17361 if (max_mp == NULL
17362 && mp->max_address > max_address)
17363 max_mp = mp;
17364
17365       /* If we are inserting an 8-byte aligned quantity and
17366 we have not already found an insertion point, then
17367 make sure that all such 8-byte aligned quantities are
17368 placed at the start of the pool. */
17369 if (ARM_DOUBLEWORD_ALIGN
17370 && max_mp == NULL
17371 && fix->fix_size >= 8
17372 && mp->fix_size < 8)
17373 {
17374 max_mp = mp;
17375 max_address = mp->max_address;
17376 }
17377 }
17378
17379 /* The value is not currently in the minipool, so we need to create
17380 a new entry for it. If MAX_MP is NULL, the entry will be put on
17381 the end of the list since the placement is less constrained than
17382 any existing entry. Otherwise, we insert the new fix before
17383 MAX_MP and, if necessary, adjust the constraints on the other
17384 entries. */
17385 mp = XNEW (Mnode);
17386 mp->fix_size = fix->fix_size;
17387 mp->mode = fix->mode;
17388 mp->value = fix->value;
17389 mp->refcount = 1;
17390 /* Not yet required for a backwards ref. */
17391 mp->min_address = -65536;
17392
17393 if (max_mp == NULL)
17394 {
17395 mp->max_address = max_address;
17396 mp->next = NULL;
17397 mp->prev = minipool_vector_tail;
17398
17399 if (mp->prev == NULL)
17400 {
17401 minipool_vector_head = mp;
17402 minipool_vector_label = gen_label_rtx ();
17403 }
17404 else
17405 mp->prev->next = mp;
17406
17407 minipool_vector_tail = mp;
17408 }
17409 else
17410 {
17411 if (max_address > max_mp->max_address - mp->fix_size)
17412 mp->max_address = max_mp->max_address - mp->fix_size;
17413 else
17414 mp->max_address = max_address;
17415
17416 mp->next = max_mp;
17417 mp->prev = max_mp->prev;
17418 max_mp->prev = mp;
17419 if (mp->prev != NULL)
17420 mp->prev->next = mp;
17421 else
17422 minipool_vector_head = mp;
17423 }
17424
17425 /* Save the new entry. */
17426 max_mp = mp;
17427
17428 /* Scan over the preceding entries and adjust their addresses as
17429 required. */
17430 while (mp->prev != NULL
17431 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
17432 {
17433 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
17434 mp = mp->prev;
17435 }
17436
17437 return max_mp;
17438 }
17439
17440 static Mnode *
17441 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
17442 HOST_WIDE_INT min_address)
17443 {
17444 HOST_WIDE_INT offset;
17445
17446 /* The code below assumes these are different. */
17447 gcc_assert (mp != min_mp);
17448
17449 if (min_mp == NULL)
17450 {
17451 if (min_address > mp->min_address)
17452 mp->min_address = min_address;
17453 }
17454 else
17455 {
17456 /* We will adjust this below if it is too loose. */
17457 mp->min_address = min_address;
17458
17459 /* Unlink MP from its current position. Since min_mp is non-null,
17460 mp->next must be non-null. */
17461 mp->next->prev = mp->prev;
17462 if (mp->prev != NULL)
17463 mp->prev->next = mp->next;
17464 else
17465 minipool_vector_head = mp->next;
17466
17467 /* Reinsert it after MIN_MP. */
17468 mp->prev = min_mp;
17469 mp->next = min_mp->next;
17470 min_mp->next = mp;
17471 if (mp->next != NULL)
17472 mp->next->prev = mp;
17473 else
17474 minipool_vector_tail = mp;
17475 }
17476
17477 min_mp = mp;
17478
17479 offset = 0;
17480 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17481 {
17482 mp->offset = offset;
17483 if (mp->refcount > 0)
17484 offset += mp->fix_size;
17485
17486 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
17487 mp->next->min_address = mp->min_address + mp->fix_size;
17488 }
17489
17490 return min_mp;
17491 }
17492
17493 /* Add a constant to the minipool for a backward reference. Returns the
17494 node added or NULL if the constant will not fit in this pool.
17495
17496    Note that the insertion code for a backwards reference can be
17497    somewhat confusing because the calculated offsets for each fix do
17498    not take into account the size of the pool (which is still under
17499    construction). */
17500 static Mnode *
17501 add_minipool_backward_ref (Mfix *fix)
17502 {
17503 /* If set, min_mp is the last pool_entry that has a lower constraint
17504 than the one we are trying to add. */
17505 Mnode *min_mp = NULL;
17506 /* This can be negative, since it is only a constraint. */
17507 HOST_WIDE_INT min_address = fix->address - fix->backwards;
17508 Mnode *mp;
17509
17510 /* If we can't reach the current pool from this insn, or if we can't
17511 insert this entry at the end of the pool without pushing other
17512 fixes out of range, then we don't try. This ensures that we
17513 can't fail later on. */
17514 if (min_address >= minipool_barrier->address
17515 || (minipool_vector_tail->min_address + fix->fix_size
17516 >= minipool_barrier->address))
17517 return NULL;
17518
17519 /* Scan the pool to see if a constant with the same value has
17520 already been added. While we are doing this, also note the
17521 location where we must insert the constant if it doesn't already
17522 exist. */
17523 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
17524 {
17525 if (GET_CODE (fix->value) == GET_CODE (mp->value)
17526 && fix->mode == mp->mode
17527 && (!LABEL_P (fix->value)
17528 || (CODE_LABEL_NUMBER (fix->value)
17529 == CODE_LABEL_NUMBER (mp->value)))
17530 && rtx_equal_p (fix->value, mp->value)
17531 /* Check that there is enough slack to move this entry to the
17532 end of the table (this is conservative). */
17533 && (mp->max_address
17534 > (minipool_barrier->address
17535 + minipool_vector_tail->offset
17536 + minipool_vector_tail->fix_size)))
17537 {
17538 mp->refcount++;
17539 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
17540 }
17541
17542 if (min_mp != NULL)
17543 mp->min_address += fix->fix_size;
17544 else
17545 {
17546 /* Note the insertion point if necessary. */
17547 if (mp->min_address < min_address)
17548 {
17549 	      /* For now, we do not allow the insertion of nodes requiring
17550 		 8-byte alignment anywhere but at the start of the pool. */
17551 if (ARM_DOUBLEWORD_ALIGN
17552 && fix->fix_size >= 8 && mp->fix_size < 8)
17553 return NULL;
17554 else
17555 min_mp = mp;
17556 }
17557 else if (mp->max_address
17558 < minipool_barrier->address + mp->offset + fix->fix_size)
17559 {
17560 /* Inserting before this entry would push the fix beyond
17561 its maximum address (which can happen if we have
17562 re-located a forwards fix); force the new fix to come
17563 after it. */
17564 if (ARM_DOUBLEWORD_ALIGN
17565 && fix->fix_size >= 8 && mp->fix_size < 8)
17566 return NULL;
17567 else
17568 {
17569 min_mp = mp;
17570 min_address = mp->min_address + fix->fix_size;
17571 }
17572 }
17573 /* Do not insert a non-8-byte aligned quantity before 8-byte
17574 aligned quantities. */
17575 else if (ARM_DOUBLEWORD_ALIGN
17576 && fix->fix_size < 8
17577 && mp->fix_size >= 8)
17578 {
17579 min_mp = mp;
17580 min_address = mp->min_address + fix->fix_size;
17581 }
17582 }
17583 }
17584
17585 /* We need to create a new entry. */
17586 mp = XNEW (Mnode);
17587 mp->fix_size = fix->fix_size;
17588 mp->mode = fix->mode;
17589 mp->value = fix->value;
17590 mp->refcount = 1;
17591 mp->max_address = minipool_barrier->address + 65536;
17592
17593 mp->min_address = min_address;
17594
17595 if (min_mp == NULL)
17596 {
17597 mp->prev = NULL;
17598 mp->next = minipool_vector_head;
17599
17600 if (mp->next == NULL)
17601 {
17602 minipool_vector_tail = mp;
17603 minipool_vector_label = gen_label_rtx ();
17604 }
17605 else
17606 mp->next->prev = mp;
17607
17608 minipool_vector_head = mp;
17609 }
17610 else
17611 {
17612 mp->next = min_mp->next;
17613 mp->prev = min_mp;
17614 min_mp->next = mp;
17615
17616 if (mp->next != NULL)
17617 mp->next->prev = mp;
17618 else
17619 minipool_vector_tail = mp;
17620 }
17621
17622 /* Save the new entry. */
17623 min_mp = mp;
17624
17625 if (mp->prev)
17626 mp = mp->prev;
17627 else
17628 mp->offset = 0;
17629
17630 /* Scan over the following entries and adjust their offsets. */
17631 while (mp->next != NULL)
17632 {
17633 if (mp->next->min_address < mp->min_address + mp->fix_size)
17634 mp->next->min_address = mp->min_address + mp->fix_size;
17635
17636 if (mp->refcount)
17637 mp->next->offset = mp->offset + mp->fix_size;
17638 else
17639 mp->next->offset = mp->offset;
17640
17641 mp = mp->next;
17642 }
17643
17644 return min_mp;
17645 }
17646
17647 static void
17648 assign_minipool_offsets (Mfix *barrier)
17649 {
17650 HOST_WIDE_INT offset = 0;
17651 Mnode *mp;
17652
17653 minipool_barrier = barrier;
17654
17655 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17656 {
17657 mp->offset = offset;
17658
17659 if (mp->refcount > 0)
17660 offset += mp->fix_size;
17661 }
17662 }
17663
17664 /* Output the literal table. */
17665 static void
17666 dump_minipool (rtx_insn *scan)
17667 {
17668 Mnode * mp;
17669 Mnode * nmp;
17670 int align64 = 0;
17671
17672 if (ARM_DOUBLEWORD_ALIGN)
17673 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
17674 if (mp->refcount > 0 && mp->fix_size >= 8)
17675 {
17676 align64 = 1;
17677 break;
17678 }
17679
17680 if (dump_file)
17681 fprintf (dump_file,
17682 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
17683 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
17684
17685 scan = emit_label_after (gen_label_rtx (), scan);
17686 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
17687 scan = emit_label_after (minipool_vector_label, scan);
17688
17689 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
17690 {
17691 if (mp->refcount > 0)
17692 {
17693 if (dump_file)
17694 {
17695 fprintf (dump_file,
17696 ";; Offset %u, min %ld, max %ld ",
17697 (unsigned) mp->offset, (unsigned long) mp->min_address,
17698 (unsigned long) mp->max_address);
17699 arm_print_value (dump_file, mp->value);
17700 fputc ('\n', dump_file);
17701 }
17702
17703 rtx val = copy_rtx (mp->value);
17704
17705 switch (GET_MODE_SIZE (mp->mode))
17706 {
17707 #ifdef HAVE_consttable_1
17708 case 1:
17709 scan = emit_insn_after (gen_consttable_1 (val), scan);
17710 break;
17711
17712 #endif
17713 #ifdef HAVE_consttable_2
17714 case 2:
17715 scan = emit_insn_after (gen_consttable_2 (val), scan);
17716 break;
17717
17718 #endif
17719 #ifdef HAVE_consttable_4
17720 case 4:
17721 scan = emit_insn_after (gen_consttable_4 (val), scan);
17722 break;
17723
17724 #endif
17725 #ifdef HAVE_consttable_8
17726 case 8:
17727 scan = emit_insn_after (gen_consttable_8 (val), scan);
17728 break;
17729
17730 #endif
17731 #ifdef HAVE_consttable_16
17732 case 16:
17733 scan = emit_insn_after (gen_consttable_16 (val), scan);
17734 break;
17735
17736 #endif
17737 default:
17738 gcc_unreachable ();
17739 }
17740 }
17741
17742 nmp = mp->next;
17743 free (mp);
17744 }
17745
17746 minipool_vector_head = minipool_vector_tail = NULL;
17747 scan = emit_insn_after (gen_consttable_end (), scan);
17748 scan = emit_barrier_after (scan);
17749 }
17750
17751 /* Return the cost of forcibly inserting a barrier after INSN. */
17752 static int
17753 arm_barrier_cost (rtx_insn *insn)
17754 {
17755 /* Basing the location of the pool on the loop depth is preferable,
17756 but at the moment, the basic block information seems to be
17757 corrupted by this stage of the compilation. */
17758 int base_cost = 50;
17759 rtx_insn *next = next_nonnote_insn (insn);
17760
17761 if (next != NULL && LABEL_P (next))
17762 base_cost -= 20;
17763
17764 switch (GET_CODE (insn))
17765 {
17766 case CODE_LABEL:
17767 /* It will always be better to place the table before the label, rather
17768 than after it. */
17769 return 50;
17770
17771 case INSN:
17772 case CALL_INSN:
17773 return base_cost;
17774
17775 case JUMP_INSN:
17776 return base_cost - 10;
17777
17778 default:
17779 return base_cost + 10;
17780 }
17781 }
17782
17783 /* Find the best place in the insn stream in the range
17784 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
17785 Create the barrier by inserting a jump and add a new fix entry for
17786 it. */
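/* As a rough illustration (register and label names are hypothetical), the
   insns created below correspond to:

       b    .Lskip        @ jump around the new barrier
       @ barrier          @ a minipool will later be dumped here
     .Lskip:

   so the constants emitted at the barrier are never executed as code.  */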
17787 static Mfix *
17788 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
17789 {
17790 HOST_WIDE_INT count = 0;
17791 rtx_barrier *barrier;
17792 rtx_insn *from = fix->insn;
17793 /* The instruction after which we will insert the jump. */
17794 rtx_insn *selected = NULL;
17795 int selected_cost;
17796 /* The address at which the jump instruction will be placed. */
17797 HOST_WIDE_INT selected_address;
17798 Mfix * new_fix;
17799 HOST_WIDE_INT max_count = max_address - fix->address;
17800 rtx_code_label *label = gen_label_rtx ();
17801
17802 selected_cost = arm_barrier_cost (from);
17803 selected_address = fix->address;
17804
17805 while (from && count < max_count)
17806 {
17807 rtx_jump_table_data *tmp;
17808 int new_cost;
17809
17810 /* This code shouldn't have been called if there was a natural barrier
17811 within range. */
17812 gcc_assert (!BARRIER_P (from));
17813
17814 /* Count the length of this insn. This must stay in sync with the
17815 code that pushes minipool fixes. */
17816 if (LABEL_P (from))
17817 count += get_label_padding (from);
17818 else
17819 count += get_attr_length (from);
17820
17821 /* If there is a jump table, add its length. */
17822 if (tablejump_p (from, NULL, &tmp))
17823 {
17824 count += get_jump_table_size (tmp);
17825
17826 /* Jump tables aren't in a basic block, so base the cost on
17827 the dispatch insn. If we select this location, we will
17828 still put the pool after the table. */
17829 new_cost = arm_barrier_cost (from);
17830
17831 if (count < max_count
17832 && (!selected || new_cost <= selected_cost))
17833 {
17834 selected = tmp;
17835 selected_cost = new_cost;
17836 selected_address = fix->address + count;
17837 }
17838
17839 /* Continue after the dispatch table. */
17840 from = NEXT_INSN (tmp);
17841 continue;
17842 }
17843
17844 new_cost = arm_barrier_cost (from);
17845
17846 if (count < max_count
17847 && (!selected || new_cost <= selected_cost))
17848 {
17849 selected = from;
17850 selected_cost = new_cost;
17851 selected_address = fix->address + count;
17852 }
17853
17854 from = NEXT_INSN (from);
17855 }
17856
17857 /* Make sure that we found a place to insert the jump. */
17858 gcc_assert (selected);
17859
17860 /* Create a new JUMP_INSN that branches around a barrier. */
17861 from = emit_jump_insn_after (gen_jump (label), selected);
17862 JUMP_LABEL (from) = label;
17863 barrier = emit_barrier_after (from);
17864 emit_label_after (label, barrier);
17865
17866 /* Create a minipool barrier entry for the new barrier. */
17867 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
17868 new_fix->insn = barrier;
17869 new_fix->address = selected_address;
17870 new_fix->next = fix->next;
17871 fix->next = new_fix;
17872
17873 return new_fix;
17874 }
17875
17876 /* Record that there is a natural barrier in the insn stream at
17877 ADDRESS. */
17878 static void
17879 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
17880 {
17881 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17882
17883 fix->insn = insn;
17884 fix->address = address;
17885
17886 fix->next = NULL;
17887 if (minipool_fix_head != NULL)
17888 minipool_fix_tail->next = fix;
17889 else
17890 minipool_fix_head = fix;
17891
17892 minipool_fix_tail = fix;
17893 }
17894
17895 /* Record INSN, which will need fixing up to load a value from the
17896 minipool. ADDRESS is the offset of the insn from the start of the
17897 function; LOC is a pointer to the part of the insn which requires
17898 fixing; VALUE is the constant that must be loaded, which is of type
17899 MODE. */
17900 static void
17901 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17902 machine_mode mode, rtx value)
17903 {
17904 gcc_assert (!arm_disable_literal_pool);
17905 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17906
17907 fix->insn = insn;
17908 fix->address = address;
17909 fix->loc = loc;
17910 fix->mode = mode;
17911 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17912 fix->value = value;
17913 fix->forwards = get_attr_pool_range (insn);
17914 fix->backwards = get_attr_neg_pool_range (insn);
17915 fix->minipool = NULL;
17916
17917 /* If an insn doesn't have a range defined for it, then it isn't
17918 expecting to be reworked by this code. Better to stop now than
17919 to generate duff assembly code. */
17920 gcc_assert (fix->forwards || fix->backwards);
17921
17922 /* If an entry requires 8-byte alignment then assume all constant pools
17923 require 4 bytes of padding. Trying to do this later on a per-pool
17924 basis is awkward because existing pool entries have to be modified. */
17925 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17926 minipool_pad = 4;
17927
17928 if (dump_file)
17929 {
17930 fprintf (dump_file,
17931 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17932 GET_MODE_NAME (mode),
17933 INSN_UID (insn), (unsigned long) address,
17934 -1 * (long)fix->backwards, (long)fix->forwards);
17935 arm_print_value (dump_file, fix->value);
17936 fprintf (dump_file, "\n");
17937 }
17938
17939 /* Add it to the chain of fixes. */
17940 fix->next = NULL;
17941
17942 if (minipool_fix_head != NULL)
17943 minipool_fix_tail->next = fix;
17944 else
17945 minipool_fix_head = fix;
17946
17947 minipool_fix_tail = fix;
17948 }
17949
17950 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
17951 constant inline; constants whose synthesis would cost more than this
17952 are loaded from the literal pool instead. */
17953 int
17954 arm_max_const_double_inline_cost ()
17955 {
17956 return ((optimize_size || arm_ld_sched) ? 3 : 4);
17957 }
17958
17959 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17960 Returns the number of insns needed, or 99 if we don't know how to
17961 do it. */
17962 int
17963 arm_const_double_inline_cost (rtx val)
17964 {
17965 rtx lowpart, highpart;
17966 machine_mode mode;
17967
17968 mode = GET_MODE (val);
17969
17970 if (mode == VOIDmode)
17971 mode = DImode;
17972
17973 gcc_assert (GET_MODE_SIZE (mode) == 8);
17974
17975 lowpart = gen_lowpart (SImode, val);
17976 highpart = gen_highpart_mode (SImode, mode, val);
17977
17978 gcc_assert (CONST_INT_P (lowpart));
17979 gcc_assert (CONST_INT_P (highpart));
17980
17981 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17982 NULL_RTX, NULL_RTX, 0, 0)
17983 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17984 NULL_RTX, NULL_RTX, 0, 0));
17985 }
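/* For example (an illustrative value, not taken from real code): for the
   64-bit constant 0x0000FF000000FF00 both 32-bit halves are 0x0000FF00,
   which is a valid rotated 8-bit ARM immediate, so the cost computed by
   arm_const_double_inline_cost would be 1 + 1 = 2 insns.  */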
17986
17987 /* Cost of loading a SImode constant. */
17988 static inline int
17989 arm_const_inline_cost (enum rtx_code code, rtx val)
17990 {
17991 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17992 NULL_RTX, NULL_RTX, 1, 0);
17993 }
17994
17995 /* Return true if it is worthwhile to split a 64-bit constant into two
17996 32-bit operations. This is the case if optimizing for size, or
17997 if we have load delay slots, or if one 32-bit part can be done with
17998 a single data operation. */
17999 bool
18000 arm_const_double_by_parts (rtx val)
18001 {
18002 machine_mode mode = GET_MODE (val);
18003 rtx part;
18004
18005 if (optimize_size || arm_ld_sched)
18006 return true;
18007
18008 if (mode == VOIDmode)
18009 mode = DImode;
18010
18011 part = gen_highpart_mode (SImode, mode, val);
18012
18013 gcc_assert (CONST_INT_P (part));
18014
18015 if (const_ok_for_arm (INTVAL (part))
18016 || const_ok_for_arm (~INTVAL (part)))
18017 return true;
18018
18019 part = gen_lowpart (SImode, val);
18020
18021 gcc_assert (CONST_INT_P (part));
18022
18023 if (const_ok_for_arm (INTVAL (part))
18024 || const_ok_for_arm (~INTVAL (part)))
18025 return true;
18026
18027 return false;
18028 }
18029
18030 /* Return true if it is possible to inline both the high and low parts
18031 of a 64-bit constant into 32-bit data processing instructions. */
18032 bool
18033 arm_const_double_by_immediates (rtx val)
18034 {
18035 machine_mode mode = GET_MODE (val);
18036 rtx part;
18037
18038 if (mode == VOIDmode)
18039 mode = DImode;
18040
18041 part = gen_highpart_mode (SImode, mode, val);
18042
18043 gcc_assert (CONST_INT_P (part));
18044
18045 if (!const_ok_for_arm (INTVAL (part)))
18046 return false;
18047
18048 part = gen_lowpart (SImode, val);
18049
18050 gcc_assert (CONST_INT_P (part));
18051
18052 if (!const_ok_for_arm (INTVAL (part)))
18053 return false;
18054
18055 return true;
18056 }
18057
18058 /* Scan INSN and note any of its operands that need fixing.
18059 If DO_PUSHES is false we do not actually push any of the fixups
18060 needed. */
18061 static void
18062 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
18063 {
18064 int opno;
18065
18066 extract_constrain_insn (insn);
18067
18068 if (recog_data.n_alternatives == 0)
18069 return;
18070
18071 /* Fill in recog_op_alt with information about the constraints of
18072 this insn. */
18073 preprocess_constraints (insn);
18074
18075 const operand_alternative *op_alt = which_op_alt ();
18076 for (opno = 0; opno < recog_data.n_operands; opno++)
18077 {
18078 /* Things we need to fix can only occur in inputs. */
18079 if (recog_data.operand_type[opno] != OP_IN)
18080 continue;
18081
18082 /* If this alternative is a memory reference, then any mention
18083 of constants in this alternative is really to fool reload
18084 into allowing us to accept one there. We need to fix them up
18085 now so that we output the right code. */
18086 if (op_alt[opno].memory_ok)
18087 {
18088 rtx op = recog_data.operand[opno];
18089
18090 if (CONSTANT_P (op))
18091 {
18092 if (do_pushes)
18093 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
18094 recog_data.operand_mode[opno], op);
18095 }
18096 else if (MEM_P (op)
18097 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
18098 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
18099 {
18100 if (do_pushes)
18101 {
18102 rtx cop = avoid_constant_pool_reference (op);
18103
18104 /* Casting the address of something to a mode narrower
18105 than a word can cause avoid_constant_pool_reference()
18106 to return the pool reference itself. That's no good to
18107 us here. Let's just hope that we can use the
18108 constant pool value directly. */
18109 if (op == cop)
18110 cop = get_pool_constant (XEXP (op, 0));
18111
18112 push_minipool_fix (insn, address,
18113 recog_data.operand_loc[opno],
18114 recog_data.operand_mode[opno], cop);
18115 }
18116
18117 }
18118 }
18119 }
18120
18121 return;
18122 }
18123
18124 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
18125 and unions in the context of ARMv8-M Security Extensions. It is used as a
18126 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
18127 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
18128 or four masks, depending on whether it is being computed for a
18129 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
18130 respectively. The tree for the type of the argument, or of a field within
18131 an argument, is passed in ARG_TYPE; the current register this argument or
18132 field starts in is kept in the pointer REGNO and updated accordingly; the
18133 bit this argument or field starts at is passed in STARTING_BIT; and the
18134 last used bit is kept in LAST_USED_BIT, which is also updated accordingly. */
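/* A worked example, purely for illustration: for an argument of type
   struct { char a; short b; } starting in r0, field 'a' occupies bits 0-7
   and field 'b' (16-bit aligned) occupies bits 16-31, so the code below
   records 0x0000FF00 in the padding mask for that register and marks the
   register itself as not to be fully cleared.  */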
18135
18136 static unsigned HOST_WIDE_INT
18137 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
18138 uint32_t * padding_bits_to_clear,
18139 unsigned starting_bit, int * last_used_bit)
18140
18141 {
18142 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
18143
18144 if (TREE_CODE (arg_type) == RECORD_TYPE)
18145 {
18146 unsigned current_bit = starting_bit;
18147 tree field;
18148 long int offset, size;
18149
18150
18151 field = TYPE_FIELDS (arg_type);
18152 while (field)
18153 {
18154 /* The offset within a structure is always an offset from
18155 the start of that structure. Make sure we take that into
18156 account in the calculation of the register-based offset used here. */
18157 offset = starting_bit;
18158 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
18159 offset %= 32;
18160
18161 /* This is the actual size of the field, for bitfields this is the
18162 bitfield width and not the container size. */
18163 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18164
18165 if (*last_used_bit != offset)
18166 {
18167 if (offset < *last_used_bit)
18168 {
18169 /* This field's offset is before the 'last_used_bit', which
18170 means this field goes in the next register. So we need to
18171 pad the rest of the current register and increase the
18172 register number. */
18173 uint32_t mask;
18174 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
18175 mask++;
18176
18177 padding_bits_to_clear[*regno] |= mask;
18178 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18179 (*regno)++;
18180 }
18181 else
18182 {
18183 /* Otherwise we pad the bits between the last field's end and
18184 the start of the new field. */
18185 uint32_t mask;
18186
18187 mask = ((uint32_t)-1) >> (32 - offset);
18188 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
18189 padding_bits_to_clear[*regno] |= mask;
18190 }
18191 current_bit = offset;
18192 }
18193
18194 /* Calculate further padding bits for inner structs/unions too. */
18195 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
18196 {
18197 *last_used_bit = current_bit;
18198 not_to_clear_reg_mask
18199 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
18200 padding_bits_to_clear, offset,
18201 last_used_bit);
18202 }
18203 else
18204 {
18205 /* Update 'current_bit' with this field's size. If the
18206 'current_bit' lies in a subsequent register, update 'regno' and
18207 reset 'current_bit' to point to the current bit in that new
18208 register. */
18209 current_bit += size;
18210 while (current_bit >= 32)
18211 {
18212 current_bit-=32;
18213 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18214 (*regno)++;
18215 }
18216 *last_used_bit = current_bit;
18217 }
18218
18219 field = TREE_CHAIN (field);
18220 }
18221 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
18222 }
18223 else if (TREE_CODE (arg_type) == UNION_TYPE)
18224 {
18225 tree field, field_t;
18226 int i, regno_t, field_size;
18227 int max_reg = -1;
18228 int max_bit = -1;
18229 uint32_t mask;
18230 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
18231 = {-1, -1, -1, -1};
18232
18233 /* To compute the padding bits in a union we only consider bits as
18234 padding bits if, for every field in the union, they are either padding
18235 bits or fall outside that field's size. */
18236 field = TYPE_FIELDS (arg_type);
18237 while (field)
18238 {
18239 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
18240 = {0U, 0U, 0U, 0U};
18241 int last_used_bit_t = *last_used_bit;
18242 regno_t = *regno;
18243 field_t = TREE_TYPE (field);
18244
18245 /* If the field's type is either a record or a union make sure to
18246 compute their padding bits too. */
18247 if (RECORD_OR_UNION_TYPE_P (field_t))
18248 not_to_clear_reg_mask
18249 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
18250 &padding_bits_to_clear_t[0],
18251 starting_bit, &last_used_bit_t);
18252 else
18253 {
18254 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
18255 regno_t = (field_size / 32) + *regno;
18256 last_used_bit_t = (starting_bit + field_size) % 32;
18257 }
18258
18259 for (i = *regno; i < regno_t; i++)
18260 {
18261 /* For all but the last register used by this field only keep the
18262 padding bits that were padding bits in this field. */
18263 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
18264 }
18265
18266 /* For the last register, keep all padding bits that were padding
18267 bits in this field and any padding bits that are still valid
18268 as padding bits but fall outside of this field's size. */
18269 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
18270 padding_bits_to_clear_res[regno_t]
18271 &= padding_bits_to_clear_t[regno_t] | mask;
18272
18273 /* Update the maximum size of the fields in terms of registers used
18274 ('max_reg') and the 'last_used_bit' in said register. */
18275 if (max_reg < regno_t)
18276 {
18277 max_reg = regno_t;
18278 max_bit = last_used_bit_t;
18279 }
18280 else if (max_reg == regno_t && max_bit < last_used_bit_t)
18281 max_bit = last_used_bit_t;
18282
18283 field = TREE_CHAIN (field);
18284 }
18285
18286 /* Update the current padding_bits_to_clear using the intersection of the
18287 padding bits of all the fields. */
18288 for (i=*regno; i < max_reg; i++)
18289 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
18290
18291 /* Do not keep trailing padding bits; we do not know yet whether this
18292 is the end of the argument. */
18293 mask = ((uint32_t) 1 << max_bit) - 1;
18294 padding_bits_to_clear[max_reg]
18295 |= padding_bits_to_clear_res[max_reg] & mask;
18296
18297 *regno = max_reg;
18298 *last_used_bit = max_bit;
18299 }
18300 else
18301 /* This function should only be used for structs and unions. */
18302 gcc_unreachable ();
18303
18304 return not_to_clear_reg_mask;
18305 }
18306
18307 /* In the context of ARMv8-M Security Extensions, this function is used for both
18308 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
18309 registers are used when returning or passing arguments, which is then
18310 returned as a mask. It will also compute a mask to indicate padding/unused
18311 bits for each of these registers, and passes this through the
18312 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
18313 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
18314 the starting register used to pass this argument or return value is passed
18315 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
18316 for struct and union types. */
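/* For instance (illustrative only): a 64-bit integer argument passed in
   r0 and r1 has ARM_NUM_REGS equal to 2, so the code below sets bits 0
   and 1 of the returned mask, meaning neither register may be cleared.  */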
18317
18318 static unsigned HOST_WIDE_INT
18319 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
18320 uint32_t * padding_bits_to_clear)
18321
18322 {
18323 int last_used_bit = 0;
18324 unsigned HOST_WIDE_INT not_to_clear_mask;
18325
18326 if (RECORD_OR_UNION_TYPE_P (arg_type))
18327 {
18328 not_to_clear_mask
18329 = comp_not_to_clear_mask_str_un (arg_type, &regno,
18330 padding_bits_to_clear, 0,
18331 &last_used_bit);
18332
18333
18334 /* If 'last_used_bit' is not zero, we are still using part of the
18335 last 'regno'. In that case we must clear the trailing bits.
18336 Otherwise we are not using regno at all and should mark it for
18337 clearing. */
18338 if (last_used_bit != 0)
18339 padding_bits_to_clear[regno]
18340 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
18341 else
18342 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
18343 }
18344 else
18345 {
18346 not_to_clear_mask = 0;
18347 /* We are not dealing with structs or unions, so these arguments may
18348 be passed in floating-point registers too. In some cases a BLKmode is
18349 used when returning or passing arguments in multiple VFP registers. */
18350 if (GET_MODE (arg_rtx) == BLKmode)
18351 {
18352 int i, arg_regs;
18353 rtx reg;
18354
18355 /* This should really only occur when dealing with the hard-float
18356 ABI. */
18357 gcc_assert (TARGET_HARD_FLOAT_ABI);
18358
18359 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
18360 {
18361 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
18362 gcc_assert (REG_P (reg));
18363
18364 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
18365
18366 /* If we are dealing with DF mode, make sure we don't
18367 clear either of the registers it addresses. */
18368 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
18369 if (arg_regs > 1)
18370 {
18371 unsigned HOST_WIDE_INT mask;
18372 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
18373 mask -= HOST_WIDE_INT_1U << REGNO (reg);
18374 not_to_clear_mask |= mask;
18375 }
18376 }
18377 }
18378 else
18379 {
18380 /* Otherwise we can rely on the MODE to determine how many registers
18381 are being used by this argument. */
18382 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
18383 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18384 if (arg_regs > 1)
18385 {
18386 unsigned HOST_WIDE_INT
18387 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
18388 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
18389 not_to_clear_mask |= mask;
18390 }
18391 }
18392 }
18393
18394 return not_to_clear_mask;
18395 }
18396
18397 /* Clear registers that may hold secrets before doing a cmse_nonsecure_call or
18398 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
18399 which registers are to be fully cleared, using the value in register
18400 CLEARING_REG if more efficient. The PADDING_BITS_LEN-entry array
18401 PADDING_BITS_TO_CLEAR gives the bits that need to be cleared in caller-saved
18402 core registers, with SCRATCH_REG used as a scratch register for that clearing.
18403
18404 NOTE: one of the three following assertions must hold:
18405 - SCRATCH_REG is a low register
18406 - CLEARING_REG is in the set of registers fully cleared (i.e. its bit is set
18407 in TO_CLEAR_BITMAP)
18408 - CLEARING_REG is a low register. */
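/* A minimal sketch of the padding-bit clearing below, assuming
   padding_bits_to_clear[0] == 0x0000FF00 and ip as the scratch register
   (the register choice is hypothetical):

       movw ip, #0x00ff     @ low half of ~0x0000FF00
       movt ip, #0xffff     @ high half of ~0x0000FF00
       and  r0, r0, ip      @ clear bits 8-15 of r0

   Full registers are then cleared either with CLRM/VSCCLRM when the
   FPCXT extension is available, or with moves from CLEARING_REG.  */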
18409
18410 static void
18411 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
18412 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
18413 {
18414 bool saved_clearing = false;
18415 rtx saved_clearing_reg = NULL_RTX;
18416 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
18417
18418 gcc_assert (arm_arch_cmse);
18419
18420 if (!bitmap_empty_p (to_clear_bitmap))
18421 {
18422 minregno = bitmap_first_set_bit (to_clear_bitmap);
18423 maxregno = bitmap_last_set_bit (to_clear_bitmap);
18424 }
18425 clearing_regno = REGNO (clearing_reg);
18426
18427 /* Clear padding bits. */
18428 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
18429 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
18430 {
18431 uint64_t mask;
18432 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
18433
18434 if (padding_bits_to_clear[i] == 0)
18435 continue;
18436
18437 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
18438 CLEARING_REG as scratch. */
18439 if (TARGET_THUMB1
18440 && REGNO (scratch_reg) > LAST_LO_REGNUM)
18441 {
18442 /* If clearing_reg is not to be cleared, copy its value into scratch_reg
18443 so that we can use clearing_reg to clear the unused bits in the
18444 arguments. */
18445 if ((clearing_regno > maxregno
18446 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
18447 && !saved_clearing)
18448 {
18449 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
18450 emit_move_insn (scratch_reg, clearing_reg);
18451 saved_clearing = true;
18452 saved_clearing_reg = scratch_reg;
18453 }
18454 scratch_reg = clearing_reg;
18455 }
18456
18457 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
18458 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
18459 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
18460
18461 /* Fill the top half of the negated padding_bits_to_clear[i]. */
18462 mask = (~padding_bits_to_clear[i]) >> 16;
18463 rtx16 = gen_int_mode (16, SImode);
18464 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
18465 if (mask)
18466 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
18467
18468 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
18469 }
18470 if (saved_clearing)
18471 emit_move_insn (clearing_reg, saved_clearing_reg);
18472
18473
18474 /* Clear full registers. */
18475
18476 if (TARGET_HAVE_FPCXT_CMSE)
18477 {
18478 rtvec vunspec_vec;
18479 int i, j, k, nb_regs;
18480 rtx use_seq, par, reg, set, vunspec;
18481 int to_clear_bitmap_size = SBITMAP_SIZE (to_clear_bitmap);
18482 auto_sbitmap core_regs_bitmap (to_clear_bitmap_size);
18483 auto_sbitmap to_clear_core_bitmap (to_clear_bitmap_size);
18484
18485 for (i = FIRST_VFP_REGNUM; i <= maxregno; i += nb_regs)
18486 {
18487 /* Find next register to clear and exit if none. */
18488 for (; i <= maxregno && !bitmap_bit_p (to_clear_bitmap, i); i++);
18489 if (i > maxregno)
18490 break;
18491
18492 /* Compute number of consecutive registers to clear. */
18493 for (j = i; j <= maxregno && bitmap_bit_p (to_clear_bitmap, j);
18494 j++);
18495 nb_regs = j - i;
18496
18497 /* Create VSCCLRM RTX pattern. */
18498 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 1));
18499 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18500 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18501 VUNSPEC_VSCCLRM_VPR);
18502 XVECEXP (par, 0, 0) = vunspec;
18503
18504 /* Insert VFP register clearing RTX in the pattern. */
18505 start_sequence ();
18506 for (k = 1, j = i; j <= maxregno && k < nb_regs + 1; j++)
18507 {
18508 if (!bitmap_bit_p (to_clear_bitmap, j))
18509 continue;
18510
18511 reg = gen_rtx_REG (SFmode, j);
18512 set = gen_rtx_SET (reg, const0_rtx);
18513 XVECEXP (par, 0, k++) = set;
18514 emit_use (reg);
18515 }
18516 use_seq = get_insns ();
18517 end_sequence ();
18518
18519 emit_insn_after (use_seq, emit_insn (par));
18520 }
18521
18522 /* Get set of core registers to clear. */
18523 bitmap_clear (core_regs_bitmap);
18524 bitmap_set_range (core_regs_bitmap, R0_REGNUM,
18525 IP_REGNUM - R0_REGNUM + 1);
18526 bitmap_and (to_clear_core_bitmap, to_clear_bitmap,
18527 core_regs_bitmap);
18528 gcc_assert (!bitmap_empty_p (to_clear_core_bitmap));
18529
18530 if (bitmap_empty_p (to_clear_core_bitmap))
18531 return;
18532
18533 /* Create clrm RTX pattern. */
18534 nb_regs = bitmap_count_bits (to_clear_core_bitmap);
18535 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nb_regs + 2));
18536
18537 /* Insert core register clearing RTX in the pattern. */
18538 start_sequence ();
18539 for (j = 0, i = minregno; j < nb_regs; i++)
18540 {
18541 if (!bitmap_bit_p (to_clear_core_bitmap, i))
18542 continue;
18543
18544 reg = gen_rtx_REG (SImode, i);
18545 set = gen_rtx_SET (reg, const0_rtx);
18546 XVECEXP (par, 0, j++) = set;
18547 emit_use (reg);
18548 }
18549
18550 /* Insert APSR register clearing RTX in the pattern
18551 along with clobbering CC. */
18552 vunspec_vec = gen_rtvec (1, gen_int_mode (0, SImode));
18553 vunspec = gen_rtx_UNSPEC_VOLATILE (SImode, vunspec_vec,
18554 VUNSPEC_CLRM_APSR);
18555
18556 XVECEXP (par, 0, j++) = vunspec;
18557
18558 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
18559 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
18560 XVECEXP (par, 0, j) = clobber;
18561
18562 use_seq = get_insns ();
18563 end_sequence ();
18564
18565 emit_insn_after (use_seq, emit_insn (par));
18566 }
18567 else
18568 {
18569 /* If not marked for clearing, clearing_reg already does not contain
18570 any secret. */
18571 if (clearing_regno <= maxregno
18572 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
18573 {
18574 emit_move_insn (clearing_reg, const0_rtx);
18575 emit_use (clearing_reg);
18576 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
18577 }
18578
18579 for (regno = minregno; regno <= maxregno; regno++)
18580 {
18581 if (!bitmap_bit_p (to_clear_bitmap, regno))
18582 continue;
18583
18584 if (IS_VFP_REGNUM (regno))
18585 {
18586 /* If regno is an even vfp register and its successor is also to
18587 be cleared, use vmov. */
18588 if (TARGET_VFP_DOUBLE
18589 && VFP_REGNO_OK_FOR_DOUBLE (regno)
18590 && bitmap_bit_p (to_clear_bitmap, regno + 1))
18591 {
18592 emit_move_insn (gen_rtx_REG (DFmode, regno),
18593 CONST1_RTX (DFmode));
18594 emit_use (gen_rtx_REG (DFmode, regno));
18595 regno++;
18596 }
18597 else
18598 {
18599 emit_move_insn (gen_rtx_REG (SFmode, regno),
18600 CONST1_RTX (SFmode));
18601 emit_use (gen_rtx_REG (SFmode, regno));
18602 }
18603 }
18604 else
18605 {
18606 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
18607 emit_use (gen_rtx_REG (SImode, regno));
18608 }
18609 }
18610 }
18611 }
18612
18613 /* Clear core and caller-saved VFP registers not used to pass arguments before
18614 a cmse_nonsecure_call. Saving, clearing and restoring of VFP callee-saved
18615 registers is done in the __gnu_cmse_nonsecure_call libcall. See
18616 libgcc/config/arm/cmse_nonsecure_call.S. */
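/* Roughly, for each cmse_nonsecure_call the loop below emits, just before
   the call: clearing of the argument registers not used by the call's
   arguments (including the padding bits of the ones that are used),
   clearing of the LSB of the target address with a right/left shift pair,
   and, when the FPCXT extension is available, a push/pop of callee-saved
   registers around the call.  */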
18617
18618 static void
18619 cmse_nonsecure_call_inline_register_clear (void)
18620 {
18621 basic_block bb;
18622
18623 FOR_EACH_BB_FN (bb, cfun)
18624 {
18625 rtx_insn *insn;
18626
18627 FOR_BB_INSNS (bb, insn)
18628 {
18629 bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE;
18630 /* frame = VFP regs + FPSCR + VPR. */
18631 unsigned lazy_store_stack_frame_size
18632 = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD;
18633 unsigned long callee_saved_mask
18634 = ((1 << (LAST_HI_REGNUM + 1)) - 1)
18635 & ~((1 << (LAST_ARG_REGNUM + 1)) - 1);
18636 unsigned address_regnum, regno;
18637 unsigned max_int_regno
18638 = clear_callee_saved ? IP_REGNUM : LAST_ARG_REGNUM;
18639 unsigned max_fp_regno
18640 = TARGET_HAVE_FPCXT_CMSE ? LAST_VFP_REGNUM : D7_VFP_REGNUM;
18641 unsigned maxregno
18642 = TARGET_HARD_FLOAT_ABI ? max_fp_regno : max_int_regno;
18643 auto_sbitmap to_clear_bitmap (maxregno + 1);
18644 rtx_insn *seq;
18645 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
18646 rtx address;
18647 CUMULATIVE_ARGS args_so_far_v;
18648 cumulative_args_t args_so_far;
18649 tree arg_type, fntype;
18650 bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI;
18651 function_args_iterator args_iter;
18652 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
18653
18654 if (!NONDEBUG_INSN_P (insn))
18655 continue;
18656
18657 if (!CALL_P (insn))
18658 continue;
18659
18660 pat = PATTERN (insn);
18661 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
18662 call = XVECEXP (pat, 0, 0);
18663
18664 /* Get the real call RTX if the insn sets a value, i.e. returns. */
18665 if (GET_CODE (call) == SET)
18666 call = SET_SRC (call);
18667
18668 /* Check if it is a cmse_nonsecure_call. */
18669 unspec = XEXP (call, 0);
18670 if (GET_CODE (unspec) != UNSPEC
18671 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
18672 continue;
18673
18674 /* Mark registers that need to be cleared. Those that hold a
18675 parameter are removed from the set further below. */
18676 bitmap_clear (to_clear_bitmap);
18677 bitmap_set_range (to_clear_bitmap, R0_REGNUM,
18678 max_int_regno - R0_REGNUM + 1);
18679
18680 /* Only look at the caller-saved floating point registers in case of
18681 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
18682 lazy store and loads which clear both caller- and callee-saved
18683 registers. */
18684 if (!lazy_fpclear)
18685 {
18686 auto_sbitmap float_bitmap (maxregno + 1);
18687
18688 bitmap_clear (float_bitmap);
18689 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
18690 max_fp_regno - FIRST_VFP_REGNUM + 1);
18691 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
18692 }
18693
18694 /* Make sure the register used to hold the function address is not
18695 cleared. */
18696 address = RTVEC_ELT (XVEC (unspec, 0), 0);
18697 gcc_assert (MEM_P (address));
18698 gcc_assert (REG_P (XEXP (address, 0)));
18699 address_regnum = REGNO (XEXP (address, 0));
18700 if (address_regnum <= max_int_regno)
18701 bitmap_clear_bit (to_clear_bitmap, address_regnum);
18702
18703 /* Set basic block of call insn so that df rescan is performed on
18704 insns inserted here. */
18705 set_block_for_insn (insn, bb);
18706 df_set_flags (DF_DEFER_INSN_RESCAN);
18707 start_sequence ();
18708
18709 /* Make sure the scheduler doesn't schedule other insns beyond
18710 here. */
18711 emit_insn (gen_blockage ());
18712
18713 /* Walk through all arguments and clear registers appropriately. */
18715 fntype = TREE_TYPE (MEM_EXPR (address));
18716 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
18717 NULL_TREE);
18718 args_so_far = pack_cumulative_args (&args_so_far_v);
18719 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
18720 {
18721 rtx arg_rtx;
18722 uint64_t to_clear_args_mask;
18723
18724 if (VOID_TYPE_P (arg_type))
18725 continue;
18726
18727 function_arg_info arg (arg_type, /*named=*/true);
18728 if (!first_param)
18729 /* ??? We should advance after processing the argument and pass
18730 the argument we're advancing past. */
18731 arm_function_arg_advance (args_so_far, arg);
18732
18733 arg_rtx = arm_function_arg (args_so_far, arg);
18734 gcc_assert (REG_P (arg_rtx));
18735 to_clear_args_mask
18736 = compute_not_to_clear_mask (arg_type, arg_rtx,
18737 REGNO (arg_rtx),
18738 &padding_bits_to_clear[0]);
18739 if (to_clear_args_mask)
18740 {
18741 for (regno = R0_REGNUM; regno <= maxregno; regno++)
18742 {
18743 if (to_clear_args_mask & (1ULL << regno))
18744 bitmap_clear_bit (to_clear_bitmap, regno);
18745 }
18746 }
18747
18748 first_param = false;
18749 }
18750
18751 /* We use right shift and left shift to clear the LSB of the address
18752 we jump to instead of using bic, to avoid having to use an extra
18753 register on Thumb-1. */
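/* E.g., with the target address in r4 (hypothetical register choice):
       lsrs r4, r4, #1
       lsls r4, r4, #1
   clears bit 0 without needing an extra register.  */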
18754 clearing_reg = XEXP (address, 0);
18755 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
18756 emit_insn (gen_rtx_SET (clearing_reg, shift));
18757 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
18758 emit_insn (gen_rtx_SET (clearing_reg, shift));
18759
18760 if (clear_callee_saved)
18761 {
18762 rtx push_insn =
18763 emit_multi_reg_push (callee_saved_mask, callee_saved_mask);
18764 /* Disable frame debug info in push because it needs to be
18765 disabled for pop (see below). */
18766 RTX_FRAME_RELATED_P (push_insn) = 0;
18767
18768 /* Lazy store multiple. */
18769 if (lazy_fpclear)
18770 {
18771 rtx imm;
18772 rtx_insn *add_insn;
18773
18774 imm = gen_int_mode (- lazy_store_stack_frame_size, SImode);
18775 add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
18776 stack_pointer_rtx, imm));
18777 arm_add_cfa_adjust_cfa_note (add_insn,
18778 - lazy_store_stack_frame_size,
18779 stack_pointer_rtx,
18780 stack_pointer_rtx);
18781 emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx));
18782 }
18783 /* Save VFP callee-saved registers. */
18784 else
18785 {
18786 vfp_emit_fstmd (D7_VFP_REGNUM + 1,
18787 (max_fp_regno - D7_VFP_REGNUM) / 2);
18788 /* Disable frame debug info in push because it needs to be
18789 disabled for vpop (see below). */
18790 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18791 }
18792 }
18793
18794 /* Clear caller-saved registers that could leak secrets before doing a
18795 non-secure call. */
18796 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
18797 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
18798 NUM_ARG_REGS, ip_reg, clearing_reg);
18799
18800 seq = get_insns ();
18801 end_sequence ();
18802 emit_insn_before (seq, insn);
18803
18804 if (TARGET_HAVE_FPCXT_CMSE)
18805 {
18806 rtx_insn *last, *pop_insn, *after = insn;
18807
18808 start_sequence ();
18809
18810 /* Lazy load multiple done as part of libcall in Armv8-M. */
18811 if (lazy_fpclear)
18812 {
18813 rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode);
18814 emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx));
18815 rtx_insn *add_insn =
18816 emit_insn (gen_addsi3 (stack_pointer_rtx,
18817 stack_pointer_rtx, imm));
18818 arm_add_cfa_adjust_cfa_note (add_insn,
18819 lazy_store_stack_frame_size,
18820 stack_pointer_rtx,
18821 stack_pointer_rtx);
18822 }
18823 /* Restore VFP callee-saved registers. */
18824 else
18825 {
18826 int nb_callee_saved_vfp_regs =
18827 (max_fp_regno - D7_VFP_REGNUM) / 2;
18828 arm_emit_vfp_multi_reg_pop (D7_VFP_REGNUM + 1,
18829 nb_callee_saved_vfp_regs,
18830 stack_pointer_rtx);
18831 /* Disable frame debug info in vpop because the SP adjustment
18832 is made using a CFA adjustment note while the CFA used is
18833 sometimes R7. This then causes an assert failure in the
18834 CFI note creation code. */
18835 RTX_FRAME_RELATED_P (get_last_insn ()) = 0;
18836 }
18837
18838 arm_emit_multi_reg_pop (callee_saved_mask);
18839 pop_insn = get_last_insn ();
18840
18841 /* Disable frame debug info in the pop because the notes reset the state
18842 of popped registers to what it was at the beginning of the
18843 function, before the prologue. This leads to incorrect state
18844 when doing the pop after the nonsecure call for registers that
18845 are pushed both in prologue and before the nonsecure call.
18846
18847 It also occasionally triggers an assert failure in CFI note
18848 creation code when there are two codepaths to the epilogue,
18849 one of which does not go through the nonsecure call.
18850 Obviously this means that debugging between the push and pop is
18851 not reliable. */
18852 RTX_FRAME_RELATED_P (pop_insn) = 0;
18853
18854 seq = get_insns ();
18855 last = get_last_insn ();
18856 end_sequence ();
18857
18858 emit_insn_after (seq, after);
18859
18860 /* Skip the pop we have just inserted after the nonsecure call; we know
18861 it does not contain a nonsecure call. */
18862 insn = last;
18863 }
18864 }
18865 }
18866 }
18867
18868 /* Rewrite a move insn into a subtract of 0 if the condition codes will
18869 be useful in the next conditional jump insn. */
18870
18871 static void
18872 thumb1_reorg (void)
18873 {
18874 basic_block bb;
18875
18876 FOR_EACH_BB_FN (bb, cfun)
18877 {
18878 rtx dest, src;
18879 rtx cmp, op0, op1, set = NULL;
18880 rtx_insn *prev, *insn = BB_END (bb);
18881 bool insn_clobbered = false;
18882
18883 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
18884 insn = PREV_INSN (insn);
18885
18886 /* Find the last cbranchsi4_insn in basic block BB. */
18887 if (insn == BB_HEAD (bb)
18888 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
18889 continue;
18890
18891 /* Get the register with which we are comparing. */
18892 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
18893 op0 = XEXP (cmp, 0);
18894 op1 = XEXP (cmp, 1);
18895
18896 /* Check that comparison is against ZERO. */
18897 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
18898 continue;
18899
18900 /* Find the first flag setting insn before INSN in basic block BB. */
18901 gcc_assert (insn != BB_HEAD (bb));
18902 for (prev = PREV_INSN (insn);
18903 (!insn_clobbered
18904 && prev != BB_HEAD (bb)
18905 && (NOTE_P (prev)
18906 || DEBUG_INSN_P (prev)
18907 || ((set = single_set (prev)) != NULL
18908 && get_attr_conds (prev) == CONDS_NOCOND)));
18909 prev = PREV_INSN (prev))
18910 {
18911 if (reg_set_p (op0, prev))
18912 insn_clobbered = true;
18913 }
18914
18915 /* Skip if op0 is clobbered by insn other than prev. */
18916 if (insn_clobbered)
18917 continue;
18918
18919 if (!set)
18920 continue;
18921
18922 dest = SET_DEST (set);
18923 src = SET_SRC (set);
18924 if (!low_register_operand (dest, SImode)
18925 || !low_register_operand (src, SImode))
18926 continue;
18927
18928 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
18929 in INSN. Both src and dest of the move insn are checked. */
18930 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
18931 {
18932 dest = copy_rtx (dest);
18933 src = copy_rtx (src);
18934 src = gen_rtx_MINUS (SImode, src, const0_rtx);
18935 PATTERN (prev) = gen_rtx_SET (dest, src);
18936 INSN_CODE (prev) = -1;
18937 /* Set test register in INSN to dest. */
18938 XEXP (cmp, 0) = copy_rtx (dest);
18939 INSN_CODE (insn) = -1;
18940 }
18941 }
18942 }
18943
18944 /* Convert instructions to their cc-clobbering variant if possible, since
18945 that allows us to use smaller encodings. */
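/* A sketch of the effect, assuming r0-r2 are low registers: for
       (set (reg:SI r0) (plus:SI (reg:SI r1) (reg:SI r2)))
   the loop below adds a clobber of CC, turning the insn into its
   flag-setting form so that the 16-bit "adds r0, r1, r2" encoding can be
   used instead of the 32-bit "add.w r0, r1, r2".  */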
18946
18947 static void
18948 thumb2_reorg (void)
18949 {
18950 basic_block bb;
18951 regset_head live;
18952
18953 INIT_REG_SET (&live);
18954
18955 /* We are freeing block_for_insn in the toplev to keep compatibility
18956 with old MDEP_REORGS that are not CFG based. Recompute it now. */
18957 compute_bb_for_insn ();
18958 df_analyze ();
18959
18960 enum Convert_Action {SKIP, CONV, SWAP_CONV};
18961
18962 FOR_EACH_BB_FN (bb, cfun)
18963 {
18964 if ((current_tune->disparage_flag_setting_t16_encodings
18965 == tune_params::DISPARAGE_FLAGS_ALL)
18966 && optimize_bb_for_speed_p (bb))
18967 continue;
18968
18969 rtx_insn *insn;
18970 Convert_Action action = SKIP;
18971 Convert_Action action_for_partial_flag_setting
18972 = ((current_tune->disparage_flag_setting_t16_encodings
18973 != tune_params::DISPARAGE_FLAGS_NEITHER)
18974 && optimize_bb_for_speed_p (bb))
18975 ? SKIP : CONV;
18976
18977 COPY_REG_SET (&live, DF_LR_OUT (bb));
18978 df_simulate_initialize_backwards (bb, &live);
18979 FOR_BB_INSNS_REVERSE (bb, insn)
18980 {
18981 if (NONJUMP_INSN_P (insn)
18982 && !REGNO_REG_SET_P (&live, CC_REGNUM)
18983 && GET_CODE (PATTERN (insn)) == SET)
18984 {
18985 action = SKIP;
18986 rtx pat = PATTERN (insn);
18987 rtx dst = XEXP (pat, 0);
18988 rtx src = XEXP (pat, 1);
18989 rtx op0 = NULL_RTX, op1 = NULL_RTX;
18990
18991 if (UNARY_P (src) || BINARY_P (src))
18992 op0 = XEXP (src, 0);
18993
18994 if (BINARY_P (src))
18995 op1 = XEXP (src, 1);
18996
18997 if (low_register_operand (dst, SImode))
18998 {
18999 switch (GET_CODE (src))
19000 {
19001 case PLUS:
19002 /* Adding two registers and storing the result
19003 in the first source is already a 16-bit
19004 operation. */
19005 if (rtx_equal_p (dst, op0)
19006 && register_operand (op1, SImode))
19007 break;
19008
19009 if (low_register_operand (op0, SImode))
19010 {
19011 /* ADDS <Rd>,<Rn>,<Rm> */
19012 if (low_register_operand (op1, SImode))
19013 action = CONV;
19014 /* ADDS <Rdn>,#<imm8> */
19015 /* SUBS <Rdn>,#<imm8> */
19016 else if (rtx_equal_p (dst, op0)
19017 && CONST_INT_P (op1)
19018 && IN_RANGE (INTVAL (op1), -255, 255))
19019 action = CONV;
19020 /* ADDS <Rd>,<Rn>,#<imm3> */
19021 /* SUBS <Rd>,<Rn>,#<imm3> */
19022 else if (CONST_INT_P (op1)
19023 && IN_RANGE (INTVAL (op1), -7, 7))
19024 action = CONV;
19025 }
19026 /* ADCS <Rd>, <Rn> */
19027 else if (GET_CODE (XEXP (src, 0)) == PLUS
19028 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
19029 && low_register_operand (XEXP (XEXP (src, 0), 1),
19030 SImode)
19031 && COMPARISON_P (op1)
19032 && cc_register (XEXP (op1, 0), VOIDmode)
19033 && maybe_get_arm_condition_code (op1) == ARM_CS
19034 && XEXP (op1, 1) == const0_rtx)
19035 action = CONV;
19036 break;
19037
19038 case MINUS:
19039 /* RSBS <Rd>,<Rn>,#0
19040 Not handled here: see NEG below. */
19041 /* SUBS <Rd>,<Rn>,#<imm3>
19042 SUBS <Rdn>,#<imm8>
19043 Not handled here: see PLUS above. */
19044 /* SUBS <Rd>,<Rn>,<Rm> */
19045 if (low_register_operand (op0, SImode)
19046 && low_register_operand (op1, SImode))
19047 action = CONV;
19048 break;
19049
19050 case MULT:
19051 /* MULS <Rdm>,<Rn>,<Rdm>
19052 As an exception to the rule, this is only used
19053 when optimizing for size since MULS is slow on all
19054 known implementations. We do not even want to use
19055 MULS in cold code, if optimizing for speed, so we
19056 test the global flag here. */
19057 if (!optimize_size)
19058 break;
19059 /* Fall through. */
19060 case AND:
19061 case IOR:
19062 case XOR:
19063 /* ANDS <Rdn>,<Rm> */
19064 if (rtx_equal_p (dst, op0)
19065 && low_register_operand (op1, SImode))
19066 action = action_for_partial_flag_setting;
19067 else if (rtx_equal_p (dst, op1)
19068 && low_register_operand (op0, SImode))
19069 action = action_for_partial_flag_setting == SKIP
19070 ? SKIP : SWAP_CONV;
19071 break;
19072
19073 case ASHIFTRT:
19074 case ASHIFT:
19075 case LSHIFTRT:
19076 /* ASRS <Rdn>,<Rm> */
19077 /* LSRS <Rdn>,<Rm> */
19078 /* LSLS <Rdn>,<Rm> */
19079 if (rtx_equal_p (dst, op0)
19080 && low_register_operand (op1, SImode))
19081 action = action_for_partial_flag_setting;
19082 /* ASRS <Rd>,<Rm>,#<imm5> */
19083 /* LSRS <Rd>,<Rm>,#<imm5> */
19084 /* LSLS <Rd>,<Rm>,#<imm5> */
19085 else if (low_register_operand (op0, SImode)
19086 && CONST_INT_P (op1)
19087 && IN_RANGE (INTVAL (op1), 0, 31))
19088 action = action_for_partial_flag_setting;
19089 break;
19090
19091 case ROTATERT:
19092 /* RORS <Rdn>,<Rm> */
19093 if (rtx_equal_p (dst, op0)
19094 && low_register_operand (op1, SImode))
19095 action = action_for_partial_flag_setting;
19096 break;
19097
19098 case NOT:
19099 /* MVNS <Rd>,<Rm> */
19100 if (low_register_operand (op0, SImode))
19101 action = action_for_partial_flag_setting;
19102 break;
19103
19104 case NEG:
19105 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
19106 if (low_register_operand (op0, SImode))
19107 action = CONV;
19108 break;
19109
19110 case CONST_INT:
19111 /* MOVS <Rd>,#<imm8> */
19112 if (CONST_INT_P (src)
19113 && IN_RANGE (INTVAL (src), 0, 255))
19114 action = action_for_partial_flag_setting;
19115 break;
19116
19117 case REG:
19118 /* MOVS and MOV<c> with registers have different
19119 encodings, so are not relevant here. */
19120 break;
19121
19122 default:
19123 break;
19124 }
19125 }
19126
19127 if (action != SKIP)
19128 {
19129 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
19130 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
19131 rtvec vec;
19132
19133 if (action == SWAP_CONV)
19134 {
19135 src = copy_rtx (src);
19136 XEXP (src, 0) = op1;
19137 XEXP (src, 1) = op0;
19138 pat = gen_rtx_SET (dst, src);
19139 vec = gen_rtvec (2, pat, clobber);
19140 }
19141 else /* action == CONV */
19142 vec = gen_rtvec (2, pat, clobber);
19143
19144 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
19145 INSN_CODE (insn) = -1;
19146 }
19147 }
19148
19149 if (NONDEBUG_INSN_P (insn))
19150 df_simulate_one_insn_backwards (bb, insn, &live);
19151 }
19152 }
19153
19154 CLEAR_REG_SET (&live);
19155 }
19156
19157 /* GCC puts the pool in the wrong place for ARM, since we can only
19158 load addresses a limited distance around the PC. We do some
19159 special munging to move the constant pool values to the correct
19160 point in the code. */
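/* For example, an ARM-state "ldr rN, <literal>" can only reach constants
   within roughly 4KB of the pc, so the pass below records every such
   load, finds (or creates) a barrier within range, and dumps the required
   constants into a minipool placed just after that barrier.  */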
19161 static void
19162 arm_reorg (void)
19163 {
19164 rtx_insn *insn;
19165 HOST_WIDE_INT address = 0;
19166 Mfix * fix;
19167
19168 if (use_cmse)
19169 cmse_nonsecure_call_inline_register_clear ();
19170
19171 /* We cannot run the Thumb passes for thunks because there is no CFG. */
19172 if (cfun->is_thunk)
19173 ;
19174 else if (TARGET_THUMB1)
19175 thumb1_reorg ();
19176 else if (TARGET_THUMB2)
19177 thumb2_reorg ();
19178
19179 /* Ensure all insns that must be split have been split at this point.
19180 Otherwise, the pool placement code below may compute incorrect
19181 insn lengths. Note that when optimizing, all insns have already
19182 been split at this point. */
19183 if (!optimize)
19184 split_all_insns_noflow ();
19185
19186 /* If literal pools are disabled we should not need to create any, so
19187 make sure we do not attempt to. */
19188 if (arm_disable_literal_pool)
19189 return;
19190
19191 minipool_fix_head = minipool_fix_tail = NULL;
19192
19193 /* The first insn must always be a note, or the code below won't
19194 scan it properly. */
19195 insn = get_insns ();
19196 gcc_assert (NOTE_P (insn));
19197 minipool_pad = 0;
19198
19199 /* Scan all the insns and record the operands that will need fixing. */
19200 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
19201 {
19202 if (BARRIER_P (insn))
19203 push_minipool_barrier (insn, address);
19204 else if (INSN_P (insn))
19205 {
19206 rtx_jump_table_data *table;
19207
19208 note_invalid_constants (insn, address, true);
19209 address += get_attr_length (insn);
19210
19211 /* If the insn is a vector jump, add the size of the table
19212 and skip the table. */
19213 if (tablejump_p (insn, NULL, &table))
19214 {
19215 address += get_jump_table_size (table);
19216 insn = table;
19217 }
19218 }
19219 else if (LABEL_P (insn))
19220 /* Add the worst-case padding due to alignment. We don't add
19221 the _current_ padding because the minipool insertions
19222 themselves might change it. */
19223 address += get_label_padding (insn);
19224 }
19225
19226 fix = minipool_fix_head;
19227
19228 /* Now scan the fixups and perform the required changes. */
19229 while (fix)
19230 {
19231 Mfix * ftmp;
19232 Mfix * fdel;
19233 Mfix * last_added_fix;
19234 Mfix * last_barrier = NULL;
19235 Mfix * this_fix;
19236
19237 /* Skip any further barriers before the next fix. */
19238 while (fix && BARRIER_P (fix->insn))
19239 fix = fix->next;
19240
19241 /* No more fixes. */
19242 if (fix == NULL)
19243 break;
19244
19245 last_added_fix = NULL;
19246
19247 for (ftmp = fix; ftmp; ftmp = ftmp->next)
19248 {
19249 if (BARRIER_P (ftmp->insn))
19250 {
19251 if (ftmp->address >= minipool_vector_head->max_address)
19252 break;
19253
19254 last_barrier = ftmp;
19255 }
19256 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
19257 break;
19258
19259 last_added_fix = ftmp; /* Keep track of the last fix added. */
19260 }
19261
19262 /* If we found a barrier, drop back to that; any fixes that we
19263 could have reached but come after the barrier will now go in
19264 the next mini-pool. */
19265 if (last_barrier != NULL)
19266 {
19267 /* Reduce the refcount for those fixes that won't go into this
19268 pool after all. */
19269 for (fdel = last_barrier->next;
19270 fdel && fdel != ftmp;
19271 fdel = fdel->next)
19272 {
19273 fdel->minipool->refcount--;
19274 fdel->minipool = NULL;
19275 }
19276
19277 ftmp = last_barrier;
19278 }
19279 else
19280 {
19281 /* ftmp is the first fix that we can't fit into this pool and
19282 there are no natural barriers that we could use. Insert a
19283 new barrier in the code somewhere between the previous
19284 fix and this one, and arrange to jump around it. */
19285 HOST_WIDE_INT max_address;
19286
19287 /* The last item on the list of fixes must be a barrier, so
19288 we can never run off the end of the list of fixes without
19289 last_barrier being set. */
19290 gcc_assert (ftmp);
19291
19292 max_address = minipool_vector_head->max_address;
19293 /* Check that there isn't another fix that is in range that
19294 we couldn't fit into this pool because the pool was
19295 already too large: we need to put the pool before such an
19296 instruction. The pool itself may come just after the
19297 fix because create_fix_barrier also allows space for a
19298 jump instruction. */
19299 if (ftmp->address < max_address)
19300 max_address = ftmp->address + 1;
19301
19302 last_barrier = create_fix_barrier (last_added_fix, max_address);
19303 }
19304
19305 assign_minipool_offsets (last_barrier);
19306
19307 while (ftmp)
19308 {
19309 if (!BARRIER_P (ftmp->insn)
19310 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
19311 == NULL))
19312 break;
19313
19314 ftmp = ftmp->next;
19315 }
19316
19317 /* Scan over the fixes we have identified for this pool, fixing them
19318 up and adding the constants to the pool itself. */
19319 for (this_fix = fix; this_fix && ftmp != this_fix;
19320 this_fix = this_fix->next)
19321 if (!BARRIER_P (this_fix->insn))
19322 {
19323 rtx addr
19324 = plus_constant (Pmode,
19325 gen_rtx_LABEL_REF (VOIDmode,
19326 minipool_vector_label),
19327 this_fix->minipool->offset);
19328 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
19329 }
19330
19331 dump_minipool (last_barrier->insn);
19332 fix = ftmp;
19333 }
19334
19335 /* From now on we must synthesize any constants that we can't handle
19336 directly. This can happen if the RTL gets split during final
19337 instruction generation. */
19338 cfun->machine->after_arm_reorg = 1;
19339
19340 /* Free the minipool memory. */
19341 obstack_free (&minipool_obstack, minipool_startobj);
19342 }
19343 \f
19344 /* Routines to output assembly language. */
19345
19346 /* Return string representation of passed in real value. */
19347 static const char *
19348 fp_const_from_val (REAL_VALUE_TYPE *r)
19349 {
19350 if (!fp_consts_inited)
19351 init_fp_table ();
19352
19353 gcc_assert (real_equal (r, &value_fp0));
19354 return "0";
19355 }
19356
19357 /* OPERANDS[0] is the entire list of insns that constitute the pop,
19358 OPERANDS[1] is the base register, RETURN_PC is true iff the return
19359 insn is in the list, and UPDATE is true iff the list contains an
19360 explicit update of the base register. */
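/* Typical outputs (illustrative): "pop {r4, r5, pc}" when the base is SP
   with an update, "ldmia r3!, {r4, r5}" for a non-SP base with an update,
   and a trailing "^" when returning from an interrupt handler.  */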
19361 void
19362 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
19363 bool update)
19364 {
19365 int i;
19366 char pattern[100];
19367 int offset;
19368 const char *conditional;
19369 int num_saves = XVECLEN (operands[0], 0);
19370 unsigned int regno;
19371 unsigned int regno_base = REGNO (operands[1]);
19372 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
19373
19374 offset = 0;
19375 offset += update ? 1 : 0;
19376 offset += return_pc ? 1 : 0;
19377
19378 /* Is the base register in the list? */
19379 for (i = offset; i < num_saves; i++)
19380 {
19381 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
19382 /* If SP is in the list, then the base register must be SP. */
19383 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
19384 /* If the base register is in the list, there must be no explicit update. */
19385 if (regno == regno_base)
19386 gcc_assert (!update);
19387 }
19388
19389 conditional = reverse ? "%?%D0" : "%?%d0";
19390 /* Can't use POP if returning from an interrupt. */
19391 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
19392 sprintf (pattern, "pop%s\t{", conditional);
19393 else
19394 {
19395 /* Output ldmfd when the base register is SP, otherwise output ldmia.
19396 It's just a convention; their semantics are identical. */
19397 if (regno_base == SP_REGNUM)
19398 sprintf (pattern, "ldmfd%s\t", conditional);
19399 else if (update)
19400 sprintf (pattern, "ldmia%s\t", conditional);
19401 else
19402 sprintf (pattern, "ldm%s\t", conditional);
19403
19404 strcat (pattern, reg_names[regno_base]);
19405 if (update)
19406 strcat (pattern, "!, {");
19407 else
19408 strcat (pattern, ", {");
19409 }
19410
19411 /* Output the first destination register. */
19412 strcat (pattern,
19413 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
19414
19415 /* Output the rest of the destination registers. */
19416 for (i = offset + 1; i < num_saves; i++)
19417 {
19418 strcat (pattern, ", ");
19419 strcat (pattern,
19420 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
19421 }
19422
19423 strcat (pattern, "}");
19424
19425 if (interrupt_p && return_pc)
19426 strcat (pattern, "^");
19427
19428 output_asm_insn (pattern, &cond);
19429 }
19430
19431
19432 /* Output the assembly for a store multiple. */
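/* E.g. (illustrative): "vpush.64 {d8, d9}" when the address register is
   SP, or "vstmdb.64 r4!, {d8, d9}" otherwise.  */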
19433
19434 const char *
19435 vfp_output_vstmd (rtx * operands)
19436 {
19437 char pattern[100];
19438 int p;
19439 int base;
19440 int i;
19441 rtx addr_reg = REG_P (XEXP (operands[0], 0))
19442 ? XEXP (operands[0], 0)
19443 : XEXP (XEXP (operands[0], 0), 0);
19444 bool push_p = REGNO (addr_reg) == SP_REGNUM;
19445
19446 if (push_p)
19447 strcpy (pattern, "vpush%?.64\t{%P1");
19448 else
19449 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
19450
19451 p = strlen (pattern);
19452
19453 gcc_assert (REG_P (operands[1]));
19454
19455 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
19456 for (i = 1; i < XVECLEN (operands[2], 0); i++)
19457 {
19458 p += sprintf (&pattern[p], ", d%d", base + i);
19459 }
19460 strcpy (&pattern[p], "}");
19461
19462 output_asm_insn (pattern, operands);
19463 return "";
19464 }
19465
19466
19467 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
19468 number of bytes pushed. */
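/* For example (registers assumed for illustration): saving d8 and d9 normally
   reserves 16 bytes, but on cores affected by the ARM10 VFPr1 erratum handled
   below an extra register is pushed and 24 is returned instead.  */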
19469
19470 static int
19471 vfp_emit_fstmd (int base_reg, int count)
19472 {
19473 rtx par;
19474 rtx dwarf;
19475 rtx tmp, reg;
19476 int i;
19477
19478 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
19479 register pairs are stored by a store multiple insn. We avoid this
19480 by pushing an extra pair. */
19481 if (count == 2 && !arm_arch6)
19482 {
19483 if (base_reg == LAST_VFP_REGNUM - 3)
19484 base_reg -= 2;
19485 count++;
19486 }
19487
19488 /* FSTMD may not store more than 16 doubleword registers at once. Split
19489 larger stores into multiple parts (up to a maximum of two, in
19490 practice). */
19491 if (count > 16)
19492 {
19493 int saved;
19494 /* NOTE: base_reg is an internal register number, so each D register
19495 counts as 2. */
19496 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
19497 saved += vfp_emit_fstmd (base_reg, 16);
19498 return saved;
19499 }
19500
19501 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
19502 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
19503
19504 reg = gen_rtx_REG (DFmode, base_reg);
19505 base_reg += 2;
19506
19507 XVECEXP (par, 0, 0)
19508 = gen_rtx_SET (gen_frame_mem
19509 (BLKmode,
19510 gen_rtx_PRE_MODIFY (Pmode,
19511 stack_pointer_rtx,
19512 plus_constant
19513 (Pmode, stack_pointer_rtx,
19514 - (count * 8)))
19515 ),
19516 gen_rtx_UNSPEC (BLKmode,
19517 gen_rtvec (1, reg),
19518 UNSPEC_PUSH_MULT));
19519
19520 tmp = gen_rtx_SET (stack_pointer_rtx,
19521 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
19522 RTX_FRAME_RELATED_P (tmp) = 1;
19523 XVECEXP (dwarf, 0, 0) = tmp;
19524
19525 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
19526 RTX_FRAME_RELATED_P (tmp) = 1;
19527 XVECEXP (dwarf, 0, 1) = tmp;
19528
19529 for (i = 1; i < count; i++)
19530 {
19531 reg = gen_rtx_REG (DFmode, base_reg);
19532 base_reg += 2;
19533 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
19534
19535 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
19536 plus_constant (Pmode,
19537 stack_pointer_rtx,
19538 i * 8)),
19539 reg);
19540 RTX_FRAME_RELATED_P (tmp) = 1;
19541 XVECEXP (dwarf, 0, i + 1) = tmp;
19542 }
19543
19544 par = emit_insn (par);
19545 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19546 RTX_FRAME_RELATED_P (par) = 1;
19547
19548 return count * 8;
19549 }
19550
19551 /* Return TRUE if -mcmse has been passed and the function pointed to by 'addr'
19552 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
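/* A hedged usage sketch: with -mcmse, a call through a pointer whose type
   carries the attribute, e.g.
     typedef void (*ns_fn_t) (void) __attribute__ ((cmse_nonsecure_call));
   makes this predicate return TRUE.  The typedef name is illustrative.  */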
19553
19554 bool
19555 detect_cmse_nonsecure_call (tree addr)
19556 {
19557 if (!addr)
19558 return FALSE;
19559
19560 tree fntype = TREE_TYPE (addr);
19561 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
19562 TYPE_ATTRIBUTES (fntype)))
19563 return TRUE;
19564 return FALSE;
19565 }
19566
19567
19568 /* Emit a call instruction with pattern PAT. ADDR is the address of
19569 the call target. */
19570
19571 void
19572 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
19573 {
19574 rtx insn;
19575
19576 insn = emit_call_insn (pat);
19577
19578 /* The PIC register is live on entry to VxWorks PIC PLT entries.
19579 If the call might use such an entry, add a use of the PIC register
19580 to the instruction's CALL_INSN_FUNCTION_USAGE. */
19581 if (TARGET_VXWORKS_RTP
19582 && flag_pic
19583 && !sibcall
19584 && SYMBOL_REF_P (addr)
19585 && (SYMBOL_REF_DECL (addr)
19586 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
19587 : !SYMBOL_REF_LOCAL_P (addr)))
19588 {
19589 require_pic_register (NULL_RTX, false /*compute_now*/);
19590 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
19591 }
19592
19593 if (TARGET_FDPIC)
19594 {
19595 rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
19596 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), fdpic_reg);
19597 }
19598
19599 if (TARGET_AAPCS_BASED)
19600 {
19601 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
19602 linker. We need to add an IP clobber to allow setting
19603 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
19604 is not needed since it's a fixed register. */
19605 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
19606 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
19607 }
19608 }
19609
19610 /* Output a 'call' insn. */
19611 const char *
19612 output_call (rtx *operands)
19613 {
19614 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
19615
19616 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
19617 if (REGNO (operands[0]) == LR_REGNUM)
19618 {
19619 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
19620 output_asm_insn ("mov%?\t%0, %|lr", operands);
19621 }
19622
19623 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
19624
19625 if (TARGET_INTERWORK || arm_arch4t)
19626 output_asm_insn ("bx%?\t%0", operands);
19627 else
19628 output_asm_insn ("mov%?\t%|pc, %0", operands);
19629
19630 return "";
19631 }
19632
19633 /* Output a move from arm registers to arm registers of a long double.
19634 OPERANDS[0] is the destination.
19635 OPERANDS[1] is the source. */
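/* Hedged example (register numbers assumed): moving {r1, r2, r3} into
   {r2, r3, r4} copies the highest-numbered word first,
     mov r4, r3;  mov r3, r2;  mov r2, r1
   so the overlapping registers are not clobbered before they are read.  */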
19636 const char *
19637 output_mov_long_double_arm_from_arm (rtx *operands)
19638 {
19639 /* We have to be careful here because the two might overlap. */
19640 int dest_start = REGNO (operands[0]);
19641 int src_start = REGNO (operands[1]);
19642 rtx ops[2];
19643 int i;
19644
19645 if (dest_start < src_start)
19646 {
19647 for (i = 0; i < 3; i++)
19648 {
19649 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19650 ops[1] = gen_rtx_REG (SImode, src_start + i);
19651 output_asm_insn ("mov%?\t%0, %1", ops);
19652 }
19653 }
19654 else
19655 {
19656 for (i = 2; i >= 0; i--)
19657 {
19658 ops[0] = gen_rtx_REG (SImode, dest_start + i);
19659 ops[1] = gen_rtx_REG (SImode, src_start + i);
19660 output_asm_insn ("mov%?\t%0, %1", ops);
19661 }
19662 }
19663
19664 return "";
19665 }
19666
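/* Emit the pair of set insns that load the 32-bit value SRC into DEST as a
   low half followed by a high half.  Hedged illustration (destination
   register assumed): for the constant 0x12345678 the two sets typically
   assemble to
     movw  r0, #0x5678
     movt  r0, #0x1234  */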
19667 void
19668 arm_emit_movpair (rtx dest, rtx src)
19669 {
19670 /* If the src is an immediate, simplify it. */
19671 if (CONST_INT_P (src))
19672 {
19673 HOST_WIDE_INT val = INTVAL (src);
19674 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
19675 if ((val >> 16) & 0x0000ffff)
19676 {
19677 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
19678 GEN_INT (16)),
19679 GEN_INT ((val >> 16) & 0x0000ffff));
19680 rtx_insn *insn = get_last_insn ();
19681 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19682 }
19683 return;
19684 }
19685 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
19686 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
19687 rtx_insn *insn = get_last_insn ();
19688 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
19689 }
19690
19691 /* Output a move between double words. It must be REG<-MEM
19692 or MEM<-REG. */
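/* Hedged illustration (registers assumed): a DImode load from [r2] into
   r0/r1 is printed as "ldrd r0, [r2]" when LDRD can be used and as
   "ldmia r2, {r0, r1}" otherwise; the store cases mirror this with
   strd/stm.  */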
19693 const char *
19694 output_move_double (rtx *operands, bool emit, int *count)
19695 {
19696 enum rtx_code code0 = GET_CODE (operands[0]);
19697 enum rtx_code code1 = GET_CODE (operands[1]);
19698 rtx otherops[3];
19699 if (count)
19700 *count = 1;
19701
19702 /* The only case when this might happen is when
19703 you are looking at the length of a DImode instruction
19704 that has an invalid constant in it. */
19705 if (code0 == REG && code1 != MEM)
19706 {
19707 gcc_assert (!emit);
19708 *count = 2;
19709 return "";
19710 }
19711
19712 if (code0 == REG)
19713 {
19714 unsigned int reg0 = REGNO (operands[0]);
19715 const bool can_ldrd = TARGET_LDRD && (TARGET_THUMB2 || (reg0 % 2 == 0));
19716
19717 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
19718
19719 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
19720
19721 switch (GET_CODE (XEXP (operands[1], 0)))
19722 {
19723 case REG:
19724
19725 if (emit)
19726 {
19727 if (can_ldrd
19728 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
19729 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
19730 else
19731 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
19732 }
19733 break;
19734
19735 case PRE_INC:
19736 gcc_assert (can_ldrd);
19737 if (emit)
19738 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
19739 break;
19740
19741 case PRE_DEC:
19742 if (emit)
19743 {
19744 if (can_ldrd)
19745 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
19746 else
19747 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
19748 }
19749 break;
19750
19751 case POST_INC:
19752 if (emit)
19753 {
19754 if (can_ldrd)
19755 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
19756 else
19757 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
19758 }
19759 break;
19760
19761 case POST_DEC:
19762 gcc_assert (can_ldrd);
19763 if (emit)
19764 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
19765 break;
19766
19767 case PRE_MODIFY:
19768 case POST_MODIFY:
19769 /* Autoincrement addressing modes should never have overlapping
19770 base and destination registers, and overlapping index registers
19771 are already prohibited, so this doesn't need to worry about
19772 fix_cm3_ldrd. */
19773 otherops[0] = operands[0];
19774 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
19775 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
19776
19777 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
19778 {
19779 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
19780 {
19781 /* Registers overlap so split out the increment. */
19782 if (emit)
19783 {
19784 gcc_assert (can_ldrd);
19785 output_asm_insn ("add%?\t%1, %1, %2", otherops);
19786 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
19787 }
19788 if (count)
19789 *count = 2;
19790 }
19791 else
19792 {
19793 /* Use a single insn if we can.
19794 FIXME: IWMMXT allows offsets larger than ldrd can
19795 handle, fix these up with a pair of ldr. */
19796 if (can_ldrd
19797 && (TARGET_THUMB2
19798 || !CONST_INT_P (otherops[2])
19799 || (INTVAL (otherops[2]) > -256
19800 && INTVAL (otherops[2]) < 256)))
19801 {
19802 if (emit)
19803 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
19804 }
19805 else
19806 {
19807 if (emit)
19808 {
19809 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
19810 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19811 }
19812 if (count)
19813 *count = 2;
19814
19815 }
19816 }
19817 }
19818 else
19819 {
19820 /* Use a single insn if we can.
19821 FIXME: IWMMXT allows offsets larger than ldrd can handle,
19822 fix these up with a pair of ldr. */
19823 if (can_ldrd
19824 && (TARGET_THUMB2
19825 || !CONST_INT_P (otherops[2])
19826 || (INTVAL (otherops[2]) > -256
19827 && INTVAL (otherops[2]) < 256)))
19828 {
19829 if (emit)
19830 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
19831 }
19832 else
19833 {
19834 if (emit)
19835 {
19836 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
19837 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
19838 }
19839 if (count)
19840 *count = 2;
19841 }
19842 }
19843 break;
19844
19845 case LABEL_REF:
19846 case CONST:
19847 /* We might be able to use ldrd %0, %1 here. However the range is
19848 different to ldr/adr, and it is broken on some ARMv7-M
19849 implementations. */
19850 /* Use the second register of the pair to avoid problematic
19851 overlap. */
19852 otherops[1] = operands[1];
19853 if (emit)
19854 output_asm_insn ("adr%?\t%0, %1", otherops);
19855 operands[1] = otherops[0];
19856 if (emit)
19857 {
19858 if (can_ldrd)
19859 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19860 else
19861 output_asm_insn ("ldmia%?\t%1, %M0", operands);
19862 }
19863
19864 if (count)
19865 *count = 2;
19866 break;
19867
19868 /* ??? This needs checking for thumb2. */
19869 default:
19870 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
19871 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
19872 {
19873 otherops[0] = operands[0];
19874 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
19875 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
19876
19877 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
19878 {
19879 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
19880 {
19881 switch ((int) INTVAL (otherops[2]))
19882 {
19883 case -8:
19884 if (emit)
19885 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
19886 return "";
19887 case -4:
19888 if (TARGET_THUMB2)
19889 break;
19890 if (emit)
19891 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
19892 return "";
19893 case 4:
19894 if (TARGET_THUMB2)
19895 break;
19896 if (emit)
19897 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
19898 return "";
19899 }
19900 }
19901 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
19902 operands[1] = otherops[0];
19903 if (can_ldrd
19904 && (REG_P (otherops[2])
19905 || TARGET_THUMB2
19906 || (CONST_INT_P (otherops[2])
19907 && INTVAL (otherops[2]) > -256
19908 && INTVAL (otherops[2]) < 256)))
19909 {
19910 if (reg_overlap_mentioned_p (operands[0],
19911 otherops[2]))
19912 {
19913 /* Swap base and index registers over to
19914 avoid a conflict. */
19915 std::swap (otherops[1], otherops[2]);
19916 }
19917 /* If both registers conflict, it will usually
19918 have been fixed by a splitter. */
19919 if (reg_overlap_mentioned_p (operands[0], otherops[2])
19920 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
19921 {
19922 if (emit)
19923 {
19924 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19925 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
19926 }
19927 if (count)
19928 *count = 2;
19929 }
19930 else
19931 {
19932 otherops[0] = operands[0];
19933 if (emit)
19934 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
19935 }
19936 return "";
19937 }
19938
19939 if (CONST_INT_P (otherops[2]))
19940 {
19941 if (emit)
19942 {
19943 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
19944 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
19945 else
19946 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19947 }
19948 }
19949 else
19950 {
19951 if (emit)
19952 output_asm_insn ("add%?\t%0, %1, %2", otherops);
19953 }
19954 }
19955 else
19956 {
19957 if (emit)
19958 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
19959 }
19960
19961 if (count)
19962 *count = 2;
19963
19964 if (can_ldrd)
19965 return "ldrd%?\t%0, [%1]";
19966
19967 return "ldmia%?\t%1, %M0";
19968 }
19969 else
19970 {
19971 otherops[1] = adjust_address (operands[1], SImode, 4);
19972 /* Take care of overlapping base/data reg. */
19973 if (reg_mentioned_p (operands[0], operands[1]))
19974 {
19975 if (emit)
19976 {
19977 output_asm_insn ("ldr%?\t%0, %1", otherops);
19978 output_asm_insn ("ldr%?\t%0, %1", operands);
19979 }
19980 if (count)
19981 *count = 2;
19982
19983 }
19984 else
19985 {
19986 if (emit)
19987 {
19988 output_asm_insn ("ldr%?\t%0, %1", operands);
19989 output_asm_insn ("ldr%?\t%0, %1", otherops);
19990 }
19991 if (count)
19992 *count = 2;
19993 }
19994 }
19995 }
19996 }
19997 else
19998 {
19999 /* Constraints should ensure this. */
20000 gcc_assert (code0 == MEM && code1 == REG);
20001 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
20002 || (TARGET_ARM && TARGET_LDRD));
20003
20004 /* For TARGET_ARM the first source register of an STRD
20005 must be even. This is usually the case for double-word
20006 values but user assembly constraints can force an odd
20007 starting register. */
20008 bool allow_strd = TARGET_LDRD
20009 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
20010 switch (GET_CODE (XEXP (operands[0], 0)))
20011 {
20012 case REG:
20013 if (emit)
20014 {
20015 if (allow_strd)
20016 output_asm_insn ("strd%?\t%1, [%m0]", operands);
20017 else
20018 output_asm_insn ("stm%?\t%m0, %M1", operands);
20019 }
20020 break;
20021
20022 case PRE_INC:
20023 gcc_assert (allow_strd);
20024 if (emit)
20025 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
20026 break;
20027
20028 case PRE_DEC:
20029 if (emit)
20030 {
20031 if (allow_strd)
20032 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
20033 else
20034 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
20035 }
20036 break;
20037
20038 case POST_INC:
20039 if (emit)
20040 {
20041 if (allow_strd)
20042 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
20043 else
20044 output_asm_insn ("stm%?\t%m0!, %M1", operands);
20045 }
20046 break;
20047
20048 case POST_DEC:
20049 gcc_assert (allow_strd);
20050 if (emit)
20051 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
20052 break;
20053
20054 case PRE_MODIFY:
20055 case POST_MODIFY:
20056 otherops[0] = operands[1];
20057 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
20058 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
20059
20060 /* IWMMXT allows offsets larger than strd can handle,
20061 fix these up with a pair of str. */
20062 if (!TARGET_THUMB2
20063 && CONST_INT_P (otherops[2])
20064 && (INTVAL(otherops[2]) <= -256
20065 || INTVAL(otherops[2]) >= 256))
20066 {
20067 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20068 {
20069 if (emit)
20070 {
20071 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
20072 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20073 }
20074 if (count)
20075 *count = 2;
20076 }
20077 else
20078 {
20079 if (emit)
20080 {
20081 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
20082 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
20083 }
20084 if (count)
20085 *count = 2;
20086 }
20087 }
20088 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
20089 {
20090 if (emit)
20091 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
20092 }
20093 else
20094 {
20095 if (emit)
20096 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
20097 }
20098 break;
20099
20100 case PLUS:
20101 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
20102 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
20103 {
20104 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
20105 {
20106 case -8:
20107 if (emit)
20108 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
20109 return "";
20110
20111 case -4:
20112 if (TARGET_THUMB2)
20113 break;
20114 if (emit)
20115 output_asm_insn ("stmda%?\t%m0, %M1", operands);
20116 return "";
20117
20118 case 4:
20119 if (TARGET_THUMB2)
20120 break;
20121 if (emit)
20122 output_asm_insn ("stmib%?\t%m0, %M1", operands);
20123 return "";
20124 }
20125 }
20126 if (allow_strd
20127 && (REG_P (otherops[2])
20128 || TARGET_THUMB2
20129 || (CONST_INT_P (otherops[2])
20130 && INTVAL (otherops[2]) > -256
20131 && INTVAL (otherops[2]) < 256)))
20132 {
20133 otherops[0] = operands[1];
20134 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
20135 if (emit)
20136 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
20137 return "";
20138 }
20139 /* Fall through */
20140
20141 default:
20142 otherops[0] = adjust_address (operands[0], SImode, 4);
20143 otherops[1] = operands[1];
20144 if (emit)
20145 {
20146 output_asm_insn ("str%?\t%1, %0", operands);
20147 output_asm_insn ("str%?\t%H1, %0", otherops);
20148 }
20149 if (count)
20150 *count = 2;
20151 }
20152 }
20153
20154 return "";
20155 }
20156
20157 /* Output a move, load or store for quad-word vectors in ARM registers. Only
20158 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
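/* Hedged illustration: a quad-word load from [r4] into r0-r3 prints
   something like "ldmia r4, {r0-r3}"; the register numbers are assumed
   for the example.  */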
20159
20160 const char *
20161 output_move_quad (rtx *operands)
20162 {
20163 if (REG_P (operands[0]))
20164 {
20165 /* Load, or reg->reg move. */
20166
20167 if (MEM_P (operands[1]))
20168 {
20169 switch (GET_CODE (XEXP (operands[1], 0)))
20170 {
20171 case REG:
20172 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
20173 break;
20174
20175 case LABEL_REF:
20176 case CONST:
20177 output_asm_insn ("adr%?\t%0, %1", operands);
20178 output_asm_insn ("ldmia%?\t%0, %M0", operands);
20179 break;
20180
20181 default:
20182 gcc_unreachable ();
20183 }
20184 }
20185 else
20186 {
20187 rtx ops[2];
20188 int dest, src, i;
20189
20190 gcc_assert (REG_P (operands[1]));
20191
20192 dest = REGNO (operands[0]);
20193 src = REGNO (operands[1]);
20194
20195 /* This seems pretty dumb, but hopefully GCC won't try to do it
20196 very often. */
20197 if (dest < src)
20198 for (i = 0; i < 4; i++)
20199 {
20200 ops[0] = gen_rtx_REG (SImode, dest + i);
20201 ops[1] = gen_rtx_REG (SImode, src + i);
20202 output_asm_insn ("mov%?\t%0, %1", ops);
20203 }
20204 else
20205 for (i = 3; i >= 0; i--)
20206 {
20207 ops[0] = gen_rtx_REG (SImode, dest + i);
20208 ops[1] = gen_rtx_REG (SImode, src + i);
20209 output_asm_insn ("mov%?\t%0, %1", ops);
20210 }
20211 }
20212 }
20213 else
20214 {
20215 gcc_assert (MEM_P (operands[0]));
20216 gcc_assert (REG_P (operands[1]));
20217 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
20218
20219 switch (GET_CODE (XEXP (operands[0], 0)))
20220 {
20221 case REG:
20222 output_asm_insn ("stm%?\t%m0, %M1", operands);
20223 break;
20224
20225 default:
20226 gcc_unreachable ();
20227 }
20228 }
20229
20230 return "";
20231 }
20232
20233 /* Output a VFP load or store instruction. */
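/* Hedged illustration (registers and offsets assumed): a DFmode load prints
   something like "vldr.64 d8, [r0, #8]" and an SFmode store something like
   "vstr.32 s0, [sp]".  */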
20234
20235 const char *
20236 output_move_vfp (rtx *operands)
20237 {
20238 rtx reg, mem, addr, ops[2];
20239 int load = REG_P (operands[0]);
20240 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
20241 int sp = (!TARGET_VFP_FP16INST
20242 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
20243 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
20244 const char *templ;
20245 char buff[50];
20246 machine_mode mode;
20247
20248 reg = operands[!load];
20249 mem = operands[load];
20250
20251 mode = GET_MODE (reg);
20252
20253 gcc_assert (REG_P (reg));
20254 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
20255 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
20256 || mode == SFmode
20257 || mode == DFmode
20258 || mode == HImode
20259 || mode == SImode
20260 || mode == DImode
20261 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
20262 gcc_assert (MEM_P (mem));
20263
20264 addr = XEXP (mem, 0);
20265
20266 switch (GET_CODE (addr))
20267 {
20268 case PRE_DEC:
20269 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
20270 ops[0] = XEXP (addr, 0);
20271 ops[1] = reg;
20272 break;
20273
20274 case POST_INC:
20275 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
20276 ops[0] = XEXP (addr, 0);
20277 ops[1] = reg;
20278 break;
20279
20280 default:
20281 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
20282 ops[0] = reg;
20283 ops[1] = mem;
20284 break;
20285 }
20286
20287 sprintf (buff, templ,
20288 load ? "ld" : "st",
20289 dp ? "64" : sp ? "32" : "16",
20290 dp ? "P" : "",
20291 integer_p ? "\t%@ int" : "");
20292 output_asm_insn (buff, ops);
20293
20294 return "";
20295 }
20296
20297 /* Output a Neon double-word or quad-word load or store, or a load
20298 or store for larger structure modes.
20299
20300 WARNING: The ordering of elements is weird in big-endian mode,
20301 because the EABI requires that vectors stored in memory appear
20302 as though they were stored by a VSTM instruction.
20303 GCC RTL defines element ordering based on in-memory order.
20304 This can be different from the architectural ordering of elements
20305 within a NEON register. The intrinsics defined in arm_neon.h use the
20306 NEON register element ordering, not the GCC RTL element ordering.
20307
20308 For example, the in-memory ordering of a big-endian quadword
20309 vector with 16-bit elements when stored from register pair {d0,d1}
20310 will be (lowest address first, d0[N] is NEON register element N):
20311
20312 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
20313
20314 When necessary, quadword registers (dN, dN+1) are moved to ARM
20315 registers from rN in the order:
20316
20317 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
20318
20319 So that STM/LDM can be used on vectors in ARM registers, and the
20320 same memory layout will result as if VSTM/VLDM were used.
20321
20322 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
20323 possible, which allows use of appropriate alignment tags.
20324 Note that the choice of "64" is independent of the actual vector
20325 element size; this size simply ensures that the behavior is
20326 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
20327
20328 Due to limitations of those instructions, use of VST1.64/VLD1.64
20329 is not possible if:
20330 - the address contains PRE_DEC, or
20331 - the mode refers to more than 4 double-word registers
20332
20333 In those cases, it would be possible to replace VSTM/VLDM by a
20334 sequence of instructions; this is not currently implemented since
20335 this is not certain to actually improve performance. */
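/* Hedged illustration (register numbers assumed): a quad-word load whose
   address is a plain register prints something like
   "vld1.64 {d16, d17}, [r0]", while a larger structure mode falls back to
   something like "vldmia r0, {d16-d23}".  */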
20336
20337 const char *
20338 output_move_neon (rtx *operands)
20339 {
20340 rtx reg, mem, addr, ops[2];
20341 int regno, nregs, load = REG_P (operands[0]);
20342 const char *templ;
20343 char buff[50];
20344 machine_mode mode;
20345
20346 reg = operands[!load];
20347 mem = operands[load];
20348
20349 mode = GET_MODE (reg);
20350
20351 gcc_assert (REG_P (reg));
20352 regno = REGNO (reg);
20353 nregs = REG_NREGS (reg) / 2;
20354 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
20355 || NEON_REGNO_OK_FOR_QUAD (regno));
20356 gcc_assert (VALID_NEON_DREG_MODE (mode)
20357 || VALID_NEON_QREG_MODE (mode)
20358 || VALID_NEON_STRUCT_MODE (mode));
20359 gcc_assert (MEM_P (mem));
20360
20361 addr = XEXP (mem, 0);
20362
20363 /* Strip off const from addresses like (const (plus (...))). */
20364 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20365 addr = XEXP (addr, 0);
20366
20367 switch (GET_CODE (addr))
20368 {
20369 case POST_INC:
20370 /* We have to use vldm / vstm for too-large modes. */
20371 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20372 {
20373 templ = "v%smia%%?\t%%0!, %%h1";
20374 ops[0] = XEXP (addr, 0);
20375 }
20376 else
20377 {
20378 templ = "v%s1.64\t%%h1, %%A0";
20379 ops[0] = mem;
20380 }
20381 ops[1] = reg;
20382 break;
20383
20384 case PRE_DEC:
20385 /* We have to use vldm / vstm in this case, since there is no
20386 pre-decrement form of the vld1 / vst1 instructions. */
20387 templ = "v%smdb%%?\t%%0!, %%h1";
20388 ops[0] = XEXP (addr, 0);
20389 ops[1] = reg;
20390 break;
20391
20392 case POST_MODIFY:
20393 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
20394 gcc_unreachable ();
20395
20396 case REG:
20397 /* We have to use vldm / vstm for too-large modes. */
20398 if (nregs > 1)
20399 {
20400 if (nregs > 4 || (TARGET_HAVE_MVE && nregs >= 2))
20401 templ = "v%smia%%?\t%%m0, %%h1";
20402 else
20403 templ = "v%s1.64\t%%h1, %%A0";
20404
20405 ops[0] = mem;
20406 ops[1] = reg;
20407 break;
20408 }
20409 /* Fall through. */
20410 case PLUS:
20411 if (GET_CODE (addr) == PLUS)
20412 addr = XEXP (addr, 0);
20413 /* Fall through. */
20414 case LABEL_REF:
20415 {
20416 int i;
20417 int overlap = -1;
20418 for (i = 0; i < nregs; i++)
20419 {
20420 /* We're only using DImode here because it's a convenient
20421 size. */
20422 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
20423 ops[1] = adjust_address (mem, DImode, 8 * i);
20424 if (reg_overlap_mentioned_p (ops[0], mem))
20425 {
20426 gcc_assert (overlap == -1);
20427 overlap = i;
20428 }
20429 else
20430 {
20431 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20432 sprintf (buff, "v%sr.64\t%%P0, %%1", load ? "ld" : "st");
20433 else
20434 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20435 output_asm_insn (buff, ops);
20436 }
20437 }
20438 if (overlap != -1)
20439 {
20440 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
20441 ops[1] = adjust_address (mem, SImode, 8 * overlap);
20442 if (TARGET_HAVE_MVE && LABEL_REF_P (addr))
20443 sprintf (buff, "v%sr.32\t%%P0, %%1", load ? "ld" : "st");
20444 else
20445 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
20446 output_asm_insn (buff, ops);
20447 }
20448
20449 return "";
20450 }
20451
20452 default:
20453 gcc_unreachable ();
20454 }
20455
20456 sprintf (buff, templ, load ? "ld" : "st");
20457 output_asm_insn (buff, ops);
20458
20459 return "";
20460 }
20461
20462 /* Compute and return the length of neon_mov<mode>, where <mode> is
20463 one of VSTRUCT modes: EI, OI, CI or XI. */
20464 int
20465 arm_attr_length_move_neon (rtx_insn *insn)
20466 {
20467 rtx reg, mem, addr;
20468 int load;
20469 machine_mode mode;
20470
20471 extract_insn_cached (insn);
20472
20473 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
20474 {
20475 mode = GET_MODE (recog_data.operand[0]);
20476 switch (mode)
20477 {
20478 case E_EImode:
20479 case E_OImode:
20480 return 8;
20481 case E_CImode:
20482 return 12;
20483 case E_XImode:
20484 return 16;
20485 default:
20486 gcc_unreachable ();
20487 }
20488 }
20489
20490 load = REG_P (recog_data.operand[0]);
20491 reg = recog_data.operand[!load];
20492 mem = recog_data.operand[load];
20493
20494 gcc_assert (MEM_P (mem));
20495
20496 addr = XEXP (mem, 0);
20497
20498 /* Strip off const from addresses like (const (plus (...))). */
20499 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
20500 addr = XEXP (addr, 0);
20501
20502 if (LABEL_REF_P (addr) || GET_CODE (addr) == PLUS)
20503 {
20504 int insns = REG_NREGS (reg) / 2;
20505 return insns * 4;
20506 }
20507 else
20508 return 4;
20509 }
20510
20511 /* Return nonzero if the offset in the address is an immediate. Otherwise,
20512 return zero. */
20513
20514 int
20515 arm_address_offset_is_imm (rtx_insn *insn)
20516 {
20517 rtx mem, addr;
20518
20519 extract_insn_cached (insn);
20520
20521 if (REG_P (recog_data.operand[0]))
20522 return 0;
20523
20524 mem = recog_data.operand[0];
20525
20526 gcc_assert (MEM_P (mem));
20527
20528 addr = XEXP (mem, 0);
20529
20530 if (REG_P (addr)
20531 || (GET_CODE (addr) == PLUS
20532 && REG_P (XEXP (addr, 0))
20533 && CONST_INT_P (XEXP (addr, 1))))
20534 return 1;
20535 else
20536 return 0;
20537 }
20538
20539 /* Output an ADD r, s, #n where n may be too big for one instruction.
20540 If n is zero and the destination is the same as the source, output nothing. */
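/* Worked example (registers assumed): adding 0x10004 to r1 with the result
   in r0 cannot be encoded as a single immediate, so this prints two
   instructions via output_multi_immediate below:
     add r0, r1, #4
     add r0, r0, #65536  */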
20541 const char *
20542 output_add_immediate (rtx *operands)
20543 {
20544 HOST_WIDE_INT n = INTVAL (operands[2]);
20545
20546 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
20547 {
20548 if (n < 0)
20549 output_multi_immediate (operands,
20550 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
20551 -n);
20552 else
20553 output_multi_immediate (operands,
20554 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
20555 n);
20556 }
20557
20558 return "";
20559 }
20560
20561 /* Output a multiple immediate operation.
20562 OPERANDS is the vector of operands referred to in the output patterns.
20563 INSTR1 is the output pattern to use for the first constant.
20564 INSTR2 is the output pattern to use for subsequent constants.
20565 IMMED_OP is the index of the constant slot in OPERANDS.
20566 N is the constant value. */
20567 static const char *
20568 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
20569 int immed_op, HOST_WIDE_INT n)
20570 {
20571 #if HOST_BITS_PER_WIDE_INT > 32
20572 n &= 0xffffffff;
20573 #endif
20574
20575 if (n == 0)
20576 {
20577 /* Quick and easy output. */
20578 operands[immed_op] = const0_rtx;
20579 output_asm_insn (instr1, operands);
20580 }
20581 else
20582 {
20583 int i;
20584 const char * instr = instr1;
20585
20586 /* Note that n is never zero here (which would give no output). */
20587 for (i = 0; i < 32; i += 2)
20588 {
20589 if (n & (3 << i))
20590 {
20591 operands[immed_op] = GEN_INT (n & (255 << i));
20592 output_asm_insn (instr, operands);
20593 instr = instr2;
20594 i += 6;
20595 }
20596 }
20597 }
20598
20599 return "";
20600 }
20601
20602 /* Return the name of a shifter operation. */
20603 static const char *
20604 arm_shift_nmem(enum rtx_code code)
20605 {
20606 switch (code)
20607 {
20608 case ASHIFT:
20609 return ARM_LSL_NAME;
20610
20611 case ASHIFTRT:
20612 return "asr";
20613
20614 case LSHIFTRT:
20615 return "lsr";
20616
20617 case ROTATERT:
20618 return "ror";
20619
20620 default:
20621 abort();
20622 }
20623 }
20624
20625 /* Return the appropriate ARM instruction for the operation code.
20626 The returned result should not be overwritten. OP is the rtx of the
20627 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
20628 was shifted. */
20629 const char *
20630 arithmetic_instr (rtx op, int shift_first_arg)
20631 {
20632 switch (GET_CODE (op))
20633 {
20634 case PLUS:
20635 return "add";
20636
20637 case MINUS:
20638 return shift_first_arg ? "rsb" : "sub";
20639
20640 case IOR:
20641 return "orr";
20642
20643 case XOR:
20644 return "eor";
20645
20646 case AND:
20647 return "and";
20648
20649 case ASHIFT:
20650 case ASHIFTRT:
20651 case LSHIFTRT:
20652 case ROTATERT:
20653 return arm_shift_nmem(GET_CODE(op));
20654
20655 default:
20656 gcc_unreachable ();
20657 }
20658 }
20659
20660 /* Ensure valid constant shifts and return the appropriate shift mnemonic
20661 for the operation code. The returned result should not be overwritten.
20662 OP is the rtx code of the shift.
20663 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
20664 constant shift amount otherwise. */
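/* Hedged example: for (mult (reg) (const_int 8)) this returns the lsl
   mnemonic and sets *AMOUNTP to 3, since multiplication by a power of two
   is printed as a left shift.  */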
20665 static const char *
20666 shift_op (rtx op, HOST_WIDE_INT *amountp)
20667 {
20668 const char * mnem;
20669 enum rtx_code code = GET_CODE (op);
20670
20671 switch (code)
20672 {
20673 case ROTATE:
20674 if (!CONST_INT_P (XEXP (op, 1)))
20675 {
20676 output_operand_lossage ("invalid shift operand");
20677 return NULL;
20678 }
20679
20680 code = ROTATERT;
20681 *amountp = 32 - INTVAL (XEXP (op, 1));
20682 mnem = "ror";
20683 break;
20684
20685 case ASHIFT:
20686 case ASHIFTRT:
20687 case LSHIFTRT:
20688 case ROTATERT:
20689 mnem = arm_shift_nmem(code);
20690 if (CONST_INT_P (XEXP (op, 1)))
20691 {
20692 *amountp = INTVAL (XEXP (op, 1));
20693 }
20694 else if (REG_P (XEXP (op, 1)))
20695 {
20696 *amountp = -1;
20697 return mnem;
20698 }
20699 else
20700 {
20701 output_operand_lossage ("invalid shift operand");
20702 return NULL;
20703 }
20704 break;
20705
20706 case MULT:
20707 /* We never have to worry about the amount being other than a
20708 power of 2, since this case can never be reloaded from a reg. */
20709 if (!CONST_INT_P (XEXP (op, 1)))
20710 {
20711 output_operand_lossage ("invalid shift operand");
20712 return NULL;
20713 }
20714
20715 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
20716
20717 /* Amount must be a power of two. */
20718 if (*amountp & (*amountp - 1))
20719 {
20720 output_operand_lossage ("invalid shift operand");
20721 return NULL;
20722 }
20723
20724 *amountp = exact_log2 (*amountp);
20725 gcc_assert (IN_RANGE (*amountp, 0, 31));
20726 return ARM_LSL_NAME;
20727
20728 default:
20729 output_operand_lossage ("invalid shift operand");
20730 return NULL;
20731 }
20732
20733 /* This is not 100% correct, but follows from the desire to merge
20734 multiplication by a power of 2 with the recognizer for a
20735 shift. >=32 is not a valid shift for "lsl", so we must try and
20736 output a shift that produces the correct arithmetical result.
20737 Using lsr #32 is identical except for the fact that the carry bit
20738 is not set correctly if we set the flags; but we never use the
20739 carry bit from such an operation, so we can ignore that. */
20740 if (code == ROTATERT)
20741 /* Rotate is just modulo 32. */
20742 *amountp &= 31;
20743 else if (*amountp != (*amountp & 31))
20744 {
20745 if (code == ASHIFT)
20746 mnem = "lsr";
20747 *amountp = 32;
20748 }
20749
20750 /* Shifts of 0 are no-ops. */
20751 if (*amountp == 0)
20752 return NULL;
20753
20754 return mnem;
20755 }
20756
20757 /* Output a .ascii pseudo-op, keeping track of lengths. This is
20758 because /bin/as is horribly restrictive. The judgement about
20759 whether or not each character is 'printable' (and can be output as
20760 is) or not (and must be printed with an octal escape) must be made
20761 with reference to the *host* character set -- the situation is
20762 similar to that discussed in the comments above pp_c_char in
20763 c-pretty-print.c. */
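/* Hedged example: for the three bytes "Hi\n" this emits
     .ascii  "Hi\012"
   with the newline escaped as octal because it is not printable.  */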
20764
20765 #define MAX_ASCII_LEN 51
20766
20767 void
20768 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
20769 {
20770 int i;
20771 int len_so_far = 0;
20772
20773 fputs ("\t.ascii\t\"", stream);
20774
20775 for (i = 0; i < len; i++)
20776 {
20777 int c = p[i];
20778
20779 if (len_so_far >= MAX_ASCII_LEN)
20780 {
20781 fputs ("\"\n\t.ascii\t\"", stream);
20782 len_so_far = 0;
20783 }
20784
20785 if (ISPRINT (c))
20786 {
20787 if (c == '\\' || c == '\"')
20788 {
20789 putc ('\\', stream);
20790 len_so_far++;
20791 }
20792 putc (c, stream);
20793 len_so_far++;
20794 }
20795 else
20796 {
20797 fprintf (stream, "\\%03o", c);
20798 len_so_far += 4;
20799 }
20800 }
20801
20802 fputs ("\"\n", stream);
20803 }
20804 \f
20805
20806 /* Compute the register save mask for registers 0 through 12
20807 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
20808
20809 static unsigned long
20810 arm_compute_save_reg0_reg12_mask (void)
20811 {
20812 unsigned long func_type = arm_current_func_type ();
20813 unsigned long save_reg_mask = 0;
20814 unsigned int reg;
20815
20816 if (IS_INTERRUPT (func_type))
20817 {
20818 unsigned int max_reg;
20819 /* Interrupt functions must not corrupt any registers,
20820 even call clobbered ones. If this is a leaf function
20821 we can just examine the registers used by the RTL, but
20822 otherwise we have to assume that whatever function is
20823 called might clobber anything, and so we have to save
20824 all the call-clobbered registers as well. */
20825 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
20826 /* FIQ handlers have registers r8 - r12 banked, so
20827 we only need to check r0 - r7. Normal ISRs only
20828 bank r14 and r15, so we must check up to r12.
20829 r13 is the stack pointer which is always preserved,
20830 so we do not need to consider it here. */
20831 max_reg = 7;
20832 else
20833 max_reg = 12;
20834
20835 for (reg = 0; reg <= max_reg; reg++)
20836 if (reg_needs_saving_p (reg))
20837 save_reg_mask |= (1 << reg);
20838
20839 /* Also save the pic base register if necessary. */
20840 if (PIC_REGISTER_MAY_NEED_SAVING
20841 && crtl->uses_pic_offset_table)
20842 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20843 }
20844 else if (IS_VOLATILE(func_type))
20845 {
20846 /* For noreturn functions we historically omitted register saves
20847 altogether. However this really messes up debugging. As a
20848 compromise save just the frame pointers. Combined with the link
20849 register saved elsewhere this should be sufficient to get
20850 a backtrace. */
20851 if (frame_pointer_needed)
20852 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20853 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
20854 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
20855 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
20856 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
20857 }
20858 else
20859 {
20860 /* In the normal case we only need to save those registers
20861 which are call saved and which are used by this function. */
20862 for (reg = 0; reg <= 11; reg++)
20863 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
20864 save_reg_mask |= (1 << reg);
20865
20866 /* Handle the frame pointer as a special case. */
20867 if (frame_pointer_needed)
20868 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
20869
20870 /* If we aren't loading the PIC register,
20871 don't stack it even though it may be live. */
20872 if (PIC_REGISTER_MAY_NEED_SAVING
20873 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
20874 || crtl->uses_pic_offset_table))
20875 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
20876
20877 /* The prologue will copy SP into R0, so save it. */
20878 if (IS_STACKALIGN (func_type))
20879 save_reg_mask |= 1;
20880 }
20881
20882 /* Save registers so the exception handler can modify them. */
20883 if (crtl->calls_eh_return)
20884 {
20885 unsigned int i;
20886
20887 for (i = 0; ; i++)
20888 {
20889 reg = EH_RETURN_DATA_REGNO (i);
20890 if (reg == INVALID_REGNUM)
20891 break;
20892 save_reg_mask |= 1 << reg;
20893 }
20894 }
20895
20896 return save_reg_mask;
20897 }
20898
20899 /* Return true if r3 is live at the start of the function. */
20900
20901 static bool
20902 arm_r3_live_at_start_p (void)
20903 {
20904 /* Just look at cfg info, which is still close enough to correct at this
20905 point. This gives false positives for broken functions that might use
20906 uninitialized data that happens to be allocated in r3, but who cares? */
20907 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
20908 }
20909
20910 /* Compute the number of bytes used to store the static chain register on the
20911 stack, above the stack frame. We need to know this accurately to get the
20912 alignment of the rest of the stack frame correct. */
20913
20914 static int
20915 arm_compute_static_chain_stack_bytes (void)
20916 {
20917 /* Once the value is updated from the init value of -1, do not
20918 re-compute. */
20919 if (cfun->machine->static_chain_stack_bytes != -1)
20920 return cfun->machine->static_chain_stack_bytes;
20921
20922 /* See the defining assertion in arm_expand_prologue. */
20923 if (IS_NESTED (arm_current_func_type ())
20924 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20925 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
20926 || flag_stack_clash_protection)
20927 && !df_regs_ever_live_p (LR_REGNUM)))
20928 && arm_r3_live_at_start_p ()
20929 && crtl->args.pretend_args_size == 0)
20930 return 4;
20931
20932 return 0;
20933 }
20934
20935 /* Compute a bit mask of which core registers need to be
20936 saved on the stack for the current function.
20937 This is used by arm_compute_frame_layout, which may add extra registers. */
20938
20939 static unsigned long
20940 arm_compute_save_core_reg_mask (void)
20941 {
20942 unsigned int save_reg_mask = 0;
20943 unsigned long func_type = arm_current_func_type ();
20944 unsigned int reg;
20945
20946 if (IS_NAKED (func_type))
20947 /* This should never really happen. */
20948 return 0;
20949
20950 /* If we are creating a stack frame, then we must save the frame pointer,
20951 IP (which will hold the old stack pointer), LR and the PC. */
20952 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20953 save_reg_mask |=
20954 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
20955 | (1 << IP_REGNUM)
20956 | (1 << LR_REGNUM)
20957 | (1 << PC_REGNUM);
20958
20959 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
20960
20961 /* Decide if we need to save the link register.
20962 Interrupt routines have their own banked link register,
20963 so they never need to save it.
20964 Otherwise if we do not use the link register we do not need to save
20965 it. If we are pushing other registers onto the stack however, we
20966 can save an instruction in the epilogue by pushing the link register
20967 now and then popping it back into the PC. This incurs extra memory
20968 accesses though, so we only do it when optimizing for size, and only
20969 if we know that we will not need a fancy return sequence. */
20970 if (df_regs_ever_live_p (LR_REGNUM)
20971 || (save_reg_mask
20972 && optimize_size
20973 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
20974 && !crtl->tail_call_emit
20975 && !crtl->calls_eh_return))
20976 save_reg_mask |= 1 << LR_REGNUM;
20977
20978 if (cfun->machine->lr_save_eliminated)
20979 save_reg_mask &= ~ (1 << LR_REGNUM);
20980
20981 if (TARGET_REALLY_IWMMXT
20982 && ((bit_count (save_reg_mask)
20983 + ARM_NUM_INTS (crtl->args.pretend_args_size +
20984 arm_compute_static_chain_stack_bytes())
20985 ) % 2) != 0)
20986 {
20987 /* The total number of registers that are going to be pushed
20988 onto the stack is odd. We need to ensure that the stack
20989 is 64-bit aligned before we start to save iWMMXt registers,
20990 and also before we start to create locals. (A local variable
20991 might be a double or long long which we will load/store using
20992 an iWMMXt instruction). Therefore we need to push another
20993 ARM register, so that the stack will be 64-bit aligned. We
20994 used to pass values in a tail call. */
20995 used to pass values in a tail call. */
20996 for (reg = 4; reg <= 12; reg++)
20997 if ((save_reg_mask & (1 << reg)) == 0)
20998 break;
20999
21000 if (reg <= 12)
21001 save_reg_mask |= (1 << reg);
21002 else
21003 {
21004 cfun->machine->sibcall_blocked = 1;
21005 save_reg_mask |= (1 << 3);
21006 }
21007 }
21008
21009 /* We may need to push an additional register for use initializing the
21010 PIC base register. */
21011 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
21012 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
21013 {
21014 reg = thumb_find_work_register (1 << 4);
21015 if (!call_used_or_fixed_reg_p (reg))
21016 save_reg_mask |= (1 << reg);
21017 }
21018
21019 return save_reg_mask;
21020 }
21021
21022 /* Compute a bit mask of which core registers need to be
21023 saved on the stack for the current function. */
21024 static unsigned long
21025 thumb1_compute_save_core_reg_mask (void)
21026 {
21027 unsigned long mask;
21028 unsigned reg;
21029
21030 mask = 0;
21031 for (reg = 0; reg < 12; reg ++)
21032 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
21033 mask |= 1 << reg;
21034
21035 /* Handle the frame pointer as a special case. */
21036 if (frame_pointer_needed)
21037 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
21038
21039 if (flag_pic
21040 && !TARGET_SINGLE_PIC_BASE
21041 && arm_pic_register != INVALID_REGNUM
21042 && crtl->uses_pic_offset_table)
21043 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
21044
21045 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
21046 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
21047 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
21048
21049 /* LR will also be pushed if any lo regs are pushed. */
21050 if (mask & 0xff || thumb_force_lr_save ())
21051 mask |= (1 << LR_REGNUM);
21052
21053 bool call_clobbered_scratch
21054 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
21055 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
21056
21057 /* Make sure we have a low work register if we need one. We will
21058 need one if we are going to push a high register, but we are not
21059 currently intending to push a low register. However if both the
21060 prologue and epilogue have a spare call-clobbered low register,
21061 then we won't need to find an additional work register. It does
21062 not need to be the same register in the prologue and
21063 epilogue. */
21064 if ((mask & 0xff) == 0
21065 && !call_clobbered_scratch
21066 && ((mask & 0x0f00) || TARGET_BACKTRACE))
21067 {
21068 /* Use thumb_find_work_register to choose which register
21069 we will use. If the register is live then we will
21070 have to push it. Use LAST_LO_REGNUM as our fallback
21071 choice for the register to select. */
21072 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
21073 /* Make sure the register returned by thumb_find_work_register is
21074 not part of the return value. */
21075 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
21076 reg = LAST_LO_REGNUM;
21077
21078 if (callee_saved_reg_p (reg))
21079 mask |= 1 << reg;
21080 }
21081
21082 /* The 504 below is 8 bytes less than 512 because there are two possible
21083 alignment words. We can't tell here if they will be present or not so we
21084 have to play it safe and assume that they are. */
21085 if ((CALLER_INTERWORKING_SLOT_SIZE +
21086 ROUND_UP_WORD (get_frame_size ()) +
21087 crtl->outgoing_args_size) >= 504)
21088 {
21089 /* This is the same as the code in thumb1_expand_prologue() which
21090 determines which register to use for stack decrement. */
21091 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
21092 if (mask & (1 << reg))
21093 break;
21094
21095 if (reg > LAST_LO_REGNUM)
21096 {
21097 /* Make sure we have a register available for stack decrement. */
21098 mask |= 1 << LAST_LO_REGNUM;
21099 }
21100 }
21101
21102 return mask;
21103 }
21104
21105 /* Return the number of bytes required to save VFP registers. */
21106 static int
21107 arm_get_vfp_saved_size (void)
21108 {
21109 unsigned int regno;
21110 int count;
21111 int saved;
21112
21113 saved = 0;
21114 /* Space for saved VFP registers. */
21115 if (TARGET_VFP_BASE)
21116 {
21117 count = 0;
21118 for (regno = FIRST_VFP_REGNUM;
21119 regno < LAST_VFP_REGNUM;
21120 regno += 2)
21121 {
21122 if (!reg_needs_saving_p (regno) && !reg_needs_saving_p (regno + 1))
21123 {
21124 if (count > 0)
21125 {
21126 /* Workaround ARM10 VFPr1 bug. */
21127 if (count == 2 && !arm_arch6)
21128 count++;
21129 saved += count * 8;
21130 }
21131 count = 0;
21132 }
21133 else
21134 count++;
21135 }
21136 if (count > 0)
21137 {
21138 if (count == 2 && !arm_arch6)
21139 count++;
21140 saved += count * 8;
21141 }
21142 }
21143 return saved;
21144 }
21145
21146
21147 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
21148 everything bar the final return instruction. If simple_return is true,
21149 then do not output the epilogue, because it has already been emitted in RTL.
21150
21151 Note: do not forget to update length attribute of corresponding insn pattern
21152 when changing assembly output (eg. length attribute of
21153 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
21154 register clearing sequences). */
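/* Hedged illustration: a normal function that saved r4 and LR typically
   returns with "pop {r4, pc}", while a leaf function built for a core with
   BX returns with "bx lr"; the exact sequence depends on the saved register
   mask and the function type.  */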
21155 const char *
21156 output_return_instruction (rtx operand, bool really_return, bool reverse,
21157 bool simple_return)
21158 {
21159 char conditional[10];
21160 char instr[100];
21161 unsigned reg;
21162 unsigned long live_regs_mask;
21163 unsigned long func_type;
21164 arm_stack_offsets *offsets;
21165
21166 func_type = arm_current_func_type ();
21167
21168 if (IS_NAKED (func_type))
21169 return "";
21170
21171 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
21172 {
21173 /* If this function was declared non-returning, and we have
21174 found a tail call, then we have to trust that the called
21175 function won't return. */
21176 if (really_return)
21177 {
21178 rtx ops[2];
21179
21180 /* Otherwise, trap an attempted return by aborting. */
21181 ops[0] = operand;
21182 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
21183 : "abort");
21184 assemble_external_libcall (ops[1]);
21185 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
21186 }
21187
21188 return "";
21189 }
21190
21191 gcc_assert (!cfun->calls_alloca || really_return);
21192
21193 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
21194
21195 cfun->machine->return_used_this_function = 1;
21196
21197 offsets = arm_get_frame_offsets ();
21198 live_regs_mask = offsets->saved_regs_mask;
21199
21200 if (!simple_return && live_regs_mask)
21201 {
21202 const char * return_reg;
21203
21204 /* If we do not have any special requirements for function exit
21205 (e.g. interworking) then we can load the return address
21206 directly into the PC. Otherwise we must load it into LR. */
21207 if (really_return
21208 && !IS_CMSE_ENTRY (func_type)
21209 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
21210 return_reg = reg_names[PC_REGNUM];
21211 else
21212 return_reg = reg_names[LR_REGNUM];
21213
21214 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
21215 {
21216 /* There are three possible reasons for the IP register
21217 being saved. 1) a stack frame was created, in which case
21218 IP contains the old stack pointer, or 2) an ISR routine
21219 corrupted it, or 3) it was saved to align the stack on
21220 iWMMXt. In case 1, restore IP into SP, otherwise just
21221 restore IP. */
21222 if (frame_pointer_needed)
21223 {
21224 live_regs_mask &= ~ (1 << IP_REGNUM);
21225 live_regs_mask |= (1 << SP_REGNUM);
21226 }
21227 else
21228 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
21229 }
21230
21231 /* On some ARM architectures it is faster to use LDR rather than
21232 LDM to load a single register. On other architectures, the
21233 cost is the same. In 26 bit mode, or for exception handlers,
21234 we have to use LDM to load the PC so that the CPSR is also
21235 restored. */
21236 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
21237 if (live_regs_mask == (1U << reg))
21238 break;
21239
21240 if (reg <= LAST_ARM_REGNUM
21241 && (reg != LR_REGNUM
21242 || ! really_return
21243 || ! IS_INTERRUPT (func_type)))
21244 {
21245 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
21246 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
21247 }
21248 else
21249 {
21250 char *p;
21251 int first = 1;
21252
21253 /* Generate the load multiple instruction to restore the
21254 registers. Note we can get here, even if
21255 frame_pointer_needed is true, but only if sp already
21256 points to the base of the saved core registers. */
21257 if (live_regs_mask & (1 << SP_REGNUM))
21258 {
21259 unsigned HOST_WIDE_INT stack_adjust;
21260
21261 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
21262 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
21263
21264 if (stack_adjust && arm_arch5t && TARGET_ARM)
21265 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
21266 else
21267 {
21268 /* If we can't use ldmib (SA110 bug),
21269 then try to pop r3 instead. */
21270 if (stack_adjust)
21271 live_regs_mask |= 1 << 3;
21272
21273 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
21274 }
21275 }
21276 /* For interrupt returns we have to use an LDM rather than
21277 a POP so that we can use the exception return variant. */
21278 else if (IS_INTERRUPT (func_type))
21279 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
21280 else
21281 sprintf (instr, "pop%s\t{", conditional);
21282
21283 p = instr + strlen (instr);
21284
21285 for (reg = 0; reg <= SP_REGNUM; reg++)
21286 if (live_regs_mask & (1 << reg))
21287 {
21288 int l = strlen (reg_names[reg]);
21289
21290 if (first)
21291 first = 0;
21292 else
21293 {
21294 memcpy (p, ", ", 2);
21295 p += 2;
21296 }
21297
21298 memcpy (p, "%|", 2);
21299 memcpy (p + 2, reg_names[reg], l);
21300 p += l + 2;
21301 }
21302
21303 if (live_regs_mask & (1 << LR_REGNUM))
21304 {
21305 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
21306 /* If returning from an interrupt, restore the CPSR. */
21307 if (IS_INTERRUPT (func_type))
21308 strcat (p, "^");
21309 }
21310 else
21311 strcpy (p, "}");
21312 }
21313
21314 output_asm_insn (instr, & operand);
21315
21316 /* See if we need to generate an extra instruction to
21317 perform the actual function return. */
21318 if (really_return
21319 && func_type != ARM_FT_INTERWORKED
21320 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
21321 {
21322 /* The return has already been handled
21323 by loading the LR into the PC. */
21324 return "";
21325 }
21326 }
21327
21328 if (really_return)
21329 {
21330 switch ((int) ARM_FUNC_TYPE (func_type))
21331 {
21332 case ARM_FT_ISR:
21333 case ARM_FT_FIQ:
21334 /* ??? This is wrong for unified assembly syntax. */
21335 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
21336 break;
21337
21338 case ARM_FT_INTERWORKED:
21339 gcc_assert (arm_arch5t || arm_arch4t);
21340 sprintf (instr, "bx%s\t%%|lr", conditional);
21341 break;
21342
21343 case ARM_FT_EXCEPTION:
21344 /* ??? This is wrong for unified assembly syntax. */
21345 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
21346 break;
21347
21348 default:
21349 if (IS_CMSE_ENTRY (func_type))
21350 {
21351 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
21352 emitted by cmse_nonsecure_entry_clear_before_return () and the
21353 VSTR/VLDR instructions in the prologue and epilogue. */
21354 if (!TARGET_HAVE_FPCXT_CMSE)
21355 {
21356 /* Check if we have to clear the 'GE bits' which is only used if
21357 parallel add and subtraction instructions are available. */
21358 if (TARGET_INT_SIMD)
21359 snprintf (instr, sizeof (instr),
21360 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
21361 else
21362 snprintf (instr, sizeof (instr),
21363 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
21364
21365 output_asm_insn (instr, & operand);
21366 /* Do not clear FPSCR if targeting Armv8.1-M Mainline, VLDR takes
21367 care of it. */
21368 if (TARGET_HARD_FLOAT)
21369 {
21370 /* Clear the cumulative exception-status bits (0-4,7) and
21371 the condition code bits (28-31) of the FPSCR. We need
21372 to remember to clear the first scratch register used
21373 (IP) and save and restore the second (r4).
21374
21375 Important note: the length of the
21376 thumb2_cmse_entry_return insn pattern must account for
21377 the size of the below instructions. */
21378 output_asm_insn ("push\t{%|r4}", & operand);
21379 output_asm_insn ("vmrs\t%|ip, fpscr", & operand);
21380 output_asm_insn ("movw\t%|r4, #65376", & operand);
21381 output_asm_insn ("movt\t%|r4, #4095", & operand);
21382 output_asm_insn ("and\t%|ip, %|r4", & operand);
21383 output_asm_insn ("vmsr\tfpscr, %|ip", & operand);
21384 output_asm_insn ("pop\t{%|r4}", & operand);
21385 output_asm_insn ("mov\t%|ip, %|lr", & operand);
21386 }
21387 }
21388 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
21389 }
21390 /* Use bx if it's available. */
21391 else if (arm_arch5t || arm_arch4t)
21392 sprintf (instr, "bx%s\t%%|lr", conditional);
21393 else
21394 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
21395 break;
21396 }
21397
21398 output_asm_insn (instr, & operand);
21399 }
21400
21401 return "";
21402 }
21403
21404 /* Output in FILE asm statements needed to declare the NAME of the function
21405 defined by its DECL node. */
21406
21407 void
21408 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
21409 {
21410 size_t cmse_name_len;
21411 char *cmse_name = 0;
21412 char cmse_prefix[] = "__acle_se_";
21413
21414 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
21415 extra function label for each function with the 'cmse_nonsecure_entry'
21416 attribute. This extra function label should be prepended with
21417 '__acle_se_', telling the linker that it needs to create secure gateway
21418 veneers for this function. */
21419 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
21420 DECL_ATTRIBUTES (decl)))
21421 {
21422 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
21423 cmse_name = XALLOCAVEC (char, cmse_name_len);
21424 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
21425 targetm.asm_out.globalize_label (file, cmse_name);
21426
21427 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
21428 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
21429 }
21430
21431 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
21432 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21433 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21434 ASM_OUTPUT_LABEL (file, name);
21435
21436 if (cmse_name)
21437 ASM_OUTPUT_LABEL (file, cmse_name);
21438
21439 ARM_OUTPUT_FN_UNWIND (file, TRUE);
21440 }
21441
21442 /* Write the function name into the code section, directly preceding
21443 the function prologue.
21444
21445 Code will be output similar to this:
21446 t0
21447 .ascii "arm_poke_function_name", 0
21448 .align
21449 t1
21450 .word 0xff000000 + (t1 - t0)
21451 arm_poke_function_name
21452 mov ip, sp
21453 stmfd sp!, {fp, ip, lr, pc}
21454 sub fp, ip, #4
21455
21456 When performing a stack backtrace, code can inspect the value
21457 of 'pc' stored at 'fp' + 0. If the trace function then looks
21458 at location pc - 12 and the top 8 bits are set, then we know
21459 that there is a function name embedded immediately preceding this
21460 location, and that its length is ((pc[-3]) & ~0xff000000).
21461
21462 We assume that pc is declared as a pointer to an unsigned long.
21463
21464 It is of no benefit to output the function name if we are assembling
21465 a leaf function. These function types will not contain a stack
21466 backtrace structure, therefore it is not possible to determine the
21467 function name. */
21468 void
21469 arm_poke_function_name (FILE *stream, const char *name)
21470 {
21471 unsigned long alignlength;
21472 unsigned long length;
21473 rtx x;
21474
21475 length = strlen (name) + 1;
21476 alignlength = ROUND_UP_WORD (length);
21477
21478 ASM_OUTPUT_ASCII (stream, name, length);
21479 ASM_OUTPUT_ALIGN (stream, 2);
21480 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
21481 assemble_aligned_integer (UNITS_PER_WORD, x);
21482 }
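/* For illustration only (a sketch, not part of the compiler): a stack
   backtracer might recover the embedded name roughly as follows, where PC
   is the pointer to unsigned long described above and the variable names
   are hypothetical:

     unsigned long marker = pc[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
         unsigned long len = marker & ~0xff000000;
         const char *name = (const char *) pc - 12 - len;
       }

   After this, NAME points at the word-aligned, NUL-padded name string
   emitted by arm_poke_function_name above.  */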
21483
21484 /* Place some comments into the assembler stream
21485 describing the current function. */
21486 static void
21487 arm_output_function_prologue (FILE *f)
21488 {
21489 unsigned long func_type;
21490
21491 /* Sanity check. */
21492 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
21493
21494 func_type = arm_current_func_type ();
21495
21496 switch ((int) ARM_FUNC_TYPE (func_type))
21497 {
21498 default:
21499 case ARM_FT_NORMAL:
21500 break;
21501 case ARM_FT_INTERWORKED:
21502 asm_fprintf (f, "\t%@ Function supports interworking.\n");
21503 break;
21504 case ARM_FT_ISR:
21505 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
21506 break;
21507 case ARM_FT_FIQ:
21508 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
21509 break;
21510 case ARM_FT_EXCEPTION:
21511 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
21512 break;
21513 }
21514
21515 if (IS_NAKED (func_type))
21516 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
21517
21518 if (IS_VOLATILE (func_type))
21519 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
21520
21521 if (IS_NESTED (func_type))
21522 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
21523 if (IS_STACKALIGN (func_type))
21524 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
21525 if (IS_CMSE_ENTRY (func_type))
21526 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
21527
21528 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
21529 (HOST_WIDE_INT) crtl->args.size,
21530 crtl->args.pretend_args_size,
21531 (HOST_WIDE_INT) get_frame_size ());
21532
21533 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
21534 frame_pointer_needed,
21535 cfun->machine->uses_anonymous_args);
21536
21537 if (cfun->machine->lr_save_eliminated)
21538 asm_fprintf (f, "\t%@ link register save eliminated.\n");
21539
21540 if (crtl->calls_eh_return)
21541 asm_fprintf (f, "\t%@ Calls __builtin_eh_return.\n");
21542
21543 }
21544
21545 static void
21546 arm_output_function_epilogue (FILE *)
21547 {
21548 arm_stack_offsets *offsets;
21549
21550 if (TARGET_THUMB1)
21551 {
21552 int regno;
21553
21554 /* Emit any call-via-reg trampolines that are needed for v4t support
21555 of call_reg and call_value_reg type insns. */
21556 for (regno = 0; regno < LR_REGNUM; regno++)
21557 {
21558 rtx label = cfun->machine->call_via[regno];
21559
21560 if (label != NULL)
21561 {
21562 switch_to_section (function_section (current_function_decl));
21563 targetm.asm_out.internal_label (asm_out_file, "L",
21564 CODE_LABEL_NUMBER (label));
21565 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21566 }
21567 }
21568
21569 /* ??? Probably not safe to set this here, since it assumes that a
21570 function will be emitted as assembly immediately after we generate
21571 RTL for it. This does not happen for inline functions. */
21572 cfun->machine->return_used_this_function = 0;
21573 }
21574 else /* TARGET_32BIT */
21575 {
21576 /* We need to take into account any stack-frame rounding. */
21577 offsets = arm_get_frame_offsets ();
21578
21579 gcc_assert (!use_return_insn (FALSE, NULL)
21580 || (cfun->machine->return_used_this_function != 0)
21581 || offsets->saved_regs == offsets->outgoing_args
21582 || frame_pointer_needed);
21583 }
21584 }
21585
21586 /* Generate and emit a sequence of insns equivalent to PUSH, but using
21587 STR and STRD. If an even number of registers are being pushed, one
21588 or more STRD patterns are created for each register pair. If an
21589 odd number of registers are pushed, emit an initial STR followed by
21590 as many STRD instructions as are needed. This works best when the
21591 stack is initially 64-bit aligned (the normal case), since it
21592 ensures that each STRD is also 64-bit aligned. */
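/* As a worked example (a sketch, not literal compiler output): for a
   SAVED_REGS_MASK covering {r4, r5, r6} the register count is odd, so the
   emitted sequence is roughly

     str   r4, [sp, #-12]!      @ single store allocates all 12 bytes
     strd  r5, r6, [sp, #4]     @ following pair stays doubleword aligned

   whereas for {r4, r5, r6, r7} the first STRD performs the writeback:

     strd  r4, r5, [sp, #-16]!
     strd  r6, r7, [sp, #8]  */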
21593 static void
21594 thumb2_emit_strd_push (unsigned long saved_regs_mask)
21595 {
21596 int num_regs = 0;
21597 int i;
21598 int regno;
21599 rtx par = NULL_RTX;
21600 rtx dwarf = NULL_RTX;
21601 rtx tmp;
21602 bool first = true;
21603
21604 num_regs = bit_count (saved_regs_mask);
21605
21606 /* Must be at least one register to save, and can't save SP or PC. */
21607 gcc_assert (num_regs > 0 && num_regs <= 14);
21608 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21609 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21610
21611 /* Create sequence for DWARF info. All the frame-related data for
21612 debugging is held in this wrapper. */
21613 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21614
21615 /* Describe the stack adjustment. */
21616 tmp = gen_rtx_SET (stack_pointer_rtx,
21617 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21618 RTX_FRAME_RELATED_P (tmp) = 1;
21619 XVECEXP (dwarf, 0, 0) = tmp;
21620
21621 /* Find the first register. */
21622 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
21623 ;
21624
21625 i = 0;
21626
21627 /* If there's an odd number of registers to push, start off by
21628 pushing a single register. This ensures that subsequent strd
21629 operations are dword aligned (assuming that SP was originally
21630 64-bit aligned). */
21631 if ((num_regs & 1) != 0)
21632 {
21633 rtx reg, mem, insn;
21634
21635 reg = gen_rtx_REG (SImode, regno);
21636 if (num_regs == 1)
21637 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
21638 stack_pointer_rtx));
21639 else
21640 mem = gen_frame_mem (Pmode,
21641 gen_rtx_PRE_MODIFY
21642 (Pmode, stack_pointer_rtx,
21643 plus_constant (Pmode, stack_pointer_rtx,
21644 -4 * num_regs)));
21645
21646 tmp = gen_rtx_SET (mem, reg);
21647 RTX_FRAME_RELATED_P (tmp) = 1;
21648 insn = emit_insn (tmp);
21649 RTX_FRAME_RELATED_P (insn) = 1;
21650 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21651 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
21652 RTX_FRAME_RELATED_P (tmp) = 1;
21653 i++;
21654 regno++;
21655 XVECEXP (dwarf, 0, i) = tmp;
21656 first = false;
21657 }
21658
21659 while (i < num_regs)
21660 if (saved_regs_mask & (1 << regno))
21661 {
21662 rtx reg1, reg2, mem1, mem2;
21663 rtx tmp0, tmp1, tmp2;
21664 int regno2;
21665
21666 /* Find the register to pair with this one. */
21667 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
21668 regno2++)
21669 ;
21670
21671 reg1 = gen_rtx_REG (SImode, regno);
21672 reg2 = gen_rtx_REG (SImode, regno2);
21673
21674 if (first)
21675 {
21676 rtx insn;
21677
21678 first = false;
21679 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21680 stack_pointer_rtx,
21681 -4 * num_regs));
21682 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21683 stack_pointer_rtx,
21684 -4 * (num_regs - 1)));
21685 tmp0 = gen_rtx_SET (stack_pointer_rtx,
21686 plus_constant (Pmode, stack_pointer_rtx,
21687 -4 * (num_regs)));
21688 tmp1 = gen_rtx_SET (mem1, reg1);
21689 tmp2 = gen_rtx_SET (mem2, reg2);
21690 RTX_FRAME_RELATED_P (tmp0) = 1;
21691 RTX_FRAME_RELATED_P (tmp1) = 1;
21692 RTX_FRAME_RELATED_P (tmp2) = 1;
21693 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
21694 XVECEXP (par, 0, 0) = tmp0;
21695 XVECEXP (par, 0, 1) = tmp1;
21696 XVECEXP (par, 0, 2) = tmp2;
21697 insn = emit_insn (par);
21698 RTX_FRAME_RELATED_P (insn) = 1;
21699 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21700 }
21701 else
21702 {
21703 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
21704 stack_pointer_rtx,
21705 4 * i));
21706 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
21707 stack_pointer_rtx,
21708 4 * (i + 1)));
21709 tmp1 = gen_rtx_SET (mem1, reg1);
21710 tmp2 = gen_rtx_SET (mem2, reg2);
21711 RTX_FRAME_RELATED_P (tmp1) = 1;
21712 RTX_FRAME_RELATED_P (tmp2) = 1;
21713 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21714 XVECEXP (par, 0, 0) = tmp1;
21715 XVECEXP (par, 0, 1) = tmp2;
21716 emit_insn (par);
21717 }
21718
21719 /* Create unwind information. This is an approximation. */
21720 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
21721 plus_constant (Pmode,
21722 stack_pointer_rtx,
21723 4 * i)),
21724 reg1);
21725 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
21726 plus_constant (Pmode,
21727 stack_pointer_rtx,
21728 4 * (i + 1))),
21729 reg2);
21730
21731 RTX_FRAME_RELATED_P (tmp1) = 1;
21732 RTX_FRAME_RELATED_P (tmp2) = 1;
21733 XVECEXP (dwarf, 0, i + 1) = tmp1;
21734 XVECEXP (dwarf, 0, i + 2) = tmp2;
21735 i += 2;
21736 regno = regno2 + 1;
21737 }
21738 else
21739 regno++;
21740
21741 return;
21742 }
21743
21744 /* STRD in ARM mode requires consecutive registers. This function emits STRD
21745 whenever possible, otherwise it emits single-word stores. The first store
21746 also allocates stack space for all saved registers, using writeback with
21747 pre-indexed addressing. All other stores use offset addressing. If no STRD
21748 can be emitted, this function emits a sequence of single-word stores,
21749 and not an STM as before, because single-word stores provide more
21750 scheduling freedom and can be turned into an STM by peephole optimizations. */
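/* A worked example (a sketch, not literal compiler output): for a
   SAVED_REGS_MASK covering {r4, r5, r7}, offset starts at -12 and the
   emitted sequence is roughly

     strd  r4, r5, [sp, #-12]!   @ first store allocates the whole area
     str   r7, [sp, #8]          @ r6 is absent, so r7 gets a single STR  */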
21751 static void
21752 arm_emit_strd_push (unsigned long saved_regs_mask)
21753 {
21754 int num_regs = 0;
21755 int i, j, dwarf_index = 0;
21756 int offset = 0;
21757 rtx dwarf = NULL_RTX;
21758 rtx insn = NULL_RTX;
21759 rtx tmp, mem;
21760
21761 /* TODO: More efficient code could be emitted by changing the
21762 layout, e.g., first push all pairs that can use STRD to keep the
21763 stack aligned, and then push all other registers. */
21764 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21765 if (saved_regs_mask & (1 << i))
21766 num_regs++;
21767
21768 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
21769 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
21770 gcc_assert (num_regs > 0);
21771
21772 /* Create sequence for DWARF info. */
21773 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
21774
21775 /* For dwarf info, we generate explicit stack update. */
21776 tmp = gen_rtx_SET (stack_pointer_rtx,
21777 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
21778 RTX_FRAME_RELATED_P (tmp) = 1;
21779 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21780
21781 /* Save registers. */
21782 offset = - 4 * num_regs;
21783 j = 0;
21784 while (j <= LAST_ARM_REGNUM)
21785 if (saved_regs_mask & (1 << j))
21786 {
21787 if ((j % 2 == 0)
21788 && (saved_regs_mask & (1 << (j + 1))))
21789 {
21790 /* The current register and the next register form a register pair
21791 for which STRD can be generated. */
21792 if (offset < 0)
21793 {
21794 /* Allocate stack space for all saved registers. */
21795 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21796 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21797 mem = gen_frame_mem (DImode, tmp);
21798 offset = 0;
21799 }
21800 else if (offset > 0)
21801 mem = gen_frame_mem (DImode,
21802 plus_constant (Pmode,
21803 stack_pointer_rtx,
21804 offset));
21805 else
21806 mem = gen_frame_mem (DImode, stack_pointer_rtx);
21807
21808 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
21809 RTX_FRAME_RELATED_P (tmp) = 1;
21810 tmp = emit_insn (tmp);
21811
21812 /* Record the first store insn. */
21813 if (dwarf_index == 1)
21814 insn = tmp;
21815
21816 /* Generate dwarf info. */
21817 mem = gen_frame_mem (SImode,
21818 plus_constant (Pmode,
21819 stack_pointer_rtx,
21820 offset));
21821 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21822 RTX_FRAME_RELATED_P (tmp) = 1;
21823 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21824
21825 mem = gen_frame_mem (SImode,
21826 plus_constant (Pmode,
21827 stack_pointer_rtx,
21828 offset + 4));
21829 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
21830 RTX_FRAME_RELATED_P (tmp) = 1;
21831 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21832
21833 offset += 8;
21834 j += 2;
21835 }
21836 else
21837 {
21838 /* Emit a single word store. */
21839 if (offset < 0)
21840 {
21841 /* Allocate stack space for all saved registers. */
21842 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
21843 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21844 mem = gen_frame_mem (SImode, tmp);
21845 offset = 0;
21846 }
21847 else if (offset > 0)
21848 mem = gen_frame_mem (SImode,
21849 plus_constant (Pmode,
21850 stack_pointer_rtx,
21851 offset));
21852 else
21853 mem = gen_frame_mem (SImode, stack_pointer_rtx);
21854
21855 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21856 RTX_FRAME_RELATED_P (tmp) = 1;
21857 tmp = emit_insn (tmp);
21858
21859 /* Record the first store insn. */
21860 if (dwarf_index == 1)
21861 insn = tmp;
21862
21863 /* Generate dwarf info. */
21864 mem = gen_frame_mem (SImode,
21865 plus_constant(Pmode,
21866 stack_pointer_rtx,
21867 offset));
21868 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
21869 RTX_FRAME_RELATED_P (tmp) = 1;
21870 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
21871
21872 offset += 4;
21873 j += 1;
21874 }
21875 }
21876 else
21877 j++;
21878
21879 /* Attach dwarf info to the first insn we generate. */
21880 gcc_assert (insn != NULL_RTX);
21881 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21882 RTX_FRAME_RELATED_P (insn) = 1;
21883 }
21884
21885 /* Generate and emit an insn that we will recognize as a push_multi.
21886 Unfortunately, since this insn does not reflect very well the actual
21887 semantics of the operation, we need to annotate the insn for the benefit
21888 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
21889 MASK for registers that should be annotated for DWARF2 frame unwind
21890 information. */
21891 static rtx
21892 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
21893 {
21894 int num_regs = 0;
21895 int num_dwarf_regs = 0;
21896 int i, j;
21897 rtx par;
21898 rtx dwarf;
21899 int dwarf_par_index;
21900 rtx tmp, reg;
21901
21902 /* We don't record the PC in the dwarf frame information. */
21903 dwarf_regs_mask &= ~(1 << PC_REGNUM);
21904
21905 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21906 {
21907 if (mask & (1 << i))
21908 num_regs++;
21909 if (dwarf_regs_mask & (1 << i))
21910 num_dwarf_regs++;
21911 }
21912
21913 gcc_assert (num_regs && num_regs <= 16);
21914 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
21915
21916 /* For the body of the insn we are going to generate an UNSPEC in
21917 parallel with several USEs. This allows the insn to be recognized
21918 by the push_multi pattern in the arm.md file.
21919
21920 The body of the insn looks something like this:
21921
21922 (parallel [
21923 (set (mem:BLK (pre_modify:SI (reg:SI sp)
21924 (const_int:SI <num>)))
21925 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
21926 (use (reg:SI XX))
21927 (use (reg:SI YY))
21928 ...
21929 ])
21930
21931 For the frame note however, we try to be more explicit and actually
21932 show each register being stored into the stack frame, plus a (single)
21933 decrement of the stack pointer. We do it this way in order to be
21934 friendly to the stack unwinding code, which only wants to see a single
21935 stack decrement per instruction. The RTL we generate for the note looks
21936 something like this:
21937
21938 (sequence [
21939 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
21940 (set (mem:SI (reg:SI sp)) (reg:SI r4))
21941 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
21942 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
21943 ...
21944 ])
21945
21946 FIXME:: In an ideal world the PRE_MODIFY would not exist and
21947 instead we'd have a parallel expression detailing all
21948 the stores to the various memory addresses so that debug
21949 information is more up-to-date. Remember however while writing
21950 this to take care of the constraints with the push instruction.
21951
21952 Note also that this has to be taken care of for the VFP registers.
21953
21954 For more see PR43399. */
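/* As a concrete instance (illustrative only, assuming DWARF_REGS_MASK
   equals MASK): for MASK covering {r4, r5, lr} the emitted insn is a
   single "push {r4, r5, lr}", while the attached note describes
   sp = sp - 12 followed by stores of r4, r5 and lr at [sp], [sp, #4]
   and [sp, #8], matching the SEQUENCE shown above.  */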
21955
21956 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
21957 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
21958 dwarf_par_index = 1;
21959
21960 for (i = 0; i <= LAST_ARM_REGNUM; i++)
21961 {
21962 if (mask & (1 << i))
21963 {
21964 reg = gen_rtx_REG (SImode, i);
21965
21966 XVECEXP (par, 0, 0)
21967 = gen_rtx_SET (gen_frame_mem
21968 (BLKmode,
21969 gen_rtx_PRE_MODIFY (Pmode,
21970 stack_pointer_rtx,
21971 plus_constant
21972 (Pmode, stack_pointer_rtx,
21973 -4 * num_regs))
21974 ),
21975 gen_rtx_UNSPEC (BLKmode,
21976 gen_rtvec (1, reg),
21977 UNSPEC_PUSH_MULT));
21978
21979 if (dwarf_regs_mask & (1 << i))
21980 {
21981 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
21982 reg);
21983 RTX_FRAME_RELATED_P (tmp) = 1;
21984 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
21985 }
21986
21987 break;
21988 }
21989 }
21990
21991 for (j = 1, i++; j < num_regs; i++)
21992 {
21993 if (mask & (1 << i))
21994 {
21995 reg = gen_rtx_REG (SImode, i);
21996
21997 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
21998
21999 if (dwarf_regs_mask & (1 << i))
22000 {
22001 tmp
22002 = gen_rtx_SET (gen_frame_mem
22003 (SImode,
22004 plus_constant (Pmode, stack_pointer_rtx,
22005 4 * j)),
22006 reg);
22007 RTX_FRAME_RELATED_P (tmp) = 1;
22008 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
22009 }
22010
22011 j++;
22012 }
22013 }
22014
22015 par = emit_insn (par);
22016
22017 tmp = gen_rtx_SET (stack_pointer_rtx,
22018 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
22019 RTX_FRAME_RELATED_P (tmp) = 1;
22020 XVECEXP (dwarf, 0, 0) = tmp;
22021
22022 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
22023
22024 return par;
22025 }
22026
22027 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
22028 SIZE is the offset to be adjusted.
22029 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
22030 static void
22031 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
22032 {
22033 rtx dwarf;
22034
22035 RTX_FRAME_RELATED_P (insn) = 1;
22036 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
22037 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
22038 }
22039
22040 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
22041 SAVED_REGS_MASK shows which registers need to be restored.
22042
22043 Unfortunately, since this insn does not reflect very well the actual
22044 semantics of the operation, we need to annotate the insn for the benefit
22045 of DWARF2 frame unwind information. */
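/* For instance (illustrative only): a SAVED_REGS_MASK covering
   {r4, r5, pc} emits a single "pop {r4, r5, pc}", which both restores
   the registers and returns; the REG_CFA_RESTORE notes cover r4 and r5
   but not PC.  */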
22046 static void
22047 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
22048 {
22049 int num_regs = 0;
22050 int i, j;
22051 rtx par;
22052 rtx dwarf = NULL_RTX;
22053 rtx tmp, reg;
22054 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22055 int offset_adj;
22056 int emit_update;
22057
22058 offset_adj = return_in_pc ? 1 : 0;
22059 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22060 if (saved_regs_mask & (1 << i))
22061 num_regs++;
22062
22063 gcc_assert (num_regs && num_regs <= 16);
22064
22065 /* If SP is in reglist, then we don't emit SP update insn. */
22066 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
22067
22068 /* The parallel needs to hold num_regs SETs
22069 and one SET for the stack update. */
22070 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
22071
22072 if (return_in_pc)
22073 XVECEXP (par, 0, 0) = ret_rtx;
22074
22075 if (emit_update)
22076 {
22077 /* Increment the stack pointer, based on there being
22078 num_regs 4-byte registers to restore. */
22079 tmp = gen_rtx_SET (stack_pointer_rtx,
22080 plus_constant (Pmode,
22081 stack_pointer_rtx,
22082 4 * num_regs));
22083 RTX_FRAME_RELATED_P (tmp) = 1;
22084 XVECEXP (par, 0, offset_adj) = tmp;
22085 }
22086
22087 /* Now restore every reg, which may include PC. */
22088 for (j = 0, i = 0; j < num_regs; i++)
22089 if (saved_regs_mask & (1 << i))
22090 {
22091 reg = gen_rtx_REG (SImode, i);
22092 if ((num_regs == 1) && emit_update && !return_in_pc)
22093 {
22094 /* Emit single load with writeback. */
22095 tmp = gen_frame_mem (SImode,
22096 gen_rtx_POST_INC (Pmode,
22097 stack_pointer_rtx));
22098 tmp = emit_insn (gen_rtx_SET (reg, tmp));
22099 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22100 return;
22101 }
22102
22103 tmp = gen_rtx_SET (reg,
22104 gen_frame_mem
22105 (SImode,
22106 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
22107 RTX_FRAME_RELATED_P (tmp) = 1;
22108 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
22109
22110 /* We need to maintain a sequence for DWARF info too. As dwarf info
22111 should not have PC, skip PC. */
22112 if (i != PC_REGNUM)
22113 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22114
22115 j++;
22116 }
22117
22118 if (return_in_pc)
22119 par = emit_jump_insn (par);
22120 else
22121 par = emit_insn (par);
22122
22123 REG_NOTES (par) = dwarf;
22124 if (!return_in_pc)
22125 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
22126 stack_pointer_rtx, stack_pointer_rtx);
22127 }
22128
22129 /* Generate and emit an insn pattern that we will recognize as a pop_multi
22130 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
22131
22132 Unfortunately, since this insn does not reflect very well the actual
22133 semantics of the operation, we need to annotate the insn for the benefit
22134 of DWARF2 frame unwind information. */
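/* For instance (illustrative only): popping four consecutive D-registers
   starting at d8 with BASE_REG being the stack pointer emits roughly
   "vldm sp!, {d8-d11}", and the attached notes record sp = sp + 32 plus
   one REG_CFA_RESTORE per register.  */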
22135 static void
22136 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
22137 {
22138 int i, j;
22139 rtx par;
22140 rtx dwarf = NULL_RTX;
22141 rtx tmp, reg;
22142
22143 gcc_assert (num_regs && num_regs <= 32);
22144
22145 /* Workaround ARM10 VFPr1 bug. */
22146 if (num_regs == 2 && !arm_arch6)
22147 {
22148 if (first_reg == 15)
22149 first_reg--;
22150
22151 num_regs++;
22152 }
22153
22154 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
22155 there could be up to 32 D-registers to restore.
22156 If there are more than 16 D-registers, make two recursive calls,
22157 each of which emits one pop_multi instruction. */
22158 if (num_regs > 16)
22159 {
22160 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
22161 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
22162 return;
22163 }
22164
22165 /* The parallel needs to hold num_regs SETs
22166 and one SET for the stack update. */
22167 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
22168
22169 /* Increment the stack pointer, based on there being
22170 num_regs 8-byte registers to restore. */
22171 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
22172 RTX_FRAME_RELATED_P (tmp) = 1;
22173 XVECEXP (par, 0, 0) = tmp;
22174
22175 /* Now show every reg that will be restored, using a SET for each. */
22176 for (j = 0, i=first_reg; j < num_regs; i += 2)
22177 {
22178 reg = gen_rtx_REG (DFmode, i);
22179
22180 tmp = gen_rtx_SET (reg,
22181 gen_frame_mem
22182 (DFmode,
22183 plus_constant (Pmode, base_reg, 8 * j)));
22184 RTX_FRAME_RELATED_P (tmp) = 1;
22185 XVECEXP (par, 0, j + 1) = tmp;
22186
22187 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22188
22189 j++;
22190 }
22191
22192 par = emit_insn (par);
22193 REG_NOTES (par) = dwarf;
22194
22195 /* Make sure the CFA doesn't leave with IP_REGNUM to allow unwinding from FP. */
22196 if (REGNO (base_reg) == IP_REGNUM)
22197 {
22198 RTX_FRAME_RELATED_P (par) = 1;
22199 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
22200 }
22201 else
22202 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
22203 base_reg, base_reg);
22204 }
22205
22206 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
22207 even number of registers is being popped, multiple LDRD patterns are created
22208 for all register pairs. If an odd number of registers is popped, the last
22209 register is loaded using an LDR pattern. */
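/* A worked example (a sketch only): for a SAVED_REGS_MASK covering
   {r4, r5, r6, pc}, PC is excluded from the pairing and the epilogue
   becomes roughly

     ldrd  r4, r5, [sp]
     add   sp, sp, #8
     pop   {r6, pc}        @ emitted via arm_emit_multi_reg_pop  */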
22210 static void
22211 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
22212 {
22213 int num_regs = 0;
22214 int i, j;
22215 rtx par = NULL_RTX;
22216 rtx dwarf = NULL_RTX;
22217 rtx tmp, reg, tmp1;
22218 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
22219
22220 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22221 if (saved_regs_mask & (1 << i))
22222 num_regs++;
22223
22224 gcc_assert (num_regs && num_regs <= 16);
22225
22226 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
22227 to be popped. So, if num_regs was even, it now becomes odd and we
22228 can generate a pop with PC; if num_regs was odd, it is now even and
22229 an ldr with return can be generated for PC. */
22230 if (return_in_pc)
22231 num_regs--;
22232
22233 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
22234
22235 /* Var j iterates over all the registers to gather all the registers in
22236 saved_regs_mask. Var i gives the index of the saved registers in the stack
22237 frame. A PARALLEL RTX of a register pair is created here, so that the
22238 pattern for LDRD can be matched. As PC is always the last register to be
22239 popped, and we have already decremented num_regs if PC is to be popped,
22240 we don't have to worry about PC in this loop. */
22241 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
22242 if (saved_regs_mask & (1 << j))
22243 {
22244 /* Create RTX for memory load. */
22245 reg = gen_rtx_REG (SImode, j);
22246 tmp = gen_rtx_SET (reg,
22247 gen_frame_mem (SImode,
22248 plus_constant (Pmode,
22249 stack_pointer_rtx, 4 * i)));
22250 RTX_FRAME_RELATED_P (tmp) = 1;
22251
22252 if (i % 2 == 0)
22253 {
22254 /* When saved-register index (i) is even, the RTX to be emitted is
22255 yet to be created. Hence create it first. The LDRD pattern we
22256 are generating is :
22257 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
22258 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
22259 where target registers need not be consecutive. */
22260 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22261 dwarf = NULL_RTX;
22262 }
22263
22264 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
22265 added as 0th element and if i is odd, reg_i is added as 1st element
22266 of LDRD pattern shown above. */
22267 XVECEXP (par, 0, (i % 2)) = tmp;
22268 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22269
22270 if ((i % 2) == 1)
22271 {
22272 /* When saved-register index (i) is odd, RTXs for both the registers
22273 to be loaded are generated in above given LDRD pattern, and the
22274 pattern can be emitted now. */
22275 par = emit_insn (par);
22276 REG_NOTES (par) = dwarf;
22277 RTX_FRAME_RELATED_P (par) = 1;
22278 }
22279
22280 i++;
22281 }
22282
22283 /* If the number of registers popped is odd and return_in_pc is false, or
22284 the number of registers is even and return_in_pc is true, the last register
22285 is popped using LDR. It can be PC as well. Hence, adjust the stack first
22286 and then use LDR with post-increment. */
22287
22288 /* Increment the stack pointer, based on there being
22289 num_regs 4-byte registers to restore. */
22290 tmp = gen_rtx_SET (stack_pointer_rtx,
22291 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
22292 RTX_FRAME_RELATED_P (tmp) = 1;
22293 tmp = emit_insn (tmp);
22294 if (!return_in_pc)
22295 {
22296 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
22297 stack_pointer_rtx, stack_pointer_rtx);
22298 }
22299
22300 dwarf = NULL_RTX;
22301
22302 if (((num_regs % 2) == 1 && !return_in_pc)
22303 || ((num_regs % 2) == 0 && return_in_pc))
22304 {
22305 /* Scan for the single register to be popped. Skip until the saved
22306 register is found. */
22307 for (; (saved_regs_mask & (1 << j)) == 0; j++);
22308
22309 /* Gen LDR with post increment here. */
22310 tmp1 = gen_rtx_MEM (SImode,
22311 gen_rtx_POST_INC (SImode,
22312 stack_pointer_rtx));
22313 set_mem_alias_set (tmp1, get_frame_alias_set ());
22314
22315 reg = gen_rtx_REG (SImode, j);
22316 tmp = gen_rtx_SET (reg, tmp1);
22317 RTX_FRAME_RELATED_P (tmp) = 1;
22318 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
22319
22320 if (return_in_pc)
22321 {
22322 /* If return_in_pc, j must be PC_REGNUM. */
22323 gcc_assert (j == PC_REGNUM);
22324 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22325 XVECEXP (par, 0, 0) = ret_rtx;
22326 XVECEXP (par, 0, 1) = tmp;
22327 par = emit_jump_insn (par);
22328 }
22329 else
22330 {
22331 par = emit_insn (tmp);
22332 REG_NOTES (par) = dwarf;
22333 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22334 stack_pointer_rtx, stack_pointer_rtx);
22335 }
22336
22337 }
22338 else if ((num_regs % 2) == 1 && return_in_pc)
22339 {
22340 /* There are 2 registers to be popped. So, generate the pattern
22341 pop_multiple_with_stack_update_and_return to pop in PC. */
22342 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
22343 }
22344
22345 return;
22346 }
22347
22348 /* LDRD in ARM mode needs consecutive registers as operands. This function
22349 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
22350 offset addressing and then generates one separate stack update. This provides
22351 more scheduling freedom, compared to writeback on every load. However,
22352 if the function returns using load into PC directly
22353 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
22354 before the last load. TODO: Add a peephole optimization to recognize
22355 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
22356 peephole optimization to merge the load at stack-offset zero
22357 with the stack update instruction using load with writeback
22358 in post-index addressing mode. */
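/* A worked example (a sketch only): for a SAVED_REGS_MASK covering
   {r4, r5, r7, pc} this emits roughly

     ldrd  r4, r5, [sp]
     ldr   r7, [sp, #8]
     add   sp, sp, #12
     ldr   pc, [sp], #4    @ return by loading PC with post-increment  */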
22359 static void
22360 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
22361 {
22362 int j = 0;
22363 int offset = 0;
22364 rtx par = NULL_RTX;
22365 rtx dwarf = NULL_RTX;
22366 rtx tmp, mem;
22367
22368 /* Restore saved registers. */
22369 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
22370 j = 0;
22371 while (j <= LAST_ARM_REGNUM)
22372 if (saved_regs_mask & (1 << j))
22373 {
22374 if ((j % 2) == 0
22375 && (saved_regs_mask & (1 << (j + 1)))
22376 && (j + 1) != PC_REGNUM)
22377 {
22378 /* Current register and next register form register pair for which
22379 LDRD can be generated. PC is always the last register popped, and
22380 we handle it separately. */
22381 if (offset > 0)
22382 mem = gen_frame_mem (DImode,
22383 plus_constant (Pmode,
22384 stack_pointer_rtx,
22385 offset));
22386 else
22387 mem = gen_frame_mem (DImode, stack_pointer_rtx);
22388
22389 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
22390 tmp = emit_insn (tmp);
22391 RTX_FRAME_RELATED_P (tmp) = 1;
22392
22393 /* Generate dwarf info. */
22394
22395 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22396 gen_rtx_REG (SImode, j),
22397 NULL_RTX);
22398 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22399 gen_rtx_REG (SImode, j + 1),
22400 dwarf);
22401
22402 REG_NOTES (tmp) = dwarf;
22403
22404 offset += 8;
22405 j += 2;
22406 }
22407 else if (j != PC_REGNUM)
22408 {
22409 /* Emit a single word load. */
22410 if (offset > 0)
22411 mem = gen_frame_mem (SImode,
22412 plus_constant (Pmode,
22413 stack_pointer_rtx,
22414 offset));
22415 else
22416 mem = gen_frame_mem (SImode, stack_pointer_rtx);
22417
22418 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
22419 tmp = emit_insn (tmp);
22420 RTX_FRAME_RELATED_P (tmp) = 1;
22421
22422 /* Generate dwarf info. */
22423 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
22424 gen_rtx_REG (SImode, j),
22425 NULL_RTX);
22426
22427 offset += 4;
22428 j += 1;
22429 }
22430 else /* j == PC_REGNUM */
22431 j++;
22432 }
22433 else
22434 j++;
22435
22436 /* Update the stack. */
22437 if (offset > 0)
22438 {
22439 tmp = gen_rtx_SET (stack_pointer_rtx,
22440 plus_constant (Pmode,
22441 stack_pointer_rtx,
22442 offset));
22443 tmp = emit_insn (tmp);
22444 arm_add_cfa_adjust_cfa_note (tmp, offset,
22445 stack_pointer_rtx, stack_pointer_rtx);
22446 offset = 0;
22447 }
22448
22449 if (saved_regs_mask & (1 << PC_REGNUM))
22450 {
22451 /* Only PC is to be popped. */
22452 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22453 XVECEXP (par, 0, 0) = ret_rtx;
22454 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
22455 gen_frame_mem (SImode,
22456 gen_rtx_POST_INC (SImode,
22457 stack_pointer_rtx)));
22458 RTX_FRAME_RELATED_P (tmp) = 1;
22459 XVECEXP (par, 0, 1) = tmp;
22460 par = emit_jump_insn (par);
22461
22462 /* Generate dwarf info. */
22463 dwarf = alloc_reg_note (REG_CFA_RESTORE,
22464 gen_rtx_REG (SImode, PC_REGNUM),
22465 NULL_RTX);
22466 REG_NOTES (par) = dwarf;
22467 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
22468 stack_pointer_rtx, stack_pointer_rtx);
22469 }
22470 }
22471
22472 /* Calculate the size of the return value that is passed in registers. */
22473 static unsigned
22474 arm_size_return_regs (void)
22475 {
22476 machine_mode mode;
22477
22478 if (crtl->return_rtx != 0)
22479 mode = GET_MODE (crtl->return_rtx);
22480 else
22481 mode = DECL_MODE (DECL_RESULT (current_function_decl));
22482
22483 return GET_MODE_SIZE (mode);
22484 }
22485
22486 /* Return true if the current function needs to save/restore LR. */
22487 static bool
22488 thumb_force_lr_save (void)
22489 {
22490 return !cfun->machine->lr_save_eliminated
22491 && (!crtl->is_leaf
22492 || thumb_far_jump_used_p ()
22493 || df_regs_ever_live_p (LR_REGNUM));
22494 }
22495
22496 /* We do not know whether r3 will be available, because there is
22497 an indirect tail call happening in this
22498 particular case. */
22499 static bool
22500 is_indirect_tailcall_p (rtx call)
22501 {
22502 rtx pat = PATTERN (call);
22503
22504 /* Indirect tail call. */
22505 pat = XVECEXP (pat, 0, 0);
22506 if (GET_CODE (pat) == SET)
22507 pat = SET_SRC (pat);
22508
22509 pat = XEXP (XEXP (pat, 0), 0);
22510 return REG_P (pat);
22511 }
22512
22513 /* Return true if r3 is used by any of the tail call insns in the
22514 current function. */
22515 static bool
22516 any_sibcall_could_use_r3 (void)
22517 {
22518 edge_iterator ei;
22519 edge e;
22520
22521 if (!crtl->tail_call_emit)
22522 return false;
22523 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22524 if (e->flags & EDGE_SIBCALL)
22525 {
22526 rtx_insn *call = BB_END (e->src);
22527 if (!CALL_P (call))
22528 call = prev_nonnote_nondebug_insn (call);
22529 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
22530 if (find_regno_fusage (call, USE, 3)
22531 || is_indirect_tailcall_p (call))
22532 return true;
22533 }
22534 return false;
22535 }
22536
22537
22538 /* Compute the distance from register FROM to register TO.
22539 These can be the arg pointer (26), the soft frame pointer (25),
22540 the stack pointer (13) or the hard frame pointer (11).
22541 In thumb mode r7 is used as the soft frame pointer, if needed.
22542 Typical stack layout looks like this:
22543
22544 old stack pointer -> | |
22545 ----
22546 | | \
22547 | | saved arguments for
22548 | | vararg functions
22549 | | /
22550 --
22551 hard FP & arg pointer -> | | \
22552 | | stack
22553 | | frame
22554 | | /
22555 --
22556 | | \
22557 | | call saved
22558 | | registers
22559 soft frame pointer -> | | /
22560 --
22561 | | \
22562 | | local
22563 | | variables
22564 locals base pointer -> | | /
22565 --
22566 | | \
22567 | | outgoing
22568 | | arguments
22569 current stack pointer -> | | /
22570 --
22571
22572 For a given function some or all of these stack components
22573 may not be needed, giving rise to the possibility of
22574 eliminating some of the registers.
22575
22576 The values returned by this function must reflect the behavior
22577 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
22578
22579 The sign of the number returned reflects the direction of stack
22580 growth, so the values are positive for all eliminations except
22581 from the soft frame pointer to the hard frame pointer.
22582
22583 SFP may point just inside the local variables block to ensure correct
22584 alignment. */
22585
22586
22587 /* Return cached stack offsets. */
22588
22589 static arm_stack_offsets *
22590 arm_get_frame_offsets (void)
22591 {
22592 struct arm_stack_offsets *offsets;
22593
22594 offsets = &cfun->machine->stack_offsets;
22595
22596 return offsets;
22597 }
22598
22599
22600 /* Calculate stack offsets. These are used to calculate register elimination
22601 offsets and in prologue/epilogue code. Also calculates which registers
22602 should be saved. */
22603
22604 static void
22605 arm_compute_frame_layout (void)
22606 {
22607 struct arm_stack_offsets *offsets;
22608 unsigned long func_type;
22609 int saved;
22610 int core_saved;
22611 HOST_WIDE_INT frame_size;
22612 int i;
22613
22614 offsets = &cfun->machine->stack_offsets;
22615
22616 /* Initially this is the size of the local variables. It will be translated
22617 into an offset once we have determined the size of preceding data. */
22618 frame_size = ROUND_UP_WORD (get_frame_size ());
22619
22620 /* Space for variadic functions. */
22621 offsets->saved_args = crtl->args.pretend_args_size;
22622
22623 /* In Thumb mode this is incorrect, but never used. */
22624 offsets->frame
22625 = (offsets->saved_args
22626 + arm_compute_static_chain_stack_bytes ()
22627 + (frame_pointer_needed ? 4 : 0));
22628
22629 if (TARGET_32BIT)
22630 {
22631 unsigned int regno;
22632
22633 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
22634 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22635 saved = core_saved;
22636
22637 /* We know that SP will be doubleword aligned on entry, and we must
22638 preserve that condition at any subroutine call. We also require the
22639 soft frame pointer to be doubleword aligned. */
22640
22641 if (TARGET_REALLY_IWMMXT)
22642 {
22643 /* Check for the call-saved iWMMXt registers. */
22644 for (regno = FIRST_IWMMXT_REGNUM;
22645 regno <= LAST_IWMMXT_REGNUM;
22646 regno++)
22647 if (reg_needs_saving_p (regno))
22648 saved += 8;
22649 }
22650
22651 func_type = arm_current_func_type ();
22652 /* Space for saved VFP registers. */
22653 if (! IS_VOLATILE (func_type)
22654 && TARGET_VFP_BASE)
22655 saved += arm_get_vfp_saved_size ();
22656
22657 /* Allocate space for saving/restoring FPCXTNS in Armv8.1-M Mainline
22658 nonsecure entry functions with VSTR/VLDR. */
22659 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
22660 saved += 4;
22661 }
22662 else /* TARGET_THUMB1 */
22663 {
22664 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
22665 core_saved = bit_count (offsets->saved_regs_mask) * 4;
22666 saved = core_saved;
22667 if (TARGET_BACKTRACE)
22668 saved += 16;
22669 }
22670
22671 /* Saved registers include the stack frame. */
22672 offsets->saved_regs
22673 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
22674 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
22675
22676 /* A leaf function does not need any stack alignment if it has nothing
22677 on the stack. */
22678 if (crtl->is_leaf && frame_size == 0
22679 /* However if it calls alloca(), we have a dynamically allocated
22680 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
22681 && ! cfun->calls_alloca)
22682 {
22683 offsets->outgoing_args = offsets->soft_frame;
22684 offsets->locals_base = offsets->soft_frame;
22685 return;
22686 }
22687
22688 /* Ensure SFP has the correct alignment. */
22689 if (ARM_DOUBLEWORD_ALIGN
22690 && (offsets->soft_frame & 7))
22691 {
22692 offsets->soft_frame += 4;
22693 /* Try to align stack by pushing an extra reg. Don't bother doing this
22694 when there is a stack frame as the alignment will be rolled into
22695 the normal stack adjustment. */
22696 if (frame_size + crtl->outgoing_args_size == 0)
22697 {
22698 int reg = -1;
22699
22700 /* Register r3 is caller-saved. Normally it does not need to be
22701 saved on entry by the prologue. However if we choose to save
22702 it for padding then we may confuse the compiler into thinking
22703 a prologue sequence is required when in fact it is not. This
22704 will occur when shrink-wrapping if r3 is used as a scratch
22705 register and there are no other callee-saved writes.
22706
22707 This situation can be avoided when other callee-saved registers
22708 are available: r3 is then not mandatory, since we can choose a
22709 callee-saved register for the padding instead. */
22710 bool prefer_callee_reg_p = false;
22711
22712 /* If it is safe to use r3, then do so. This sometimes
22713 generates better code on Thumb-2 by avoiding the need to
22714 use 32-bit push/pop instructions. */
22715 if (! any_sibcall_could_use_r3 ()
22716 && arm_size_return_regs () <= 12
22717 && (offsets->saved_regs_mask & (1 << 3)) == 0
22718 && (TARGET_THUMB2
22719 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
22720 {
22721 reg = 3;
22722 if (!TARGET_THUMB2)
22723 prefer_callee_reg_p = true;
22724 }
22725 if (reg == -1
22726 || prefer_callee_reg_p)
22727 {
22728 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
22729 {
22730 /* Avoid fixed registers; they may be changed at
22731 arbitrary times so it's unsafe to restore them
22732 during the epilogue. */
22733 if (!fixed_regs[i]
22734 && (offsets->saved_regs_mask & (1 << i)) == 0)
22735 {
22736 reg = i;
22737 break;
22738 }
22739 }
22740 }
22741
22742 if (reg != -1)
22743 {
22744 offsets->saved_regs += 4;
22745 offsets->saved_regs_mask |= (1 << reg);
22746 }
22747 }
22748 }
22749
22750 offsets->locals_base = offsets->soft_frame + frame_size;
22751 offsets->outgoing_args = (offsets->locals_base
22752 + crtl->outgoing_args_size);
22753
22754 if (ARM_DOUBLEWORD_ALIGN)
22755 {
22756 /* Ensure SP remains doubleword aligned. */
22757 if (offsets->outgoing_args & 7)
22758 offsets->outgoing_args += 4;
22759 gcc_assert (!(offsets->outgoing_args & 7));
22760 }
22761 }
22762
22763
22764 /* Calculate the relative offsets for the different stack pointers. Positive
22765 offsets are in the direction of stack growth. */
22766
22767 HOST_WIDE_INT
22768 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22769 {
22770 arm_stack_offsets *offsets;
22771
22772 offsets = arm_get_frame_offsets ();
22773
22774 /* OK, now we have enough information to compute the distances.
22775 There must be an entry in these switch tables for each pair
22776 of registers in ELIMINABLE_REGS, even if some of the entries
22777 seem to be redundant or useless. */
22778 switch (from)
22779 {
22780 case ARG_POINTER_REGNUM:
22781 switch (to)
22782 {
22783 case THUMB_HARD_FRAME_POINTER_REGNUM:
22784 return 0;
22785
22786 case FRAME_POINTER_REGNUM:
22787 /* This is the reverse of the soft frame pointer
22788 to hard frame pointer elimination below. */
22789 return offsets->soft_frame - offsets->saved_args;
22790
22791 case ARM_HARD_FRAME_POINTER_REGNUM:
22792 /* This is only non-zero in the case where the static chain register
22793 is stored above the frame. */
22794 return offsets->frame - offsets->saved_args - 4;
22795
22796 case STACK_POINTER_REGNUM:
22797 /* If nothing has been pushed on the stack at all
22798 then this will return -4. This *is* correct! */
22799 return offsets->outgoing_args - (offsets->saved_args + 4);
22800
22801 default:
22802 gcc_unreachable ();
22803 }
22804 gcc_unreachable ();
22805
22806 case FRAME_POINTER_REGNUM:
22807 switch (to)
22808 {
22809 case THUMB_HARD_FRAME_POINTER_REGNUM:
22810 return 0;
22811
22812 case ARM_HARD_FRAME_POINTER_REGNUM:
22813 /* The hard frame pointer points to the top entry in the
22814 stack frame. The soft frame pointer points to the bottom entry
22815 in the stack frame. If there is no stack frame at all,
22816 then they are identical. */
22817
22818 return offsets->frame - offsets->soft_frame;
22819
22820 case STACK_POINTER_REGNUM:
22821 return offsets->outgoing_args - offsets->soft_frame;
22822
22823 default:
22824 gcc_unreachable ();
22825 }
22826 gcc_unreachable ();
22827
22828 default:
22829 /* You cannot eliminate from the stack pointer.
22830 In theory you could eliminate from the hard frame
22831 pointer to the stack pointer, but this will never
22832 happen, since if a stack frame is not needed the
22833 hard frame pointer will never be used. */
22834 gcc_unreachable ();
22835 }
22836 }
22837
22838 /* Given FROM and TO register numbers, say whether this elimination is
22839 allowed. Frame pointer elimination is automatically handled.
22840
22841 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
22842 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
22843 pointer, we must eliminate FRAME_POINTER_REGNUM into
22844 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
22845 ARG_POINTER_REGNUM. */
22846
22847 bool
22848 arm_can_eliminate (const int from, const int to)
22849 {
22850 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
22851 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
22852 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
22853 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
22854 true);
22855 }
22856
22857 /* Emit RTL to save coprocessor registers on function entry. Returns the
22858 number of bytes pushed. */
22859
22860 static int
22861 arm_save_coproc_regs(void)
22862 {
22863 int saved_size = 0;
22864 unsigned reg;
22865 unsigned start_reg;
22866 rtx insn;
22867
22868 if (TARGET_REALLY_IWMMXT)
22869 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
22870 if (reg_needs_saving_p (reg))
22871 {
22872 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22873 insn = gen_rtx_MEM (V2SImode, insn);
22874 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
22875 RTX_FRAME_RELATED_P (insn) = 1;
22876 saved_size += 8;
22877 }
22878
22879 if (TARGET_VFP_BASE)
22880 {
22881 start_reg = FIRST_VFP_REGNUM;
22882
22883 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
22884 {
22885 if (!reg_needs_saving_p (reg) && !reg_needs_saving_p (reg + 1))
22886 {
22887 if (start_reg != reg)
22888 saved_size += vfp_emit_fstmd (start_reg,
22889 (reg - start_reg) / 2);
22890 start_reg = reg + 2;
22891 }
22892 }
22893 if (start_reg != reg)
22894 saved_size += vfp_emit_fstmd (start_reg,
22895 (reg - start_reg) / 2);
22896 }
22897 return saved_size;
22898 }
22899
22900
22901 /* Set the Thumb frame pointer from the stack pointer. */
22902
22903 static void
22904 thumb_set_frame_pointer (arm_stack_offsets *offsets)
22905 {
22906 HOST_WIDE_INT amount;
22907 rtx insn, dwarf;
22908
22909 amount = offsets->outgoing_args - offsets->locals_base;
22910 if (amount < 1024)
22911 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22912 stack_pointer_rtx, GEN_INT (amount)));
22913 else
22914 {
22915 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
22916 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
22917 expects the first two operands to be the same. */
22918 if (TARGET_THUMB2)
22919 {
22920 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22921 stack_pointer_rtx,
22922 hard_frame_pointer_rtx));
22923 }
22924 else
22925 {
22926 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22927 hard_frame_pointer_rtx,
22928 stack_pointer_rtx));
22929 }
22930 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
22931 plus_constant (Pmode, stack_pointer_rtx, amount));
22932 RTX_FRAME_RELATED_P (dwarf) = 1;
22933 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22934 }
22935
22936 RTX_FRAME_RELATED_P (insn) = 1;
22937 }
22938
22939 struct scratch_reg {
22940 rtx reg;
22941 bool saved;
22942 };
22943
22944 /* Return a short-lived scratch register for use as a 2nd scratch register on
22945 function entry after the registers are saved in the prologue. This register
22946 must be released by means of release_scratch_register_on_entry. IP is not
22947 considered since it is always used as the 1st scratch register if available.
22948
22949 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
22950 mask of live registers. */
22951
22952 static void
22953 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
22954 unsigned long live_regs)
22955 {
22956 int regno = -1;
22957
22958 sr->saved = false;
22959
22960 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
22961 regno = LR_REGNUM;
22962 else
22963 {
22964 unsigned int i;
22965
22966 for (i = 4; i < 11; i++)
22967 if (regno1 != i && (live_regs & (1 << i)) != 0)
22968 {
22969 regno = i;
22970 break;
22971 }
22972
22973 if (regno < 0)
22974 {
22975 /* If IP is used as the 1st scratch register for a nested function,
22976 then either r3 wasn't available or is used to preserve IP. */
22977 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
22978 regno1 = 3;
22979 regno = (regno1 == 3 ? 2 : 3);
22980 sr->saved
22981 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
22982 regno);
22983 }
22984 }
22985
22986 sr->reg = gen_rtx_REG (SImode, regno);
22987 if (sr->saved)
22988 {
22989 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
22990 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
22991 rtx x = gen_rtx_SET (stack_pointer_rtx,
22992 plus_constant (Pmode, stack_pointer_rtx, -4));
22993 RTX_FRAME_RELATED_P (insn) = 1;
22994 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
22995 }
22996 }
22997
22998 /* Release a scratch register obtained from the preceding function. */
22999
23000 static void
23001 release_scratch_register_on_entry (struct scratch_reg *sr)
23002 {
23003 if (sr->saved)
23004 {
23005 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
23006 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
23007 rtx x = gen_rtx_SET (stack_pointer_rtx,
23008 plus_constant (Pmode, stack_pointer_rtx, 4));
23009 RTX_FRAME_RELATED_P (insn) = 1;
23010 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
23011 }
23012 }
23013
23014 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23015
23016 #if PROBE_INTERVAL > 4096
23017 #error Cannot use indexed addressing mode for stack probing
23018 #endif
23019
23020 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23021 inclusive. These are offsets from the current stack pointer. REGNO1
23022 is the index number of the 1st scratch register and LIVE_REGS is the
23023 mask of live registers. */
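/* For example (illustrative only, assuming the default 4-KiB probe
   interval): FIRST = 0 and SIZE = 12288 probes at sp - 4096, sp - 8192
   and sp - 12288, using the register numbered REGNO1 to form the probe
   addresses.  */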
23024
23025 static void
23026 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
23027 unsigned int regno1, unsigned long live_regs)
23028 {
23029 rtx reg1 = gen_rtx_REG (Pmode, regno1);
23030
23031 /* See if we have a constant small number of probes to generate. If so,
23032 that's the easy case. */
23033 if (size <= PROBE_INTERVAL)
23034 {
23035 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23036 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23037 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
23038 }
23039
23040 /* The run-time loop is made up of 10 insns in the generic case while the
23041 compile-time loop is made up of 4+2*(n-2) insns, where n is the number of intervals. */
23042 else if (size <= 5 * PROBE_INTERVAL)
23043 {
23044 HOST_WIDE_INT i, rem;
23045
23046 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
23047 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23048 emit_stack_probe (reg1);
23049
23050 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
23051 it exceeds SIZE. If only two probes are needed, this will not
23052 generate any code. Then probe at FIRST + SIZE. */
23053 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23054 {
23055 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23056 emit_stack_probe (reg1);
23057 }
23058
23059 rem = size - (i - PROBE_INTERVAL);
23060 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23061 {
23062 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
23063 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
23064 }
23065 else
23066 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
23067 }
23068
23069 /* Otherwise, do the same as above, but in a loop. Note that we must be
23070 extra careful with variables wrapping around because we might be at
23071 the very top (or the very bottom) of the address space and we have
23072 to be able to handle this case properly; in particular, we use an
23073 equality test for the loop condition. */
23074 else
23075 {
23076 HOST_WIDE_INT rounded_size;
23077 struct scratch_reg sr;
23078
23079 get_scratch_register_on_entry (&sr, regno1, live_regs);
23080
23081 emit_move_insn (reg1, GEN_INT (first));
23082
23083
23084 /* Step 1: round SIZE to the previous multiple of the interval. */
23085
23086 rounded_size = size & -PROBE_INTERVAL;
23087 emit_move_insn (sr.reg, GEN_INT (rounded_size));
23088
23089
23090 /* Step 2: compute initial and final value of the loop counter. */
23091
23092 /* TEST_ADDR = SP + FIRST. */
23093 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
23094
23095 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23096 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
23097
23098
23099 /* Step 3: the loop
23100
23101 do
23102 {
23103 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23104 probe at TEST_ADDR
23105 }
23106 while (TEST_ADDR != LAST_ADDR)
23107
23108 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23109 until it is equal to ROUNDED_SIZE. */
23110
23111 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
23112
23113
23114 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23115 that SIZE is equal to ROUNDED_SIZE. */
23116
23117 if (size != rounded_size)
23118 {
23119 HOST_WIDE_INT rem = size - rounded_size;
23120
23121 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
23122 {
23123 emit_set_insn (sr.reg,
23124 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
23125 emit_stack_probe (plus_constant (Pmode, sr.reg,
23126 PROBE_INTERVAL - rem));
23127 }
23128 else
23129 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
23130 }
23131
23132 release_scratch_register_on_entry (&sr);
23133 }
23134
23135 /* Make sure nothing is scheduled before we are done. */
23136 emit_insn (gen_blockage ());
23137 }
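
/* As a worked example (a sketch only, assuming the usual 4 KiB
   PROBE_INTERVAL): for FIRST == 1024 and SIZE == 12288 the second case
   above (SIZE <= 5 * PROBE_INTERVAL) emits probes at SP - 5120, SP - 9216
   and SP - 13312, i.e. at FIRST + N * PROBE_INTERVAL for N = 1, 2 and then
   at FIRST + SIZE, so every page of the new frame is touched before the
   stack pointer is actually lowered.  */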
23138
23139 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23140 absolute addresses. */
23141
23142 const char *
23143 output_probe_stack_range (rtx reg1, rtx reg2)
23144 {
23145 static int labelno = 0;
23146 char loop_lab[32];
23147 rtx xops[2];
23148
23149 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
23150
23151 /* Loop. */
23152 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23153
23154 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23155 xops[0] = reg1;
23156 xops[1] = GEN_INT (PROBE_INTERVAL);
23157 output_asm_insn ("sub\t%0, %0, %1", xops);
23158
23159 /* Probe at TEST_ADDR. */
23160 output_asm_insn ("str\tr0, [%0, #0]", xops);
23161
23162 /* Test if TEST_ADDR == LAST_ADDR. */
23163 xops[1] = reg2;
23164 output_asm_insn ("cmp\t%0, %1", xops);
23165
23166 /* Branch. */
23167 fputs ("\tbne\t", asm_out_file);
23168 assemble_name_raw (asm_out_file, loop_lab);
23169 fputc ('\n', asm_out_file);
23170
23171 return "";
23172 }
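
/* For reference, with REG1 == r4, REG2 == r5 and a 4 KiB PROBE_INTERVAL
   (illustrative values only), the routine above emits a loop along the
   lines of:

	.LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0

   The label number simply counts how many such loops have already been
   emitted in this translation unit.  */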
23173
23174 /* Generate the prologue instructions for entry into an ARM or Thumb-2
23175 function. */
23176 void
23177 arm_expand_prologue (void)
23178 {
23179 rtx amount;
23180 rtx insn;
23181 rtx ip_rtx;
23182 unsigned long live_regs_mask;
23183 unsigned long func_type;
23184 int fp_offset = 0;
23185 int saved_pretend_args = 0;
23186 int saved_regs = 0;
23187 unsigned HOST_WIDE_INT args_to_push;
23188 HOST_WIDE_INT size;
23189 arm_stack_offsets *offsets;
23190 bool clobber_ip;
23191
23192 func_type = arm_current_func_type ();
23193
23194 /* Naked functions don't have prologues. */
23195 if (IS_NAKED (func_type))
23196 {
23197 if (flag_stack_usage_info)
23198 current_function_static_stack_size = 0;
23199 return;
23200 }
23201
23202 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
23203 args_to_push = crtl->args.pretend_args_size;
23204
23205 /* Compute which registers we will have to save onto the stack. */
23206 offsets = arm_get_frame_offsets ();
23207 live_regs_mask = offsets->saved_regs_mask;
23208
23209 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
23210
23211 if (IS_STACKALIGN (func_type))
23212 {
23213 rtx r0, r1;
23214
23215 /* Handle a word-aligned stack pointer. We generate the following:
23216
23217 mov r0, sp
23218 bic r1, r0, #7
23219 mov sp, r1
23220 <save and restore r0 in normal prologue/epilogue>
23221 mov sp, r0
23222 bx lr
23223
23224 The unwinder doesn't need to know about the stack realignment.
23225 Just tell it we saved SP in r0. */
23226 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
23227
23228 r0 = gen_rtx_REG (SImode, R0_REGNUM);
23229 r1 = gen_rtx_REG (SImode, R1_REGNUM);
23230
23231 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
23232 RTX_FRAME_RELATED_P (insn) = 1;
23233 add_reg_note (insn, REG_CFA_REGISTER, NULL);
23234
23235 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
23236
23237 /* ??? The CFA changes here, which may cause GDB to conclude that it
23238 has entered a different function. That said, the unwind info is
23239 correct, individually, before and after this instruction because
23240 we've described the save of SP, which will override the default
23241 handling of SP as restoring from the CFA. */
23242 emit_insn (gen_movsi (stack_pointer_rtx, r1));
23243 }
23244
23245 /* Let's compute the static_chain_stack_bytes required and store it. Right
23246 now the value must be -1 as stored by arm_init_machine_status (). */
23247 cfun->machine->static_chain_stack_bytes
23248 = arm_compute_static_chain_stack_bytes ();
23249
23250 /* The static chain register is the same as the IP register. If it is
23251 clobbered when creating the frame, we need to save and restore it. */
23252 clobber_ip = IS_NESTED (func_type)
23253 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23254 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23255 || flag_stack_clash_protection)
23256 && !df_regs_ever_live_p (LR_REGNUM)
23257 && arm_r3_live_at_start_p ()));
23258
23259 /* Find somewhere to store IP whilst the frame is being created.
23260 We try the following places in order:
23261
23262 1. The last argument register r3 if it is available.
23263 2. A slot on the stack above the frame if there are no
23264 arguments to push onto the stack.
23265 3. Register r3 again, after pushing the argument registers
23266 onto the stack, if this is a varargs function.
23267 4. The last slot on the stack created for the arguments to
23268 push, if this isn't a varargs function.
23269
23270 Note - we only need to tell the dwarf2 backend about the SP
23271 adjustment in the second variant; the static chain register
23272 doesn't need to be unwound, as it doesn't contain a value
23273 inherited from the caller. */
23274 if (clobber_ip)
23275 {
23276 if (!arm_r3_live_at_start_p ())
23277 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23278 else if (args_to_push == 0)
23279 {
23280 rtx addr, dwarf;
23281
23282 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
23283 saved_regs += 4;
23284
23285 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23286 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23287 fp_offset = 4;
23288
23289 /* Just tell the dwarf backend that we adjusted SP. */
23290 dwarf = gen_rtx_SET (stack_pointer_rtx,
23291 plus_constant (Pmode, stack_pointer_rtx,
23292 -fp_offset));
23293 RTX_FRAME_RELATED_P (insn) = 1;
23294 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23295 }
23296 else
23297 {
23298 /* Store the args on the stack. */
23299 if (cfun->machine->uses_anonymous_args)
23300 {
23301 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
23302 (0xf0 >> (args_to_push / 4)) & 0xf);
23303 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
23304 saved_pretend_args = 1;
23305 }
23306 else
23307 {
23308 rtx addr, dwarf;
23309
23310 if (args_to_push == 4)
23311 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
23312 else
23313 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
23314 plus_constant (Pmode,
23315 stack_pointer_rtx,
23316 -args_to_push));
23317
23318 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
23319
23320 /* Just tell the dwarf backend that we adjusted SP. */
23321 dwarf = gen_rtx_SET (stack_pointer_rtx,
23322 plus_constant (Pmode, stack_pointer_rtx,
23323 -args_to_push));
23324 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23325 }
23326
23327 RTX_FRAME_RELATED_P (insn) = 1;
23328 fp_offset = args_to_push;
23329 args_to_push = 0;
23330 }
23331 }
23332
23333 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23334 {
23335 if (IS_INTERRUPT (func_type))
23336 {
23337 /* Interrupt functions must not corrupt any registers.
23338 Creating a frame pointer, however, corrupts the IP
23339 register, so we must push it first. */
23340 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
23341
23342 /* Do not set RTX_FRAME_RELATED_P on this insn.
23343 The dwarf stack unwinding code only wants to see one
23344 stack decrement per function, and this is not it. If
23345 this instruction is labeled as being part of the frame
23346 creation sequence then dwarf2out_frame_debug_expr will
23347 die when it encounters the assignment of IP to FP
23348 later on, since the use of SP here establishes SP as
23349 the CFA register and not IP.
23350
23351 Anyway this instruction is not really part of the stack
23352 frame creation although it is part of the prologue. */
23353 }
23354
23355 insn = emit_set_insn (ip_rtx,
23356 plus_constant (Pmode, stack_pointer_rtx,
23357 fp_offset));
23358 RTX_FRAME_RELATED_P (insn) = 1;
23359 }
23360
23361 /* Armv8.1-M Mainline nonsecure entry: save FPCXTNS on stack using VSTR. */
23362 if (TARGET_HAVE_FPCXT_CMSE && IS_CMSE_ENTRY (func_type))
23363 {
23364 saved_regs += 4;
23365 insn = emit_insn (gen_push_fpsysreg_insn (stack_pointer_rtx,
23366 GEN_INT (FPCXTNS_ENUM)));
23367 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
23368 plus_constant (Pmode, stack_pointer_rtx, -4));
23369 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
23370 RTX_FRAME_RELATED_P (insn) = 1;
23371 }
23372
23373 if (args_to_push)
23374 {
23375 /* Push the argument registers, or reserve space for them. */
23376 if (cfun->machine->uses_anonymous_args)
23377 insn = emit_multi_reg_push
23378 ((0xf0 >> (args_to_push / 4)) & 0xf,
23379 (0xf0 >> (args_to_push / 4)) & 0xf);
23380 else
23381 insn = emit_insn
23382 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23383 GEN_INT (- args_to_push)));
23384 RTX_FRAME_RELATED_P (insn) = 1;
23385 }
23386
23387 /* If this is an interrupt service routine, and the link register
23388 is going to be pushed, and we're not generating an extra
23389 push of IP (needed when a frame is required and the frame layout is APCS),
23390 then subtracting four from LR now will mean that the function return
23391 can be done with a single instruction. */
23392 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
23393 && (live_regs_mask & (1 << LR_REGNUM)) != 0
23394 && !(frame_pointer_needed && TARGET_APCS_FRAME)
23395 && TARGET_ARM)
23396 {
23397 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
23398
23399 emit_set_insn (lr, plus_constant (SImode, lr, -4));
23400 }
23401
23402 if (live_regs_mask)
23403 {
23404 unsigned long dwarf_regs_mask = live_regs_mask;
23405
23406 saved_regs += bit_count (live_regs_mask) * 4;
23407 if (optimize_size && !frame_pointer_needed
23408 && saved_regs == offsets->saved_regs - offsets->saved_args)
23409 {
23410 /* If no coprocessor registers are being pushed and we don't have
23411 to worry about a frame pointer then push extra registers to
23412 create the stack frame. This is done in a way that does not
23413 alter the frame layout, so is independent of the epilogue. */
23414 int n;
23415 int frame;
23416 n = 0;
23417 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
23418 n++;
23419 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
23420 if (frame && n * 4 >= frame)
23421 {
23422 n = frame / 4;
23423 live_regs_mask |= (1 << n) - 1;
23424 saved_regs += frame;
23425 }
23426 }
23427
23428 if (TARGET_LDRD
23429 && current_tune->prefer_ldrd_strd
23430 && !optimize_function_for_size_p (cfun))
23431 {
23432 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
23433 if (TARGET_THUMB2)
23434 thumb2_emit_strd_push (live_regs_mask);
23435 else if (TARGET_ARM
23436 && !TARGET_APCS_FRAME
23437 && !IS_INTERRUPT (func_type))
23438 arm_emit_strd_push (live_regs_mask);
23439 else
23440 {
23441 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
23442 RTX_FRAME_RELATED_P (insn) = 1;
23443 }
23444 }
23445 else
23446 {
23447 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
23448 RTX_FRAME_RELATED_P (insn) = 1;
23449 }
23450 }
23451
23452 if (! IS_VOLATILE (func_type))
23453 saved_regs += arm_save_coproc_regs ();
23454
23455 if (frame_pointer_needed && TARGET_ARM)
23456 {
23457 /* Create the new frame pointer. */
23458 if (TARGET_APCS_FRAME)
23459 {
23460 insn = GEN_INT (-(4 + args_to_push + fp_offset));
23461 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
23462 RTX_FRAME_RELATED_P (insn) = 1;
23463 }
23464 else
23465 {
23466 insn = GEN_INT (saved_regs - (4 + fp_offset));
23467 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23468 stack_pointer_rtx, insn));
23469 RTX_FRAME_RELATED_P (insn) = 1;
23470 }
23471 }
23472
23473 size = offsets->outgoing_args - offsets->saved_args;
23474 if (flag_stack_usage_info)
23475 current_function_static_stack_size = size;
23476
23477 /* If this isn't an interrupt service routine and we have a frame, then do
23478 stack checking. We use IP as the first scratch register, except for the
23479 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
23480 if (!IS_INTERRUPT (func_type)
23481 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
23482 || flag_stack_clash_protection))
23483 {
23484 unsigned int regno;
23485
23486 if (!IS_NESTED (func_type) || clobber_ip)
23487 regno = IP_REGNUM;
23488 else if (df_regs_ever_live_p (LR_REGNUM))
23489 regno = LR_REGNUM;
23490 else
23491 regno = 3;
23492
23493 if (crtl->is_leaf && !cfun->calls_alloca)
23494 {
23495 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
23496 arm_emit_probe_stack_range (get_stack_check_protect (),
23497 size - get_stack_check_protect (),
23498 regno, live_regs_mask);
23499 }
23500 else if (size > 0)
23501 arm_emit_probe_stack_range (get_stack_check_protect (), size,
23502 regno, live_regs_mask);
23503 }
23504
23505 /* Recover the static chain register. */
23506 if (clobber_ip)
23507 {
23508 if (!arm_r3_live_at_start_p () || saved_pretend_args)
23509 insn = gen_rtx_REG (SImode, 3);
23510 else
23511 {
23512 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
23513 insn = gen_frame_mem (SImode, insn);
23514 }
23515 emit_set_insn (ip_rtx, insn);
23516 emit_insn (gen_force_register_use (ip_rtx));
23517 }
23518
23519 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
23520 {
23521 /* This add can produce multiple insns for a large constant, so we
23522 need to get tricky. */
23523 rtx_insn *last = get_last_insn ();
23524
23525 amount = GEN_INT (offsets->saved_args + saved_regs
23526 - offsets->outgoing_args);
23527
23528 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
23529 amount));
23530 do
23531 {
23532 last = last ? NEXT_INSN (last) : get_insns ();
23533 RTX_FRAME_RELATED_P (last) = 1;
23534 }
23535 while (last != insn);
23536
23537 /* If the frame pointer is needed, emit a special barrier that
23538 will prevent the scheduler from moving stores to the frame
23539 before the stack adjustment. */
23540 if (frame_pointer_needed)
23541 emit_insn (gen_stack_tie (stack_pointer_rtx,
23542 hard_frame_pointer_rtx));
23543 }
23544
23545
23546 if (frame_pointer_needed && TARGET_THUMB2)
23547 thumb_set_frame_pointer (offsets);
23548
23549 if (flag_pic && arm_pic_register != INVALID_REGNUM)
23550 {
23551 unsigned long mask;
23552
23553 mask = live_regs_mask;
23554 mask &= THUMB2_WORK_REGS;
23555 if (!IS_NESTED (func_type))
23556 mask |= (1 << IP_REGNUM);
23557 arm_load_pic_register (mask, NULL_RTX);
23558 }
23559
23560 /* If we are profiling, make sure no instructions are scheduled before
23561 the call to mcount. Similarly if the user has requested no
23562 scheduling in the prolog. Similarly if we want non-call exceptions
23563 using the EABI unwinder, to prevent faulting instructions from being
23564 swapped with a stack adjustment. */
23565 if (crtl->profile || !TARGET_SCHED_PROLOG
23566 || (arm_except_unwind_info (&global_options) == UI_TARGET
23567 && cfun->can_throw_non_call_exceptions))
23568 emit_insn (gen_blockage ());
23569
23570 /* If the link register is being kept alive, with the return address in it,
23571 then make sure that it does not get reused by the ce2 pass. */
23572 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
23573 cfun->machine->lr_save_eliminated = 1;
23574 }
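
/* As a rough illustration only (the exact sequence depends on the live
   register mask, the architecture and the options in force), a simple
   ARM-state function compiled with -mapcs-frame and needing a frame
   pointer gets the classic prologue shape

	mov	ip, sp
	push	{fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<locals>

   corresponding to the IP copy, the emit_multi_reg_push call, the hard
   frame pointer setup and the final stack adjustment above.  */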
23575 \f
23576 /* Print condition code to STREAM. Helper function for arm_print_operand. */
23577 static void
23578 arm_print_condition (FILE *stream)
23579 {
23580 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
23581 {
23582 /* Branch conversion is not implemented for Thumb-2. */
23583 if (TARGET_THUMB)
23584 {
23585 output_operand_lossage ("predicated Thumb instruction");
23586 return;
23587 }
23588 if (current_insn_predicate != NULL)
23589 {
23590 output_operand_lossage
23591 ("predicated instruction in conditional sequence");
23592 return;
23593 }
23594
23595 fputs (arm_condition_codes[arm_current_cc], stream);
23596 }
23597 else if (current_insn_predicate)
23598 {
23599 enum arm_cond_code code;
23600
23601 if (TARGET_THUMB1)
23602 {
23603 output_operand_lossage ("predicated Thumb instruction");
23604 return;
23605 }
23606
23607 code = get_arm_condition_code (current_insn_predicate);
23608 fputs (arm_condition_codes[code], stream);
23609 }
23610 }
23611
23612
23613 /* Globally reserved letters: acln
23614 Punctuation letters currently used: @_|?().!#
23615 Lower case letters currently used: bcdefhimpqtvwxyz
23616 Upper case letters currently used: ABCDEFGHIJKLMNOPQRSTU
23617 Letters previously used, but now deprecated/obsolete: sVWXYZ.
23618
23619 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
23620
23621 If CODE is 'd', then the X is a condition operand and the instruction
23622 should only be executed if the condition is true.
23623 if CODE is 'D', then the X is a condition operand and the instruction
23624 should only be executed if the condition is false: however, if the mode
23625 of the comparison is CCFPEmode, then always execute the instruction -- we
23626 do this because in these circumstances !GE does not necessarily imply LT;
23627 in these cases the instruction pattern will take care to make sure that
23628 an instruction containing %d will follow, thereby undoing the effects of
23629 doing this instruction unconditionally.
23630 If CODE is 'N' then X is a floating point operand that must be negated
23631 before output.
23632 If CODE is 'B' then output a bitwise inverted value of X (a const int).
23633 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
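/* As an illustration of the 'Q'/'R' codes below: a DImode addition
   pattern can be written with a template along the lines of
   "adds\t%Q0, %Q1, %Q2\;adc\t%R0, %R1, %R2" (a sketch of the usual
   idiom, not a quote of a particular pattern), so the low word is
   always named by %Q and the high word by %R regardless of the
   endianness of the register pair.  */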
23634 static void
23635 arm_print_operand (FILE *stream, rtx x, int code)
23636 {
23637 switch (code)
23638 {
23639 case '@':
23640 fputs (ASM_COMMENT_START, stream);
23641 return;
23642
23643 case '_':
23644 fputs (user_label_prefix, stream);
23645 return;
23646
23647 case '|':
23648 fputs (REGISTER_PREFIX, stream);
23649 return;
23650
23651 case '?':
23652 arm_print_condition (stream);
23653 return;
23654
23655 case '.':
23656 /* The current condition code for a condition code setting instruction.
23657 Preceded by 's' in unified syntax, otherwise followed by 's'. */
23658 fputc('s', stream);
23659 arm_print_condition (stream);
23660 return;
23661
23662 case '!':
23663 /* If the instruction is conditionally executed then print
23664 the current condition code, otherwise print 's'. */
23665 gcc_assert (TARGET_THUMB2);
23666 if (current_insn_predicate)
23667 arm_print_condition (stream);
23668 else
23669 fputc('s', stream);
23670 break;
23671
23672 /* %# is a "break" sequence. It doesn't output anything, but is used to
23673 separate e.g. operand numbers from following text, if that text consists
23674 of further digits which we don't want to be part of the operand
23675 number. */
23676 case '#':
23677 return;
23678
23679 case 'N':
23680 {
23681 REAL_VALUE_TYPE r;
23682 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
23683 fprintf (stream, "%s", fp_const_from_val (&r));
23684 }
23685 return;
23686
23687 /* An integer or symbol address without a preceding # sign. */
23688 case 'c':
23689 switch (GET_CODE (x))
23690 {
23691 case CONST_INT:
23692 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
23693 break;
23694
23695 case SYMBOL_REF:
23696 output_addr_const (stream, x);
23697 break;
23698
23699 case CONST:
23700 if (GET_CODE (XEXP (x, 0)) == PLUS
23701 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
23702 {
23703 output_addr_const (stream, x);
23704 break;
23705 }
23706 /* Fall through. */
23707
23708 default:
23709 output_operand_lossage ("Unsupported operand for code '%c'", code);
23710 }
23711 return;
23712
23713 /* An integer that we want to print in HEX. */
23714 case 'x':
23715 switch (GET_CODE (x))
23716 {
23717 case CONST_INT:
23718 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
23719 break;
23720
23721 default:
23722 output_operand_lossage ("Unsupported operand for code '%c'", code);
23723 }
23724 return;
23725
23726 case 'B':
23727 if (CONST_INT_P (x))
23728 {
23729 HOST_WIDE_INT val;
23730 val = ARM_SIGN_EXTEND (~INTVAL (x));
23731 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
23732 }
23733 else
23734 {
23735 putc ('~', stream);
23736 output_addr_const (stream, x);
23737 }
23738 return;
23739
23740 case 'b':
23741 /* Print the log2 of a CONST_INT. */
23742 {
23743 HOST_WIDE_INT val;
23744
23745 if (!CONST_INT_P (x)
23746 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
23747 output_operand_lossage ("Unsupported operand for code '%c'", code);
23748 else
23749 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23750 }
23751 return;
23752
23753 case 'L':
23754 /* The low 16 bits of an immediate constant. */
23755 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
23756 return;
23757
23758 case 'i':
23759 fprintf (stream, "%s", arithmetic_instr (x, 1));
23760 return;
23761
23762 case 'I':
23763 fprintf (stream, "%s", arithmetic_instr (x, 0));
23764 return;
23765
23766 case 'S':
23767 {
23768 HOST_WIDE_INT val;
23769 const char *shift;
23770
23771 shift = shift_op (x, &val);
23772
23773 if (shift)
23774 {
23775 fprintf (stream, ", %s ", shift);
23776 if (val == -1)
23777 arm_print_operand (stream, XEXP (x, 1), 0);
23778 else
23779 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
23780 }
23781 }
23782 return;
23783
23784 /* An explanation of the 'Q', 'R' and 'H' register operands:
23785
23786 In a pair of registers containing a DI or DF value the 'Q'
23787 operand returns the register number of the register containing
23788 the least significant part of the value. The 'R' operand returns
23789 the register number of the register containing the most
23790 significant part of the value.
23791
23792 The 'H' operand returns the higher of the two register numbers.
23793 On a system where WORDS_BIG_ENDIAN is true the 'H' operand is the
23794 same as the 'Q' operand, since the most significant part of the
23795 value is held in the lower number register. The reverse is true
23796 on systems where WORDS_BIG_ENDIAN is false.
23797
23798 The purpose of these operands is to distinguish between cases
23799 where the endian-ness of the values is important (for example
23800 when they are added together), and cases where the endian-ness
23801 is irrelevant, but the order of register operations is important.
23802 For example when loading a value from memory into a register
23803 pair, the endian-ness does not matter. Provided that the value
23804 from the lower memory address is put into the lower numbered
23805 register, and the value from the higher address is put into the
23806 higher numbered register, the load will work regardless of whether
23807 the value being loaded is big-wordian or little-wordian. The
23808 order of the two register loads can matter however, if the address
23809 of the memory location is actually held in one of the registers
23810 being overwritten by the load.
23811
23812 The 'Q' and 'R' constraints are also available for 64-bit
23813 constants. */
23814 case 'Q':
23815 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23816 {
23817 rtx part = gen_lowpart (SImode, x);
23818 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23819 return;
23820 }
23821
23822 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23823 {
23824 output_operand_lossage ("invalid operand for code '%c'", code);
23825 return;
23826 }
23827
23828 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
23829 return;
23830
23831 case 'R':
23832 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
23833 {
23834 machine_mode mode = GET_MODE (x);
23835 rtx part;
23836
23837 if (mode == VOIDmode)
23838 mode = DImode;
23839 part = gen_highpart_mode (SImode, mode, x);
23840 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
23841 return;
23842 }
23843
23844 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23845 {
23846 output_operand_lossage ("invalid operand for code '%c'", code);
23847 return;
23848 }
23849
23850 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
23851 return;
23852
23853 case 'H':
23854 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23855 {
23856 output_operand_lossage ("invalid operand for code '%c'", code);
23857 return;
23858 }
23859
23860 asm_fprintf (stream, "%r", REGNO (x) + 1);
23861 return;
23862
23863 case 'J':
23864 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23865 {
23866 output_operand_lossage ("invalid operand for code '%c'", code);
23867 return;
23868 }
23869
23870 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
23871 return;
23872
23873 case 'K':
23874 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
23875 {
23876 output_operand_lossage ("invalid operand for code '%c'", code);
23877 return;
23878 }
23879
23880 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
23881 return;
23882
23883 case 'm':
23884 asm_fprintf (stream, "%r",
23885 REG_P (XEXP (x, 0))
23886 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
23887 return;
23888
23889 case 'M':
23890 asm_fprintf (stream, "{%r-%r}",
23891 REGNO (x),
23892 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
23893 return;
23894
23895 /* Like 'M', but writing doubleword vector registers, for use by Neon
23896 insns. */
23897 case 'h':
23898 {
23899 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
23900 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
23901 if (numregs == 1)
23902 asm_fprintf (stream, "{d%d}", regno);
23903 else
23904 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
23905 }
23906 return;
23907
23908 case 'd':
23909 /* CONST_TRUE_RTX means always -- that's the default. */
23910 if (x == const_true_rtx)
23911 return;
23912
23913 if (!COMPARISON_P (x))
23914 {
23915 output_operand_lossage ("invalid operand for code '%c'", code);
23916 return;
23917 }
23918
23919 fputs (arm_condition_codes[get_arm_condition_code (x)],
23920 stream);
23921 return;
23922
23923 case 'D':
23924 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
23925 want to do that. */
23926 if (x == const_true_rtx)
23927 {
23928 output_operand_lossage ("instruction never executed");
23929 return;
23930 }
23931 if (!COMPARISON_P (x))
23932 {
23933 output_operand_lossage ("invalid operand for code '%c'", code);
23934 return;
23935 }
23936
23937 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
23938 (get_arm_condition_code (x))],
23939 stream);
23940 return;
23941
23942 case 's':
23943 case 'V':
23944 case 'W':
23945 case 'X':
23946 case 'Y':
23947 case 'Z':
23948 /* Former Maverick support, removed after GCC-4.7. */
23949 output_operand_lossage ("obsolete Maverick format code '%c'", code);
23950 return;
23951
23952 case 'U':
23953 if (!REG_P (x)
23954 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
23955 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
23956 /* Bad value for wCG register number. */
23957 {
23958 output_operand_lossage ("invalid operand for code '%c'", code);
23959 return;
23960 }
23961
23962 else
23963 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
23964 return;
23965
23966 /* Print an iWMMXt control register name. */
23967 case 'w':
23968 if (!CONST_INT_P (x)
23969 || INTVAL (x) < 0
23970 || INTVAL (x) >= 16)
23971 /* Bad value for wC register number. */
23972 {
23973 output_operand_lossage ("invalid operand for code '%c'", code);
23974 return;
23975 }
23976
23977 else
23978 {
23979 static const char * wc_reg_names [16] =
23980 {
23981 "wCID", "wCon", "wCSSF", "wCASF",
23982 "wC4", "wC5", "wC6", "wC7",
23983 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
23984 "wC12", "wC13", "wC14", "wC15"
23985 };
23986
23987 fputs (wc_reg_names [INTVAL (x)], stream);
23988 }
23989 return;
23990
23991 /* Print the high single-precision register of a VFP double-precision
23992 register. */
23993 case 'p':
23994 {
23995 machine_mode mode = GET_MODE (x);
23996 int regno;
23997
23998 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
23999 {
24000 output_operand_lossage ("invalid operand for code '%c'", code);
24001 return;
24002 }
24003
24004 regno = REGNO (x);
24005 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
24006 {
24007 output_operand_lossage ("invalid operand for code '%c'", code);
24008 return;
24009 }
24010
24011 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
24012 }
24013 return;
24014
24015 /* Print a VFP/Neon double precision or quad precision register name. */
24016 case 'P':
24017 case 'q':
24018 {
24019 machine_mode mode = GET_MODE (x);
24020 int is_quad = (code == 'q');
24021 int regno;
24022
24023 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
24024 {
24025 output_operand_lossage ("invalid operand for code '%c'", code);
24026 return;
24027 }
24028
24029 if (!REG_P (x)
24030 || !IS_VFP_REGNUM (REGNO (x)))
24031 {
24032 output_operand_lossage ("invalid operand for code '%c'", code);
24033 return;
24034 }
24035
24036 regno = REGNO (x);
24037 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
24038 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
24039 {
24040 output_operand_lossage ("invalid operand for code '%c'", code);
24041 return;
24042 }
24043
24044 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
24045 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
24046 }
24047 return;
24048
24049 /* These two codes print the low/high doubleword register of a Neon quad
24050 register, respectively. For pair-structure types, can also print
24051 low/high quadword registers. */
24052 case 'e':
24053 case 'f':
24054 {
24055 machine_mode mode = GET_MODE (x);
24056 int regno;
24057
24058 if ((GET_MODE_SIZE (mode) != 16
24059 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
24060 {
24061 output_operand_lossage ("invalid operand for code '%c'", code);
24062 return;
24063 }
24064
24065 regno = REGNO (x);
24066 if (!NEON_REGNO_OK_FOR_QUAD (regno))
24067 {
24068 output_operand_lossage ("invalid operand for code '%c'", code);
24069 return;
24070 }
24071
24072 if (GET_MODE_SIZE (mode) == 16)
24073 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
24074 + (code == 'f' ? 1 : 0));
24075 else
24076 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
24077 + (code == 'f' ? 1 : 0));
24078 }
24079 return;
24080
24081 /* Print a VFPv3 floating-point constant, represented as an integer
24082 index. */
24083 case 'G':
24084 {
24085 int index = vfp3_const_double_index (x);
24086 gcc_assert (index != -1);
24087 fprintf (stream, "%d", index);
24088 }
24089 return;
24090
24091 /* Print bits representing opcode features for Neon.
24092
24093 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
24094 and polynomials as unsigned.
24095
24096 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
24097
24098 Bit 2 is 1 for rounding functions, 0 otherwise. */
24099
24100 /* Identify the type as 's', 'u', 'p' or 'f'. */
24101 case 'T':
24102 {
24103 HOST_WIDE_INT bits = INTVAL (x);
24104 fputc ("uspf"[bits & 3], stream);
24105 }
24106 return;
24107
24108 /* Likewise, but signed and unsigned integers are both 'i'. */
24109 case 'F':
24110 {
24111 HOST_WIDE_INT bits = INTVAL (x);
24112 fputc ("iipf"[bits & 3], stream);
24113 }
24114 return;
24115
24116 /* As for 'T', but emit 'u' instead of 'p'. */
24117 case 't':
24118 {
24119 HOST_WIDE_INT bits = INTVAL (x);
24120 fputc ("usuf"[bits & 3], stream);
24121 }
24122 return;
24123
24124 /* Bit 2: rounding (vs none). */
24125 case 'O':
24126 {
24127 HOST_WIDE_INT bits = INTVAL (x);
24128 fputs ((bits & 4) != 0 ? "r" : "", stream);
24129 }
24130 return;
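
    /* For example, with bits == 3 (signed + float) the codes above give
       'f' for %T, 'f' for %F and 'f' for %t while %O prints nothing, so a
       suffix written as ".%T3%#32" in a template comes out as ".f32";
       with bits == 4 (unsigned + rounding) %T gives 'u' and %O supplies
       the extra 'r' used by the rounding variants of the mnemonic.
       (Operand number 3 and the element width are purely illustrative.)  */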
24131
24132 /* Memory operand for vld1/vst1 instruction. */
24133 case 'A':
24134 {
24135 rtx addr;
24136 bool postinc = FALSE;
24137 rtx postinc_reg = NULL;
24138 unsigned align, memsize, align_bits;
24139
24140 gcc_assert (MEM_P (x));
24141 addr = XEXP (x, 0);
24142 if (GET_CODE (addr) == POST_INC)
24143 {
24144 postinc = 1;
24145 addr = XEXP (addr, 0);
24146 }
24147 if (GET_CODE (addr) == POST_MODIFY)
24148 {
24149 postinc_reg = XEXP (XEXP (addr, 1), 1);
24150 addr = XEXP (addr, 0);
24151 }
24152 asm_fprintf (stream, "[%r", REGNO (addr));
24153
24154 /* We know the alignment of this access, so we can emit a hint in the
24155 instruction (for some alignments) as an aid to the memory subsystem
24156 of the target. */
24157 align = MEM_ALIGN (x) >> 3;
24158 memsize = MEM_SIZE (x);
24159
24160 /* Only certain alignment specifiers are supported by the hardware. */
24161 if (memsize == 32 && (align % 32) == 0)
24162 align_bits = 256;
24163 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
24164 align_bits = 128;
24165 else if (memsize >= 8 && (align % 8) == 0)
24166 align_bits = 64;
24167 else
24168 align_bits = 0;
24169
24170 if (align_bits != 0)
24171 asm_fprintf (stream, ":%d", align_bits);
24172
24173 asm_fprintf (stream, "]");
24174
24175 if (postinc)
24176 fputs("!", stream);
24177 if (postinc_reg)
24178 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
24179 }
24180 return;
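
	/* So for a 16-byte vector access whose MEM is known to be 128-bit
	   aligned and whose address is (post_inc (reg r2)), the code above
	   prints "[r2:128]!", giving the hardware its alignment hint; with
	   no usable alignment this degrades to a plain "[r2]!".  (Register
	   and sizes are illustrative only.)  */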
24181
24182 /* Print a memory operand with an "Ux" or "Uj" constraint. Depending on the
24183 rtx_code of its address, the output takes one of the following forms:
24184 1. [Rn], #+/-<imm>
24185 2. [Rn, #+/-<imm>]!
24186 3. [Rn, #+/-<imm>]
24187 4. [Rn]. */
24188 case 'E':
24189 {
24190 rtx addr;
24191 rtx postinc_reg = NULL;
24192 unsigned inc_val = 0;
24193 enum rtx_code code;
24194
24195 gcc_assert (MEM_P (x));
24196 addr = XEXP (x, 0);
24197 code = GET_CODE (addr);
24198 if (code == POST_INC || code == POST_DEC || code == PRE_INC
24199 || code == PRE_DEC)
24200 {
24201 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24202 inc_val = GET_MODE_SIZE (GET_MODE (x));
24203 if (code == POST_INC || code == POST_DEC)
24204 asm_fprintf (stream, "], #%s%d",(code == POST_INC)
24205 ? "": "-", inc_val);
24206 else
24207 asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
24208 ? "": "-", inc_val);
24209 }
24210 else if (code == POST_MODIFY || code == PRE_MODIFY)
24211 {
24212 asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
24213 postinc_reg = XEXP (XEXP (addr, 1), 1);
24214 if (postinc_reg && CONST_INT_P (postinc_reg))
24215 {
24216 if (code == POST_MODIFY)
24217 asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
24218 else
24219 asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
24220 }
24221 }
24222 else if (code == PLUS)
24223 {
24224 rtx base = XEXP (addr, 0);
24225 rtx index = XEXP (addr, 1);
24226
24227 gcc_assert (REG_P (base) && CONST_INT_P (index));
24228
24229 HOST_WIDE_INT offset = INTVAL (index);
24230 asm_fprintf (stream, "[%r, #%wd]", REGNO (base), offset);
24231 }
24232 else
24233 {
24234 gcc_assert (REG_P (addr));
24235 asm_fprintf (stream, "[%r]",REGNO (addr));
24236 }
24237 }
24238 return;
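
	/* Concretely, for an SImode MEM the address (post_inc (reg r3))
	   prints as "[r3], #4", (pre_dec (reg r3)) as "[r3, #-4]!",
	   (plus (reg r3) (const_int 8)) as "[r3, #8]" and a bare (reg r3)
	   as "[r3]", matching the four forms listed above.  */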
24239
24240 case 'C':
24241 {
24242 rtx addr;
24243
24244 gcc_assert (MEM_P (x));
24245 addr = XEXP (x, 0);
24246 gcc_assert (REG_P (addr));
24247 asm_fprintf (stream, "[%r]", REGNO (addr));
24248 }
24249 return;
24250
24251 /* Translate an S register number into a D register number and element index. */
24252 case 'y':
24253 {
24254 machine_mode mode = GET_MODE (x);
24255 int regno;
24256
24257 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
24258 {
24259 output_operand_lossage ("invalid operand for code '%c'", code);
24260 return;
24261 }
24262
24263 regno = REGNO (x);
24264 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24265 {
24266 output_operand_lossage ("invalid operand for code '%c'", code);
24267 return;
24268 }
24269
24270 regno = regno - FIRST_VFP_REGNUM;
24271 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
24272 }
24273 return;
24274
24275 case 'v':
24276 gcc_assert (CONST_DOUBLE_P (x));
24277 int result;
24278 result = vfp3_const_double_for_fract_bits (x);
24279 if (result == 0)
24280 result = vfp3_const_double_for_bits (x);
24281 fprintf (stream, "#%d", result);
24282 return;
24283
24284 /* Register specifier for vld1.16/vst1.16. Translate the S register
24285 number into a D register number and element index. */
24286 case 'z':
24287 {
24288 machine_mode mode = GET_MODE (x);
24289 int regno;
24290
24291 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
24292 {
24293 output_operand_lossage ("invalid operand for code '%c'", code);
24294 return;
24295 }
24296
24297 regno = REGNO (x);
24298 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
24299 {
24300 output_operand_lossage ("invalid operand for code '%c'", code);
24301 return;
24302 }
24303
24304 regno = regno - FIRST_VFP_REGNUM;
24305 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
24306 }
24307 return;
24308
24309 default:
24310 if (x == 0)
24311 {
24312 output_operand_lossage ("missing operand");
24313 return;
24314 }
24315
24316 switch (GET_CODE (x))
24317 {
24318 case REG:
24319 asm_fprintf (stream, "%r", REGNO (x));
24320 break;
24321
24322 case MEM:
24323 output_address (GET_MODE (x), XEXP (x, 0));
24324 break;
24325
24326 case CONST_DOUBLE:
24327 {
24328 char fpstr[20];
24329 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
24330 sizeof (fpstr), 0, 1);
24331 fprintf (stream, "#%s", fpstr);
24332 }
24333 break;
24334
24335 default:
24336 gcc_assert (GET_CODE (x) != NEG);
24337 fputc ('#', stream);
24338 if (GET_CODE (x) == HIGH)
24339 {
24340 fputs (":lower16:", stream);
24341 x = XEXP (x, 0);
24342 }
24343
24344 output_addr_const (stream, x);
24345 break;
24346 }
24347 }
24348 }
24349 \f
24350 /* Target hook for printing a memory address. */
24351 static void
24352 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
24353 {
24354 if (TARGET_32BIT)
24355 {
24356 int is_minus = GET_CODE (x) == MINUS;
24357
24358 if (REG_P (x))
24359 asm_fprintf (stream, "[%r]", REGNO (x));
24360 else if (GET_CODE (x) == PLUS || is_minus)
24361 {
24362 rtx base = XEXP (x, 0);
24363 rtx index = XEXP (x, 1);
24364 HOST_WIDE_INT offset = 0;
24365 if (!REG_P (base)
24366 || (REG_P (index) && REGNO (index) == SP_REGNUM))
24367 {
24368 /* Ensure that BASE is a register. */
24369 /* (one of them must be). */
24370 /* Also ensure the SP is not used as an index register. */
24371 std::swap (base, index);
24372 }
24373 switch (GET_CODE (index))
24374 {
24375 case CONST_INT:
24376 offset = INTVAL (index);
24377 if (is_minus)
24378 offset = -offset;
24379 asm_fprintf (stream, "[%r, #%wd]",
24380 REGNO (base), offset);
24381 break;
24382
24383 case REG:
24384 asm_fprintf (stream, "[%r, %s%r]",
24385 REGNO (base), is_minus ? "-" : "",
24386 REGNO (index));
24387 break;
24388
24389 case MULT:
24390 case ASHIFTRT:
24391 case LSHIFTRT:
24392 case ASHIFT:
24393 case ROTATERT:
24394 {
24395 asm_fprintf (stream, "[%r, %s%r",
24396 REGNO (base), is_minus ? "-" : "",
24397 REGNO (XEXP (index, 0)));
24398 arm_print_operand (stream, index, 'S');
24399 fputs ("]", stream);
24400 break;
24401 }
24402
24403 default:
24404 gcc_unreachable ();
24405 }
24406 }
24407 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
24408 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
24409 {
24410 gcc_assert (REG_P (XEXP (x, 0)));
24411
24412 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
24413 asm_fprintf (stream, "[%r, #%s%d]!",
24414 REGNO (XEXP (x, 0)),
24415 GET_CODE (x) == PRE_DEC ? "-" : "",
24416 GET_MODE_SIZE (mode));
24417 else if (TARGET_HAVE_MVE && (mode == OImode || mode == XImode))
24418 asm_fprintf (stream, "[%r]!", REGNO (XEXP (x,0)));
24419 else
24420 asm_fprintf (stream, "[%r], #%s%d", REGNO (XEXP (x, 0)),
24421 GET_CODE (x) == POST_DEC ? "-" : "",
24422 GET_MODE_SIZE (mode));
24423 }
24424 else if (GET_CODE (x) == PRE_MODIFY)
24425 {
24426 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
24427 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24428 asm_fprintf (stream, "#%wd]!",
24429 INTVAL (XEXP (XEXP (x, 1), 1)));
24430 else
24431 asm_fprintf (stream, "%r]!",
24432 REGNO (XEXP (XEXP (x, 1), 1)));
24433 }
24434 else if (GET_CODE (x) == POST_MODIFY)
24435 {
24436 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
24437 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
24438 asm_fprintf (stream, "#%wd",
24439 INTVAL (XEXP (XEXP (x, 1), 1)));
24440 else
24441 asm_fprintf (stream, "%r",
24442 REGNO (XEXP (XEXP (x, 1), 1)));
24443 }
24444 else output_addr_const (stream, x);
24445 }
24446 else
24447 {
24448 if (REG_P (x))
24449 asm_fprintf (stream, "[%r]", REGNO (x));
24450 else if (GET_CODE (x) == POST_INC)
24451 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
24452 else if (GET_CODE (x) == PLUS)
24453 {
24454 gcc_assert (REG_P (XEXP (x, 0)));
24455 if (CONST_INT_P (XEXP (x, 1)))
24456 asm_fprintf (stream, "[%r, #%wd]",
24457 REGNO (XEXP (x, 0)),
24458 INTVAL (XEXP (x, 1)));
24459 else
24460 asm_fprintf (stream, "[%r, %r]",
24461 REGNO (XEXP (x, 0)),
24462 REGNO (XEXP (x, 1)));
24463 }
24464 else
24465 output_addr_const (stream, x);
24466 }
24467 }
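
/* A few 32-bit examples of the above: (reg r3) prints as "[r3]",
   (plus (reg r3) (const_int -8)) as "[r3, #-8]", (minus (reg r3) (reg r2))
   as "[r3, -r2]", (pre_inc (reg sp)) in SImode as "[sp, #4]!" and
   (post_dec (reg r1)) in SImode as "[r1], #-4".  */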
24468 \f
24469 /* Target hook for indicating whether a punctuation character for
24470 TARGET_PRINT_OPERAND is valid. */
24471 static bool
24472 arm_print_operand_punct_valid_p (unsigned char code)
24473 {
24474 return (code == '@' || code == '|' || code == '.'
24475 || code == '(' || code == ')' || code == '#'
24476 || (TARGET_32BIT && (code == '?'))
24477 || (TARGET_THUMB2 && (code == '!'))
24478 || (TARGET_THUMB && (code == '_')));
24479 }
24480 \f
24481 /* Target hook for assembling integer objects. The ARM version needs to
24482 handle word-sized values specially. */
24483 static bool
24484 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
24485 {
24486 machine_mode mode;
24487
24488 if (size == UNITS_PER_WORD && aligned_p)
24489 {
24490 fputs ("\t.word\t", asm_out_file);
24491 output_addr_const (asm_out_file, x);
24492
24493 /* Mark symbols as position independent. We only do this in the
24494 .text segment, not in the .data segment. */
24495 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
24496 (SYMBOL_REF_P (x) || LABEL_REF_P (x)))
24497 {
24498 /* See legitimize_pic_address for an explanation of the
24499 TARGET_VXWORKS_RTP check. */
24500 /* References to weak symbols cannot be resolved locally:
24501 they may be overridden by a non-weak definition at link
24502 time. */
24503 if (!arm_pic_data_is_text_relative
24504 || (SYMBOL_REF_P (x)
24505 && (!SYMBOL_REF_LOCAL_P (x)
24506 || (SYMBOL_REF_DECL (x)
24507 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0)
24508 || (SYMBOL_REF_FUNCTION_P (x)
24509 && !arm_fdpic_local_funcdesc_p (x)))))
24510 {
24511 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24512 fputs ("(GOTFUNCDESC)", asm_out_file);
24513 else
24514 fputs ("(GOT)", asm_out_file);
24515 }
24516 else
24517 {
24518 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (x))
24519 fputs ("(GOTOFFFUNCDESC)", asm_out_file);
24520 else
24521 {
24522 bool is_readonly;
24523
24524 if (!TARGET_FDPIC
24525 || arm_is_segment_info_known (x, &is_readonly))
24526 fputs ("(GOTOFF)", asm_out_file);
24527 else
24528 fputs ("(GOT)", asm_out_file);
24529 }
24530 }
24531 }
24532
24533 /* For FDPIC we also have to mark symbol for .data section. */
24534 if (TARGET_FDPIC
24535 && !making_const_table
24536 && SYMBOL_REF_P (x)
24537 && SYMBOL_REF_FUNCTION_P (x))
24538 fputs ("(FUNCDESC)", asm_out_file);
24539
24540 fputc ('\n', asm_out_file);
24541 return true;
24542 }
24543
24544 mode = GET_MODE (x);
24545
24546 if (arm_vector_mode_supported_p (mode))
24547 {
24548 int i, units;
24549
24550 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24551
24552 units = CONST_VECTOR_NUNITS (x);
24553 size = GET_MODE_UNIT_SIZE (mode);
24554
24555 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
24556 for (i = 0; i < units; i++)
24557 {
24558 rtx elt = CONST_VECTOR_ELT (x, i);
24559 assemble_integer
24560 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
24561 }
24562 else
24563 for (i = 0; i < units; i++)
24564 {
24565 rtx elt = CONST_VECTOR_ELT (x, i);
24566 assemble_real
24567 (*CONST_DOUBLE_REAL_VALUE (elt),
24568 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
24569 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
24570 }
24571
24572 return true;
24573 }
24574
24575 return default_assemble_integer (x, size, aligned_p);
24576 }
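
/* By way of example, when emitting a PIC constant pool the code above
   turns a word-sized reference to a preemptible symbol into something
   like "\t.word\tfoo(GOT)\n", while a local, text-relative symbol is
   emitted as "\t.word\tbar(GOTOFF)\n"; outside the constant table an
   FDPIC function symbol is annotated as "\t.word\tfunc(FUNCDESC)\n".
   (The symbol names are placeholders.)  */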
24577
24578 static void
24579 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
24580 {
24581 section *s;
24582
24583 if (!TARGET_AAPCS_BASED)
24584 {
24585 (is_ctor ?
24586 default_named_section_asm_out_constructor
24587 : default_named_section_asm_out_destructor) (symbol, priority);
24588 return;
24589 }
24590
24591 /* Put these in the .init_array section, using a special relocation. */
24592 if (priority != DEFAULT_INIT_PRIORITY)
24593 {
24594 char buf[18];
24595 sprintf (buf, "%s.%.5u",
24596 is_ctor ? ".init_array" : ".fini_array",
24597 priority);
24598 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
24599 }
24600 else if (is_ctor)
24601 s = ctors_section;
24602 else
24603 s = dtors_section;
24604
24605 switch_to_section (s);
24606 assemble_align (POINTER_SIZE);
24607 fputs ("\t.word\t", asm_out_file);
24608 output_addr_const (asm_out_file, symbol);
24609 fputs ("(target1)\n", asm_out_file);
24610 }
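
/* For example, an AAPCS constructor registered with priority 101 is
   emitted into a ".init_array.00101" section as "\t.word\tctor(target1)";
   the (target1) relocation lets the linker decide between absolute and
   relative forms.  Default-priority entries simply use ctors_section or
   dtors_section instead.  (The symbol name and priority are illustrative.)  */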
24611
24612 /* Add a function to the list of static constructors. */
24613
24614 static void
24615 arm_elf_asm_constructor (rtx symbol, int priority)
24616 {
24617 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
24618 }
24619
24620 /* Add a function to the list of static destructors. */
24621
24622 static void
24623 arm_elf_asm_destructor (rtx symbol, int priority)
24624 {
24625 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
24626 }
24627 \f
24628 /* A finite state machine takes care of noticing whether or not instructions
24629 can be conditionally executed, thereby decreasing execution time and code
24630 size by deleting branch instructions. The fsm is controlled by
24631 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
24632
24633 /* The states of the fsm controlling condition codes are:
24634 0: normal, do nothing special
24635 1: make ASM_OUTPUT_OPCODE not output this instruction
24636 2: make ASM_OUTPUT_OPCODE not output this instruction
24637 3: make instructions conditional
24638 4: make instructions conditional
24639
24640 State transitions (state->state by whom under condition):
24641 0 -> 1 final_prescan_insn if the `target' is a label
24642 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
24643 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
24644 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
24645 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
24646 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
24647 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
24648 (the target insn is arm_target_insn).
24649
24650 If the jump clobbers the conditions then we use states 2 and 4.
24651
24652 A similar thing can be done with conditional return insns.
24653
24654 XXX In case the `target' is an unconditional branch, this conditionalising
24655 of the instructions always reduces code size, but not always execution
24656 time. But then, I want to reduce the code size to somewhere near what
24657 /bin/cc produces. */
24658
24659 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
24660 instructions. When a COND_EXEC instruction is seen the subsequent
24661 instructions are scanned so that multiple conditional instructions can be
24662 combined into a single IT block. arm_condexec_count and arm_condexec_mask
24663 specify the length and true/false mask for the IT block. These will be
24664 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
24665
24666 /* Returns the index of the ARM condition code string in
24667 `arm_condition_codes', or ARM_NV if the comparison is invalid.
24668 COMPARISON should be an rtx like `(eq (...) (...))'. */
24669
24670 enum arm_cond_code
24671 maybe_get_arm_condition_code (rtx comparison)
24672 {
24673 machine_mode mode = GET_MODE (XEXP (comparison, 0));
24674 enum arm_cond_code code;
24675 enum rtx_code comp_code = GET_CODE (comparison);
24676
24677 if (GET_MODE_CLASS (mode) != MODE_CC)
24678 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
24679 XEXP (comparison, 1));
24680
24681 switch (mode)
24682 {
24683 case E_CC_DNEmode: code = ARM_NE; goto dominance;
24684 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
24685 case E_CC_DGEmode: code = ARM_GE; goto dominance;
24686 case E_CC_DGTmode: code = ARM_GT; goto dominance;
24687 case E_CC_DLEmode: code = ARM_LE; goto dominance;
24688 case E_CC_DLTmode: code = ARM_LT; goto dominance;
24689 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
24690 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
24691 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
24692 case E_CC_DLTUmode: code = ARM_CC;
24693
24694 dominance:
24695 if (comp_code == EQ)
24696 return ARM_INVERSE_CONDITION_CODE (code);
24697 if (comp_code == NE)
24698 return code;
24699 return ARM_NV;
24700
24701 case E_CC_NZmode:
24702 switch (comp_code)
24703 {
24704 case NE: return ARM_NE;
24705 case EQ: return ARM_EQ;
24706 case GE: return ARM_PL;
24707 case LT: return ARM_MI;
24708 default: return ARM_NV;
24709 }
24710
24711 case E_CC_Zmode:
24712 switch (comp_code)
24713 {
24714 case NE: return ARM_NE;
24715 case EQ: return ARM_EQ;
24716 default: return ARM_NV;
24717 }
24718
24719 case E_CC_Nmode:
24720 switch (comp_code)
24721 {
24722 case NE: return ARM_MI;
24723 case EQ: return ARM_PL;
24724 default: return ARM_NV;
24725 }
24726
24727 case E_CCFPEmode:
24728 case E_CCFPmode:
24729 /* We can handle all cases except UNEQ and LTGT. */
24730 switch (comp_code)
24731 {
24732 case GE: return ARM_GE;
24733 case GT: return ARM_GT;
24734 case LE: return ARM_LS;
24735 case LT: return ARM_MI;
24736 case NE: return ARM_NE;
24737 case EQ: return ARM_EQ;
24738 case ORDERED: return ARM_VC;
24739 case UNORDERED: return ARM_VS;
24740 case UNLT: return ARM_LT;
24741 case UNLE: return ARM_LE;
24742 case UNGT: return ARM_HI;
24743 case UNGE: return ARM_PL;
24744 /* UNEQ and LTGT do not have a representation. */
24745 case UNEQ: /* Fall through. */
24746 case LTGT: /* Fall through. */
24747 default: return ARM_NV;
24748 }
24749
24750 case E_CC_SWPmode:
24751 switch (comp_code)
24752 {
24753 case NE: return ARM_NE;
24754 case EQ: return ARM_EQ;
24755 case GE: return ARM_LE;
24756 case GT: return ARM_LT;
24757 case LE: return ARM_GE;
24758 case LT: return ARM_GT;
24759 case GEU: return ARM_LS;
24760 case GTU: return ARM_CC;
24761 case LEU: return ARM_CS;
24762 case LTU: return ARM_HI;
24763 default: return ARM_NV;
24764 }
24765
24766 case E_CC_Cmode:
24767 switch (comp_code)
24768 {
24769 case LTU: return ARM_CS;
24770 case GEU: return ARM_CC;
24771 default: return ARM_NV;
24772 }
24773
24774 case E_CC_NVmode:
24775 switch (comp_code)
24776 {
24777 case GE: return ARM_GE;
24778 case LT: return ARM_LT;
24779 default: return ARM_NV;
24780 }
24781
24782 case E_CC_Bmode:
24783 switch (comp_code)
24784 {
24785 case GEU: return ARM_CS;
24786 case LTU: return ARM_CC;
24787 default: return ARM_NV;
24788 }
24789
24790 case E_CC_Vmode:
24791 switch (comp_code)
24792 {
24793 case NE: return ARM_VS;
24794 case EQ: return ARM_VC;
24795 default: return ARM_NV;
24796 }
24797
24798 case E_CC_ADCmode:
24799 switch (comp_code)
24800 {
24801 case GEU: return ARM_CS;
24802 case LTU: return ARM_CC;
24803 default: return ARM_NV;
24804 }
24805
24806 case E_CCmode:
24807 case E_CC_RSBmode:
24808 switch (comp_code)
24809 {
24810 case NE: return ARM_NE;
24811 case EQ: return ARM_EQ;
24812 case GE: return ARM_GE;
24813 case GT: return ARM_GT;
24814 case LE: return ARM_LE;
24815 case LT: return ARM_LT;
24816 case GEU: return ARM_CS;
24817 case GTU: return ARM_HI;
24818 case LEU: return ARM_LS;
24819 case LTU: return ARM_CC;
24820 default: return ARM_NV;
24821 }
24822
24823 default: gcc_unreachable ();
24824 }
24825 }
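
/* Note that the swapped-operand modes above read "backwards" by design:
   under CC_SWPmode a GT comparison maps to ARM_LT and an unsigned GEU to
   ARM_LS, because the flags were set from the operands in the opposite
   order and the printed condition must still describe the original
   relation.  */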
24826
24827 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
24828 static enum arm_cond_code
24829 get_arm_condition_code (rtx comparison)
24830 {
24831 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
24832 gcc_assert (code != ARM_NV);
24833 return code;
24834 }
24835
24836 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
24837 code registers when not targeting Thumb1. The VFP condition register
24838 only exists when generating hard-float code. */
24839 static bool
24840 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
24841 {
24842 if (!TARGET_32BIT)
24843 return false;
24844
24845 *p1 = CC_REGNUM;
24846 *p2 = TARGET_VFP_BASE ? VFPCC_REGNUM : INVALID_REGNUM;
24847 return true;
24848 }
24849
24850 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
24851 instructions. */
24852 void
24853 thumb2_final_prescan_insn (rtx_insn *insn)
24854 {
24855 rtx_insn *first_insn = insn;
24856 rtx body = PATTERN (insn);
24857 rtx predicate;
24858 enum arm_cond_code code;
24859 int n;
24860 int mask;
24861 int max;
24862
24863 /* max_insns_skipped in the tune was already taken into account in the
24864 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
24865 just emit IT blocks as large as we can. It does not make sense to split
24866 the IT blocks. */
24867 max = MAX_INSN_PER_IT_BLOCK;
24868
24869 /* Remove the previous insn from the count of insns to be output. */
24870 if (arm_condexec_count)
24871 arm_condexec_count--;
24872
24873 /* Nothing to do if we are already inside a conditional block. */
24874 if (arm_condexec_count)
24875 return;
24876
24877 if (GET_CODE (body) != COND_EXEC)
24878 return;
24879
24880 /* Conditional jumps are implemented directly. */
24881 if (JUMP_P (insn))
24882 return;
24883
24884 predicate = COND_EXEC_TEST (body);
24885 arm_current_cc = get_arm_condition_code (predicate);
24886
24887 n = get_attr_ce_count (insn);
24888 arm_condexec_count = 1;
24889 arm_condexec_mask = (1 << n) - 1;
24890 arm_condexec_masklen = n;
24891 /* See if subsequent instructions can be combined into the same block. */
24892 for (;;)
24893 {
24894 insn = next_nonnote_insn (insn);
24895
24896 /* Jumping into the middle of an IT block is illegal, so a label or
24897 barrier terminates the block. */
24898 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
24899 break;
24900
24901 body = PATTERN (insn);
24902 /* USE and CLOBBER aren't really insns, so just skip them. */
24903 if (GET_CODE (body) == USE
24904 || GET_CODE (body) == CLOBBER)
24905 continue;
24906
24907 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
24908 if (GET_CODE (body) != COND_EXEC)
24909 break;
24910 /* Maximum number of conditionally executed instructions in a block. */
24911 n = get_attr_ce_count (insn);
24912 if (arm_condexec_masklen + n > max)
24913 break;
24914
24915 predicate = COND_EXEC_TEST (body);
24916 code = get_arm_condition_code (predicate);
24917 mask = (1 << n) - 1;
24918 if (arm_current_cc == code)
24919 arm_condexec_mask |= (mask << arm_condexec_masklen);
24920 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
24921 break;
24922
24923 arm_condexec_count++;
24924 arm_condexec_masklen += n;
24925
24926 /* A jump must be the last instruction in a conditional block. */
24927 if (JUMP_P (insn))
24928 break;
24929 }
24930 /* Restore recog_data (getting the attributes of other insns can
24931 destroy this array, but final.c assumes that it remains intact
24932 across this call). */
24933 extract_constrain_insn_cached (first_insn);
24934 }
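
/* To make the bookkeeping concrete: a pair of COND_EXEC insns that will
   be emitted as "ite eq" (first insn executed on EQ, second on NE)
   leaves arm_condexec_count == 2, arm_condexec_masklen == 2 and
   arm_condexec_mask == 1 (binary 01) -- bit 0 set for the leading EQ
   insn, bit 1 clear for the inverted NE insn -- and arm_asm_output_opcode
   then consumes that state when it prints the IT instruction.  */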
24935
24936 void
24937 arm_final_prescan_insn (rtx_insn *insn)
24938 {
24939 /* BODY will hold the body of INSN. */
24940 rtx body = PATTERN (insn);
24941
24942 /* This will be 1 if trying to repeat the trick, and things need to be
24943 reversed if it appears to fail. */
24944 int reverse = 0;
24945
24946 /* If we start with a return insn, we only succeed if we find another one. */
24947 int seeking_return = 0;
24948 enum rtx_code return_code = UNKNOWN;
24949
24950 /* START_INSN will hold the insn from where we start looking. This is the
24951 first insn after the following code_label if REVERSE is true. */
24952 rtx_insn *start_insn = insn;
24953
24954 /* If in state 4, check if the target branch is reached, in order to
24955 change back to state 0. */
24956 if (arm_ccfsm_state == 4)
24957 {
24958 if (insn == arm_target_insn)
24959 {
24960 arm_target_insn = NULL;
24961 arm_ccfsm_state = 0;
24962 }
24963 return;
24964 }
24965
24966 /* If in state 3, it is possible to repeat the trick, if this insn is an
24967 unconditional branch to a label, and immediately following this branch
24968 is the previous target label which is only used once, and the label this
24969 branch jumps to is not too far off. */
24970 if (arm_ccfsm_state == 3)
24971 {
24972 if (simplejump_p (insn))
24973 {
24974 start_insn = next_nonnote_insn (start_insn);
24975 if (BARRIER_P (start_insn))
24976 {
24977 /* XXX Isn't this always a barrier? */
24978 start_insn = next_nonnote_insn (start_insn);
24979 }
24980 if (LABEL_P (start_insn)
24981 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
24982 && LABEL_NUSES (start_insn) == 1)
24983 reverse = TRUE;
24984 else
24985 return;
24986 }
24987 else if (ANY_RETURN_P (body))
24988 {
24989 start_insn = next_nonnote_insn (start_insn);
24990 if (BARRIER_P (start_insn))
24991 start_insn = next_nonnote_insn (start_insn);
24992 if (LABEL_P (start_insn)
24993 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
24994 && LABEL_NUSES (start_insn) == 1)
24995 {
24996 reverse = TRUE;
24997 seeking_return = 1;
24998 return_code = GET_CODE (body);
24999 }
25000 else
25001 return;
25002 }
25003 else
25004 return;
25005 }
25006
25007 gcc_assert (!arm_ccfsm_state || reverse);
25008 if (!JUMP_P (insn))
25009 return;
25010
25011 /* This jump might be paralleled with a clobber of the condition codes;
25012 the jump should always come first.  */
25013 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
25014 body = XVECEXP (body, 0, 0);
25015
25016 if (reverse
25017 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
25018 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
25019 {
25020 int insns_skipped;
25021 int fail = FALSE, succeed = FALSE;
25022 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
25023 int then_not_else = TRUE;
25024 rtx_insn *this_insn = start_insn;
25025 rtx label = 0;
25026
25027 /* Register the insn jumped to. */
25028 if (reverse)
25029 {
25030 if (!seeking_return)
25031 label = XEXP (SET_SRC (body), 0);
25032 }
25033 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
25034 label = XEXP (XEXP (SET_SRC (body), 1), 0);
25035 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
25036 {
25037 label = XEXP (XEXP (SET_SRC (body), 2), 0);
25038 then_not_else = FALSE;
25039 }
25040 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
25041 {
25042 seeking_return = 1;
25043 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
25044 }
25045 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
25046 {
25047 seeking_return = 1;
25048 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
25049 then_not_else = FALSE;
25050 }
25051 else
25052 gcc_unreachable ();
25053
25054 /* See how many insns this branch skips, and what kind of insns. If all
25055 insns are okay, and the label or unconditional branch to the same
25056 label is not too far away, succeed. */
25057 for (insns_skipped = 0;
25058 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
25059 {
25060 rtx scanbody;
25061
25062 this_insn = next_nonnote_insn (this_insn);
25063 if (!this_insn)
25064 break;
25065
25066 switch (GET_CODE (this_insn))
25067 {
25068 case CODE_LABEL:
25069 /* Succeed if it is the target label, otherwise fail since
25070 control falls in from somewhere else. */
25071 if (this_insn == label)
25072 {
25073 arm_ccfsm_state = 1;
25074 succeed = TRUE;
25075 }
25076 else
25077 fail = TRUE;
25078 break;
25079
25080 case BARRIER:
25081 /* Succeed if the following insn is the target label.
25082 Otherwise fail.
25083 If return insns are used then the last insn in a function
25084 will be a barrier. */
25085 this_insn = next_nonnote_insn (this_insn);
25086 if (this_insn && this_insn == label)
25087 {
25088 arm_ccfsm_state = 1;
25089 succeed = TRUE;
25090 }
25091 else
25092 fail = TRUE;
25093 break;
25094
25095 case CALL_INSN:
25096 /* The AAPCS says that conditional calls should not be
25097 used since they make interworking inefficient (the
25098 linker can't transform BL<cond> into BLX). That's
25099 only a problem if the machine has BLX. */
25100 if (arm_arch5t)
25101 {
25102 fail = TRUE;
25103 break;
25104 }
25105
25106 /* Succeed if the following insn is the target label, or
25107 if the following two insns are a barrier and the
25108 target label. */
25109 this_insn = next_nonnote_insn (this_insn);
25110 if (this_insn && BARRIER_P (this_insn))
25111 this_insn = next_nonnote_insn (this_insn);
25112
25113 if (this_insn && this_insn == label
25114 && insns_skipped < max_insns_skipped)
25115 {
25116 arm_ccfsm_state = 1;
25117 succeed = TRUE;
25118 }
25119 else
25120 fail = TRUE;
25121 break;
25122
25123 case JUMP_INSN:
25124 /* If this is an unconditional branch to the same label, succeed.
25125 If it is to another label, do nothing. If it is conditional,
25126 fail. */
25127 /* XXX Probably, the tests for SET and the PC are
25128 unnecessary. */
25129
25130 scanbody = PATTERN (this_insn);
25131 if (GET_CODE (scanbody) == SET
25132 && GET_CODE (SET_DEST (scanbody)) == PC)
25133 {
25134 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
25135 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
25136 {
25137 arm_ccfsm_state = 2;
25138 succeed = TRUE;
25139 }
25140 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
25141 fail = TRUE;
25142 }
25143 /* Fail if a conditional return is undesirable (e.g. on a
25144 StrongARM), but still allow this if optimizing for size. */
25145 else if (GET_CODE (scanbody) == return_code
25146 && !use_return_insn (TRUE, NULL)
25147 && !optimize_size)
25148 fail = TRUE;
25149 else if (GET_CODE (scanbody) == return_code)
25150 {
25151 arm_ccfsm_state = 2;
25152 succeed = TRUE;
25153 }
25154 else if (GET_CODE (scanbody) == PARALLEL)
25155 {
25156 switch (get_attr_conds (this_insn))
25157 {
25158 case CONDS_NOCOND:
25159 break;
25160 default:
25161 fail = TRUE;
25162 break;
25163 }
25164 }
25165 else
25166 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
25167
25168 break;
25169
25170 case INSN:
25171 /* Instructions using or affecting the condition codes make it
25172 fail. */
25173 scanbody = PATTERN (this_insn);
25174 if (!(GET_CODE (scanbody) == SET
25175 || GET_CODE (scanbody) == PARALLEL)
25176 || get_attr_conds (this_insn) != CONDS_NOCOND)
25177 fail = TRUE;
25178 break;
25179
25180 default:
25181 break;
25182 }
25183 }
25184 if (succeed)
25185 {
25186 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
25187 arm_target_label = CODE_LABEL_NUMBER (label);
25188 else
25189 {
25190 gcc_assert (seeking_return || arm_ccfsm_state == 2);
25191
25192 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
25193 {
25194 this_insn = next_nonnote_insn (this_insn);
25195 gcc_assert (!this_insn
25196 || (!BARRIER_P (this_insn)
25197 && !LABEL_P (this_insn)));
25198 }
25199 if (!this_insn)
25200 {
25201 /* Oh dear!  We ran off the end; give up.  */
25202 extract_constrain_insn_cached (insn);
25203 arm_ccfsm_state = 0;
25204 arm_target_insn = NULL;
25205 return;
25206 }
25207 arm_target_insn = this_insn;
25208 }
25209
25210 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
25211 what it was. */
25212 if (!reverse)
25213 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
25214
25215 if (reverse || then_not_else)
25216 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
25217 }
25218
25219 /* Restore recog_data (getting the attributes of other insns can
25220 destroy this array, but final.c assumes that it remains intact
25221 across this call).  */
25222 extract_constrain_insn_cached (insn);
25223 }
25224 }
25225
25226 /* Output IT instructions. */
25227 void
25228 thumb2_asm_output_opcode (FILE * stream)
25229 {
25230 char buff[5];
25231 int n;
25232
25233 if (arm_condexec_mask)
25234 {
25235 for (n = 0; n < arm_condexec_masklen; n++)
25236 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
25237 buff[n] = 0;
25238 asm_fprintf (stream, "i%s\t%s\n\t", buff,
25239 arm_condition_codes[arm_current_cc]);
25240 arm_condexec_mask = 0;
25241 }
25242 }
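/* For instance, with arm_current_cc == ARM_EQ, arm_condexec_masklen == 3
   and arm_condexec_mask == 0b011 the function above prints "itte eq":
   the first two instructions of the block execute if EQ and the third
   if NE.  */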
25243
25244 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
25245 UNITS_PER_WORD bytes wide. */
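/* For example, DImode occupies two core registers; on 32-bit (ARM and
   Thumb-2) targets the condition-code register counts as a single
   register whatever the mode.  */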
25246 static unsigned int
25247 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
25248 {
25249 if (TARGET_32BIT
25250 && regno > PC_REGNUM
25251 && regno != FRAME_POINTER_REGNUM
25252 && regno != ARG_POINTER_REGNUM
25253 && !IS_VFP_REGNUM (regno))
25254 return 1;
25255
25256 return ARM_NUM_REGS (mode);
25257 }
25258
25259 /* Implement TARGET_HARD_REGNO_MODE_OK. */
25260 static bool
25261 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
25262 {
25263 if (GET_MODE_CLASS (mode) == MODE_CC)
25264 return (regno == CC_REGNUM
25265 || (TARGET_VFP_BASE
25266 && regno == VFPCC_REGNUM));
25267
25268 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
25269 return false;
25270
25271 if (IS_VPR_REGNUM (regno))
25272 return mode == HImode;
25273
25274 if (TARGET_THUMB1)
25275 /* For the Thumb we only allow values bigger than SImode in
25276 registers 0 - 6, so that there is always a second low
25277 register available to hold the upper part of the value.
25278 We probably ought to ensure that the register is the
25279 start of an even numbered register pair. */
25280 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
25281
25282 if (TARGET_VFP_BASE && IS_VFP_REGNUM (regno))
25283 {
25284 if (mode == DFmode || mode == DImode)
25285 return VFP_REGNO_OK_FOR_DOUBLE (regno);
25286
25287 if (mode == HFmode || mode == BFmode || mode == HImode
25288 || mode == SFmode || mode == SImode)
25289 return VFP_REGNO_OK_FOR_SINGLE (regno);
25290
25291 if (TARGET_NEON)
25292 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
25293 || (VALID_NEON_QREG_MODE (mode)
25294 && NEON_REGNO_OK_FOR_QUAD (regno))
25295 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
25296 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
25297 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25298 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
25299 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
25300 if (TARGET_HAVE_MVE)
25301 return ((VALID_MVE_MODE (mode) && NEON_REGNO_OK_FOR_QUAD (regno))
25302 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
25303 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8)));
25304
25305 return false;
25306 }
25307
25308 if (TARGET_REALLY_IWMMXT)
25309 {
25310 if (IS_IWMMXT_GR_REGNUM (regno))
25311 return mode == SImode;
25312
25313 if (IS_IWMMXT_REGNUM (regno))
25314 return VALID_IWMMXT_REG_MODE (mode);
25315 }
25316
25317 /* We allow almost any value to be stored in the general registers.
25318 Restrict doubleword quantities to even register pairs in ARM state
25319 so that we can use ldrd. The same restriction applies for MVE
25320 in order to support Armv8.1-M Mainline instructions.
25321 Do not allow very large Neon structure opaque modes in general
25322 registers; they would use too many. */
25323 if (regno <= LAST_ARM_REGNUM)
25324 {
25325 if (ARM_NUM_REGS (mode) > 4)
25326 return false;
25327
25328 if (TARGET_THUMB2 && !(TARGET_HAVE_MVE || TARGET_CDE))
25329 return true;
25330
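/* For example, with LDRD available a DImode value may live in r0/r1 but
   may not start at r1, since ARM-state ldrd/strd need an even first
   register.  */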
25331 return !((TARGET_LDRD || TARGET_CDE)
25332 && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
25333 }
25334
25335 if (regno == FRAME_POINTER_REGNUM
25336 || regno == ARG_POINTER_REGNUM)
25337 /* We only allow integers in the fake hard registers. */
25338 return GET_MODE_CLASS (mode) == MODE_INT;
25339
25340 return false;
25341 }
25342
25343 /* Implement TARGET_MODES_TIEABLE_P. */
25344
25345 static bool
25346 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
25347 {
25348 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
25349 return true;
25350
25351 /* We specifically want to allow elements of "structure" modes to
25352 be tieable to the structure. This more general condition allows
25353 other rarer situations too. */
25354 if ((TARGET_NEON
25355 && (VALID_NEON_DREG_MODE (mode1)
25356 || VALID_NEON_QREG_MODE (mode1)
25357 || VALID_NEON_STRUCT_MODE (mode1))
25358 && (VALID_NEON_DREG_MODE (mode2)
25359 || VALID_NEON_QREG_MODE (mode2)
25360 || VALID_NEON_STRUCT_MODE (mode2)))
25361 || (TARGET_HAVE_MVE
25362 && (VALID_MVE_MODE (mode1)
25363 || VALID_MVE_STRUCT_MODE (mode1))
25364 && (VALID_MVE_MODE (mode2)
25365 || VALID_MVE_STRUCT_MODE (mode2))))
25366 return true;
25367
25368 return false;
25369 }
25370
25371 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
25372 not used in ARM mode.  */
25373
25374 enum reg_class
25375 arm_regno_class (int regno)
25376 {
25377 if (regno == PC_REGNUM)
25378 return NO_REGS;
25379
25380 if (IS_VPR_REGNUM (regno))
25381 return VPR_REG;
25382
25383 if (TARGET_THUMB1)
25384 {
25385 if (regno == STACK_POINTER_REGNUM)
25386 return STACK_REG;
25387 if (regno == CC_REGNUM)
25388 return CC_REG;
25389 if (regno < 8)
25390 return LO_REGS;
25391 return HI_REGS;
25392 }
25393
25394 if (TARGET_THUMB2 && regno < 8)
25395 return LO_REGS;
25396
25397 if ( regno <= LAST_ARM_REGNUM
25398 || regno == FRAME_POINTER_REGNUM
25399 || regno == ARG_POINTER_REGNUM)
25400 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
25401
25402 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
25403 return TARGET_THUMB2 ? CC_REG : NO_REGS;
25404
25405 if (IS_VFP_REGNUM (regno))
25406 {
25407 if (regno <= D7_VFP_REGNUM)
25408 return VFP_D0_D7_REGS;
25409 else if (regno <= LAST_LO_VFP_REGNUM)
25410 return VFP_LO_REGS;
25411 else
25412 return VFP_HI_REGS;
25413 }
25414
25415 if (IS_IWMMXT_REGNUM (regno))
25416 return IWMMXT_REGS;
25417
25418 if (IS_IWMMXT_GR_REGNUM (regno))
25419 return IWMMXT_GR_REGS;
25420
25421 return NO_REGS;
25422 }
25423
25424 /* Handle a special case when computing the offset
25425 of an argument from the frame pointer. */
25426 int
25427 arm_debugger_arg_offset (int value, rtx addr)
25428 {
25429 rtx_insn *insn;
25430
25431 /* We are only interested if dbxout_parms() failed to compute the offset. */
25432 if (value != 0)
25433 return 0;
25434
25435 /* We can only cope with the case where the address is held in a register. */
25436 if (!REG_P (addr))
25437 return 0;
25438
25439 /* If we are using the frame pointer to point at the argument, then
25440 an offset of 0 is correct. */
25441 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
25442 return 0;
25443
25444 /* If we are using the stack pointer to point at the
25445 argument, then an offset of 0 is correct. */
25446 /* ??? Check this is consistent with thumb2 frame layout. */
25447 if ((TARGET_THUMB || !frame_pointer_needed)
25448 && REGNO (addr) == SP_REGNUM)
25449 return 0;
25450
25451 /* Oh dear. The argument is pointed to by a register rather
25452 than being held in a register, or being stored at a known
25453 offset from the frame pointer. Since GDB only understands
25454 those two kinds of argument we must translate the address
25455 held in the register into an offset from the frame pointer.
25456 We do this by searching through the insns for the function
25457 looking to see where this register gets its value. If the
25458 register is initialized from the frame pointer plus an offset
25459 then we are in luck and we can continue, otherwise we give up.
25460
25461 This code is exercised by producing debugging information
25462 for a function with arguments like this:
25463
25464 double func (double a, double b, int c, double d) {return d;}
25465
25466 Without this code the stab for parameter 'd' will be set to
25467 an offset of 0 from the frame pointer, rather than 8. */
25468
25469 /* The if() statement says:
25470
25471 If the insn is a normal instruction
25472 and if the insn is setting the value in a register
25473 and if the register being set is the register holding the address of the argument
25474 and if the address is computed by an addition
25475 that involves adding to a register
25476 which is the frame pointer
25477 a constant integer
25478
25479 then... */
25480
25481 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25482 {
25483 if ( NONJUMP_INSN_P (insn)
25484 && GET_CODE (PATTERN (insn)) == SET
25485 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
25486 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
25487 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
25488 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
25489 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
25490 )
25491 {
25492 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
25493
25494 break;
25495 }
25496 }
25497
25498 if (value == 0)
25499 {
25500 debug_rtx (addr);
25501 warning (0, "unable to compute real location of stacked parameter");
25502 value = 8; /* XXX magic hack */
25503 }
25504
25505 return value;
25506 }
25507 \f
25508 /* Implement TARGET_PROMOTED_TYPE. */
25509
25510 static tree
25511 arm_promoted_type (const_tree t)
25512 {
25513 if (SCALAR_FLOAT_TYPE_P (t)
25514 && TYPE_PRECISION (t) == 16
25515 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
25516 return float_type_node;
25517 return NULL_TREE;
25518 }
25519
25520 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
25521 This simply adds HFmode as a supported mode; even though we don't
25522 implement arithmetic on this type directly, it's supported by
25523 optabs conversions, much the way the double-word arithmetic is
25524 special-cased in the default hook. */
25525
25526 static bool
25527 arm_scalar_mode_supported_p (scalar_mode mode)
25528 {
25529 if (mode == HFmode)
25530 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
25531 else if (ALL_FIXED_POINT_MODE_P (mode))
25532 return true;
25533 else
25534 return default_scalar_mode_supported_p (mode);
25535 }
25536
25537 /* Set the value of FLT_EVAL_METHOD.
25538 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
25539
25540 0: evaluate all operations and constants, whose semantic type has at
25541 most the range and precision of type float, to the range and
25542 precision of float; evaluate all other operations and constants to
25543 the range and precision of the semantic type;
25544
25545 N, where _FloatN is a supported interchange floating type
25546 evaluate all operations and constants, whose semantic type has at
25547 most the range and precision of _FloatN type, to the range and
25548 precision of the _FloatN type; evaluate all other operations and
25549 constants to the range and precision of the semantic type;
25550
25551 If we have the ARMv8.2-A extensions then we support _Float16 in native
25552 precision, so we should set this to 16. Otherwise, we support the type,
25553 but want to evaluate expressions in float precision, so set this to
25554 0. */
25555
25556 static enum flt_eval_method
25557 arm_excess_precision (enum excess_precision_type type)
25558 {
25559 switch (type)
25560 {
25561 case EXCESS_PRECISION_TYPE_FAST:
25562 case EXCESS_PRECISION_TYPE_STANDARD:
25563 /* We can calculate either in 16-bit range and precision or
25564 32-bit range and precision. Make that decision based on whether
25565 we have native support for the ARMv8.2-A 16-bit floating-point
25566 instructions or not. */
25567 return (TARGET_VFP_FP16INST
25568 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
25569 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
25570 case EXCESS_PRECISION_TYPE_IMPLICIT:
25571 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
25572 default:
25573 gcc_unreachable ();
25574 }
25575 return FLT_EVAL_METHOD_UNPREDICTABLE;
25576 }
25577
25578
25579 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
25580 _Float16 if we are using anything other than ieee format for 16-bit
25581 floating point. Otherwise, punt to the default implementation. */
25582 static opt_scalar_float_mode
25583 arm_floatn_mode (int n, bool extended)
25584 {
25585 if (!extended && n == 16)
25586 {
25587 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
25588 return HFmode;
25589 return opt_scalar_float_mode ();
25590 }
25591
25592 return default_floatn_mode (n, extended);
25593 }
25594
25595
25596 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25597 not to early-clobber SRC registers in the process.
25598
25599 We assume that the operands described by SRC and DEST represent a
25600 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25601 number of components into which the copy has been decomposed. */
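/* For example, with COUNT == 2, SRC == {d1, d2} and DEST == {d2, d3},
   copying in source order would overwrite d2 before it is read, so the
   moves are emitted in reverse: d3 := d2 first, then d2 := d1.  */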
25602 void
25603 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25604 {
25605 unsigned int i;
25606
25607 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25608 || REGNO (operands[0]) < REGNO (operands[1]))
25609 {
25610 for (i = 0; i < count; i++)
25611 {
25612 operands[2 * i] = dest[i];
25613 operands[2 * i + 1] = src[i];
25614 }
25615 }
25616 else
25617 {
25618 for (i = 0; i < count; i++)
25619 {
25620 operands[2 * i] = dest[count - i - 1];
25621 operands[2 * i + 1] = src[count - i - 1];
25622 }
25623 }
25624 }
25625
25626 /* Split operands into moves from op[1] + op[2] into op[0]. */
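/* OP[0] is twice as wide as OP[1]: its lower-numbered half (DESTLO)
   receives OP[1] and its higher-numbered half (DESTHI) receives OP[2].  */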
25627
25628 void
25629 neon_split_vcombine (rtx operands[3])
25630 {
25631 unsigned int dest = REGNO (operands[0]);
25632 unsigned int src1 = REGNO (operands[1]);
25633 unsigned int src2 = REGNO (operands[2]);
25634 machine_mode halfmode = GET_MODE (operands[1]);
25635 unsigned int halfregs = REG_NREGS (operands[1]);
25636 rtx destlo, desthi;
25637
25638 if (src1 == dest && src2 == dest + halfregs)
25639 {
25640 /* No-op move. Can't split to nothing; emit something. */
25641 emit_note (NOTE_INSN_DELETED);
25642 return;
25643 }
25644
25645 /* Preserve register attributes for variable tracking. */
25646 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25647 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25648 GET_MODE_SIZE (halfmode));
25649
25650 /* Special case of reversed high/low parts. Use VSWP. */
25651 if (src2 == dest && src1 == dest + halfregs)
25652 {
25653 rtx x = gen_rtx_SET (destlo, operands[1]);
25654 rtx y = gen_rtx_SET (desthi, operands[2]);
25655 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25656 return;
25657 }
25658
25659 if (!reg_overlap_mentioned_p (operands[2], destlo))
25660 {
25661 /* Try to avoid unnecessary moves if part of the result
25662 is in the right place already. */
25663 if (src1 != dest)
25664 emit_move_insn (destlo, operands[1]);
25665 if (src2 != dest + halfregs)
25666 emit_move_insn (desthi, operands[2]);
25667 }
25668 else
25669 {
25670 if (src2 != dest + halfregs)
25671 emit_move_insn (desthi, operands[2]);
25672 if (src1 != dest)
25673 emit_move_insn (destlo, operands[1]);
25674 }
25675 }
25676 \f
25677 /* Return the number (counting from 0) of
25678 the least significant set bit in MASK. */
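/* For example, number_of_first_bit_set (0x18) == 3.  */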
25679
25680 inline static int
25681 number_of_first_bit_set (unsigned mask)
25682 {
25683 return ctz_hwi (mask);
25684 }
25685
25686 /* Like emit_multi_reg_push, but allowing for a different set of
25687 registers to be described as saved. MASK is the set of registers
25688 to be saved; REAL_REGS is the set of registers to be described as
25689 saved. If REAL_REGS is 0, only describe the stack adjustment. */
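/* A minimal example: MASK == 0x11 pushes r0 and r4, while REAL_REGS
   selects which registers the unwind information records as saved
   (the prologue may have copied high registers into the pushed low
   ones; see the epilogue comments further below).  */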
25690
25691 static rtx_insn *
25692 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25693 {
25694 unsigned long regno;
25695 rtx par[10], tmp, reg;
25696 rtx_insn *insn;
25697 int i, j;
25698
25699 /* Build the parallel of the registers actually being stored. */
25700 for (i = 0; mask; ++i, mask &= mask - 1)
25701 {
25702 regno = ctz_hwi (mask);
25703 reg = gen_rtx_REG (SImode, regno);
25704
25705 if (i == 0)
25706 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25707 else
25708 tmp = gen_rtx_USE (VOIDmode, reg);
25709
25710 par[i] = tmp;
25711 }
25712
25713 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25714 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25715 tmp = gen_frame_mem (BLKmode, tmp);
25716 tmp = gen_rtx_SET (tmp, par[0]);
25717 par[0] = tmp;
25718
25719 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25720 insn = emit_insn (tmp);
25721
25722 /* Always build the stack adjustment note for unwind info. */
25723 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25724 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
25725 par[0] = tmp;
25726
25727 /* Build the parallel of the registers recorded as saved for unwind. */
25728 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25729 {
25730 regno = ctz_hwi (real_regs);
25731 reg = gen_rtx_REG (SImode, regno);
25732
25733 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25734 tmp = gen_frame_mem (SImode, tmp);
25735 tmp = gen_rtx_SET (tmp, reg);
25736 RTX_FRAME_RELATED_P (tmp) = 1;
25737 par[j + 1] = tmp;
25738 }
25739
25740 if (j == 0)
25741 tmp = par[0];
25742 else
25743 {
25744 RTX_FRAME_RELATED_P (par[0]) = 1;
25745 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25746 }
25747
25748 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25749
25750 return insn;
25751 }
25752
25753 /* Emit code to pop registers from the stack.  F is the
25754 assembly file.  MASK is the set of registers to pop.  */
25755 static void
25756 thumb_pop (FILE *f, unsigned long mask)
25757 {
25758 int regno;
25759 int lo_mask = mask & 0xFF;
25760
25761 gcc_assert (mask);
25762
25763 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25764 {
25765 /* Special case.  Do not generate a POP PC statement here; do it in
25766 thumb_exit().  */
25767 thumb_exit (f, -1);
25768 return;
25769 }
25770
25771 fprintf (f, "\tpop\t{");
25772
25773 /* Look at the low registers first. */
25774 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25775 {
25776 if (lo_mask & 1)
25777 {
25778 asm_fprintf (f, "%r", regno);
25779
25780 if ((lo_mask & ~1) != 0)
25781 fprintf (f, ", ");
25782 }
25783 }
25784
25785 if (mask & (1 << PC_REGNUM))
25786 {
25787 /* Catch popping the PC. */
25788 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
25789 || IS_CMSE_ENTRY (arm_current_func_type ()))
25790 {
25791 /* The PC is never popped directly; instead
25792 it is popped into r3 and then BX is used. */
25793 fprintf (f, "}\n");
25794
25795 thumb_exit (f, -1);
25796
25797 return;
25798 }
25799 else
25800 {
25801 if (mask & 0xFF)
25802 fprintf (f, ", ");
25803
25804 asm_fprintf (f, "%r", PC_REGNUM);
25805 }
25806 }
25807
25808 fprintf (f, "}\n");
25809 }
25810
25811 /* Generate code to return from a thumb function.
25812 If 'reg_containing_return_addr' is -1, then the return address is
25813 actually on the stack, at the stack pointer.
25814
25815 Note: do not forget to update length attribute of corresponding insn pattern
25816 when changing assembly output (e.g. length attribute of epilogue_insns when
25817 updating Armv8-M Baseline Security Extensions register clearing
25818 sequences). */
25819 static void
25820 thumb_exit (FILE *f, int reg_containing_return_addr)
25821 {
25822 unsigned regs_available_for_popping;
25823 unsigned regs_to_pop;
25824 int pops_needed;
25825 unsigned available;
25826 unsigned required;
25827 machine_mode mode;
25828 int size;
25829 int restore_a4 = FALSE;
25830
25831 /* Compute the registers we need to pop. */
25832 regs_to_pop = 0;
25833 pops_needed = 0;
25834
25835 if (reg_containing_return_addr == -1)
25836 {
25837 regs_to_pop |= 1 << LR_REGNUM;
25838 ++pops_needed;
25839 }
25840
25841 if (TARGET_BACKTRACE)
25842 {
25843 /* Restore the (ARM) frame pointer and stack pointer. */
25844 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25845 pops_needed += 2;
25846 }
25847
25848 /* If there is nothing to pop then just emit the BX instruction and
25849 return. */
25850 if (pops_needed == 0)
25851 {
25852 if (crtl->calls_eh_return)
25853 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25854
25855 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25856 {
25857 /* For Armv8.1-M, this is cleared as part of the CLRM instruction
25858 emitted by cmse_nonsecure_entry_clear_before_return (). */
25859 if (!TARGET_HAVE_FPCXT_CMSE)
25860 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
25861 reg_containing_return_addr);
25862 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
25863 }
25864 else
25865 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25866 return;
25867 }
25868 /* Otherwise if we are not supporting interworking and we have not created
25869 a backtrace structure and the function was not entered in ARM mode then
25870 just pop the return address straight into the PC. */
25871 else if (!TARGET_INTERWORK
25872 && !TARGET_BACKTRACE
25873 && !is_called_in_ARM_mode (current_function_decl)
25874 && !crtl->calls_eh_return
25875 && !IS_CMSE_ENTRY (arm_current_func_type ()))
25876 {
25877 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25878 return;
25879 }
25880
25881 /* Find out how many of the (return) argument registers we can corrupt. */
25882 regs_available_for_popping = 0;
25883
25884 /* If returning via __builtin_eh_return, the bottom three registers
25885 all contain information needed for the return. */
25886 if (crtl->calls_eh_return)
25887 size = 12;
25888 else
25889 {
25890 /* Deduce the registers used from the function's
25891 return value.  This is more reliable than examining
25892 df_regs_ever_live_p () because that will be set if the register is
25893 ever used in the function, not just if the register is used
25894 to hold a return value. */
25895
25896 if (crtl->return_rtx != 0)
25897 mode = GET_MODE (crtl->return_rtx);
25898 else
25899 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25900
25901 size = GET_MODE_SIZE (mode);
25902
25903 if (size == 0)
25904 {
25905 /* In a void function we can use any argument register.
25906 In a function that returns a structure on the stack
25907 we can use the second and third argument registers. */
25908 if (mode == VOIDmode)
25909 regs_available_for_popping =
25910 (1 << ARG_REGISTER (1))
25911 | (1 << ARG_REGISTER (2))
25912 | (1 << ARG_REGISTER (3));
25913 else
25914 regs_available_for_popping =
25915 (1 << ARG_REGISTER (2))
25916 | (1 << ARG_REGISTER (3));
25917 }
25918 else if (size <= 4)
25919 regs_available_for_popping =
25920 (1 << ARG_REGISTER (2))
25921 | (1 << ARG_REGISTER (3));
25922 else if (size <= 8)
25923 regs_available_for_popping =
25924 (1 << ARG_REGISTER (3));
25925 }
25926
25927 /* Match registers to be popped with registers into which we pop them. */
25928 for (available = regs_available_for_popping,
25929 required = regs_to_pop;
25930 required != 0 && available != 0;
25931 available &= ~(available & - available),
25932 required &= ~(required & - required))
25933 -- pops_needed;
25934
25935 /* If we have any popping registers left over, remove them. */
25936 if (available > 0)
25937 regs_available_for_popping &= ~available;
25938
25939 /* Otherwise if we need another popping register we can use
25940 the fourth argument register. */
25941 else if (pops_needed)
25942 {
25943 /* If we have not found any free argument registers and
25944 reg a4 contains the return address, we must move it. */
25945 if (regs_available_for_popping == 0
25946 && reg_containing_return_addr == LAST_ARG_REGNUM)
25947 {
25948 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25949 reg_containing_return_addr = LR_REGNUM;
25950 }
25951 else if (size > 12)
25952 {
25953 /* Register a4 is being used to hold part of the return value,
25954 but we have dire need of a free, low register. */
25955 restore_a4 = TRUE;
25956
25957 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
25958 }
25959
25960 if (reg_containing_return_addr != LAST_ARG_REGNUM)
25961 {
25962 /* The fourth argument register is available. */
25963 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25964
25965 --pops_needed;
25966 }
25967 }
25968
25969 /* Pop as many registers as we can. */
25970 thumb_pop (f, regs_available_for_popping);
25971
25972 /* Process the registers we popped. */
25973 if (reg_containing_return_addr == -1)
25974 {
25975 /* The return address was popped into the lowest numbered register. */
25976 regs_to_pop &= ~(1 << LR_REGNUM);
25977
25978 reg_containing_return_addr =
25979 number_of_first_bit_set (regs_available_for_popping);
25980
25981 /* Remove this register from the mask of available registers, so that
25982 the return address will not be corrupted by further pops. */
25983 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
25984 }
25985
25986 /* If we popped other registers then handle them here. */
25987 if (regs_available_for_popping)
25988 {
25989 int frame_pointer;
25990
25991 /* Work out which register currently contains the frame pointer. */
25992 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
25993
25994 /* Move it into the correct place. */
25995 asm_fprintf (f, "\tmov\t%r, %r\n",
25996 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
25997
25998 /* (Temporarily) remove it from the mask of popped registers. */
25999 regs_available_for_popping &= ~(1 << frame_pointer);
26000 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26001
26002 if (regs_available_for_popping)
26003 {
26004 int stack_pointer;
26005
26006 /* We popped the stack pointer as well,
26007 find the register that contains it. */
26008 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26009
26010 /* Move it into the stack register. */
26011 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26012
26013 /* At this point we have popped all necessary registers, so
26014 do not worry about restoring regs_available_for_popping
26015 to its correct value:
26016
26017 assert (pops_needed == 0)
26018 assert (regs_available_for_popping == (1 << frame_pointer))
26019 assert (regs_to_pop == (1 << STACK_POINTER)) */
26020 }
26021 else
26022 {
26023 /* Since we have just moved the popped value into the frame
26024 pointer, the popping register is available for reuse, and
26025 we know that we still have the stack pointer left to pop. */
26026 regs_available_for_popping |= (1 << frame_pointer);
26027 }
26028 }
26029
26030 /* If we still have registers left on the stack, but we no longer have
26031 any registers into which we can pop them, then we must move the return
26032 address into the link register and make available the register that
26033 contained it. */
26034 if (regs_available_for_popping == 0 && pops_needed > 0)
26035 {
26036 regs_available_for_popping |= 1 << reg_containing_return_addr;
26037
26038 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26039 reg_containing_return_addr);
26040
26041 reg_containing_return_addr = LR_REGNUM;
26042 }
26043
26044 /* If we have registers left on the stack then pop some more.
26045 We know that at most we will want to pop FP and SP. */
26046 if (pops_needed > 0)
26047 {
26048 int popped_into;
26049 int move_to;
26050
26051 thumb_pop (f, regs_available_for_popping);
26052
26053 /* We have popped either FP or SP.
26054 Move whichever one it is into the correct register. */
26055 popped_into = number_of_first_bit_set (regs_available_for_popping);
26056 move_to = number_of_first_bit_set (regs_to_pop);
26057
26058 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26059 --pops_needed;
26060 }
26061
26062 /* If we still have not popped everything then we must have only
26063 had one register available to us and we are now popping the SP. */
26064 if (pops_needed > 0)
26065 {
26066 int popped_into;
26067
26068 thumb_pop (f, regs_available_for_popping);
26069
26070 popped_into = number_of_first_bit_set (regs_available_for_popping);
26071
26072 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26073 /*
26074 assert (regs_to_pop == (1 << STACK_POINTER))
26075 assert (pops_needed == 1)
26076 */
26077 }
26078
26079 /* If necessary restore the a4 register. */
26080 if (restore_a4)
26081 {
26082 if (reg_containing_return_addr != LR_REGNUM)
26083 {
26084 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26085 reg_containing_return_addr = LR_REGNUM;
26086 }
26087
26088 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26089 }
26090
26091 if (crtl->calls_eh_return)
26092 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26093
26094 /* Return to caller. */
26095 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26096 {
26097 /* This is for the cases where LR is not being used to contain the return
26098 address. It may therefore contain information that we might not want
26099 to leak, hence it must be cleared. The value in R0 will never be a
26100 secret at this point, so it is safe to use it, see the clearing code
26101 in cmse_nonsecure_entry_clear_before_return (). */
26102 if (reg_containing_return_addr != LR_REGNUM)
26103 asm_fprintf (f, "\tmov\tlr, r0\n");
26104
26105 /* For Armv8.1-M, this is cleared as part of the CLRM instruction emitted
26106 by cmse_nonsecure_entry_clear_before_return (). */
26107 if (!TARGET_HAVE_FPCXT_CMSE)
26108 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
26109 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
26110 }
26111 else
26112 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26113 }
26114 \f
26115 /* Scan INSN just before assembler is output for it.
26116 For Thumb-1, we track the status of the condition codes; this
26117 information is used in the cbranchsi4_insn pattern. */
26118 void
26119 thumb1_final_prescan_insn (rtx_insn *insn)
26120 {
26121 if (flag_print_asm_name)
26122 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26123 INSN_ADDRESSES (INSN_UID (insn)));
26124 /* Don't overwrite the previous setter when we get to a cbranch. */
26125 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26126 {
26127 enum attr_conds conds;
26128
26129 if (cfun->machine->thumb1_cc_insn)
26130 {
26131 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26132 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26133 CC_STATUS_INIT;
26134 }
26135 conds = get_attr_conds (insn);
26136 if (conds == CONDS_SET)
26137 {
26138 rtx set = single_set (insn);
26139 cfun->machine->thumb1_cc_insn = insn;
26140 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26141 cfun->machine->thumb1_cc_op1 = const0_rtx;
26142 cfun->machine->thumb1_cc_mode = CC_NZmode;
26143 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26144 {
26145 rtx src1 = XEXP (SET_SRC (set), 1);
26146 if (src1 == const0_rtx)
26147 cfun->machine->thumb1_cc_mode = CCmode;
26148 }
26149 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26150 {
26151 /* Record the src register operand instead of dest because
26152 cprop_hardreg pass propagates src. */
26153 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26154 }
26155 }
26156 else if (conds != CONDS_NOCOND)
26157 cfun->machine->thumb1_cc_insn = NULL_RTX;
26158 }
26159
26160 /* Check if unexpected far jump is used. */
26161 if (cfun->machine->lr_save_eliminated
26162 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26163 internal_error("Unexpected thumb1 far jump");
26164 }
26165
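/* Return nonzero if VAL is an 8-bit constant shifted left by at most 24
   bits, i.e. a value Thumb-1 can build with a MOV of an 8-bit immediate
   followed by a left shift; e.g. 0xFF000000 qualifies but 0x101 does
   not.  */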
26166 int
26167 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26168 {
26169 unsigned HOST_WIDE_INT mask = 0xff;
26170 int i;
26171
26172 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26173 if (val == 0) /* XXX */
26174 return 0;
26175
26176 for (i = 0; i < 25; i++)
26177 if ((val & (mask << i)) == val)
26178 return 1;
26179
26180 return 0;
26181 }
26182
26183 /* Returns nonzero if the current function contains,
26184 or might contain, a far jump.  */
26185 static int
26186 thumb_far_jump_used_p (void)
26187 {
26188 rtx_insn *insn;
26189 bool far_jump = false;
26190 unsigned int func_size = 0;
26191
26192 /* If we have already decided that far jumps may be used,
26193 do not bother checking again, and always return true even if
26194 it turns out that they are not being used. Once we have made
26195 the decision that far jumps are present (and that hence the link
26196 register will be pushed onto the stack) we cannot go back on it. */
26197 if (cfun->machine->far_jump_used)
26198 return 1;
26199
26200 /* If this function is not being called from the prologue/epilogue
26201 generation code then it must be being called from the
26202 INITIAL_ELIMINATION_OFFSET macro. */
26203 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26204 {
26205 /* In this case we know that we are being asked about the elimination
26206 of the arg pointer register. If that register is not being used,
26207 then there are no arguments on the stack, and we do not have to
26208 worry that a far jump might force the prologue to push the link
26209 register, changing the stack offsets. In this case we can just
26210 return false, since the presence of far jumps in the function will
26211 not affect stack offsets.
26212
26213 If the arg pointer is live (or if it was live, but has now been
26214 eliminated and so set to dead) then we do have to test to see if
26215 the function might contain a far jump. This test can lead to some
26216 false negatives, since before reload is completed, the length of
26217 branch instructions is not known, so gcc defaults to returning their
26218 longest length, which in turn sets the far jump attribute to true.
26219
26220 A false negative will not result in bad code being generated, but it
26221 will result in a needless push and pop of the link register. We
26222 hope that this does not occur too often.
26223
26224 If we need doubleword stack alignment this could affect the other
26225 elimination offsets so we can't risk getting it wrong. */
26226 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26227 cfun->machine->arg_pointer_live = 1;
26228 else if (!cfun->machine->arg_pointer_live)
26229 return 0;
26230 }
26231
26232 /* We should not change far_jump_used during or after reload, as there is
26233 no chance to change stack frame layout. */
26234 if (reload_in_progress || reload_completed)
26235 return 0;
26236
26237 /* Check to see if the function contains a branch
26238 insn with the far jump attribute set. */
26239 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26240 {
26241 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26242 {
26243 far_jump = true;
26244 }
26245 func_size += get_attr_length (insn);
26246 }
26247
26248 /* The far_jump attribute will always be true for thumb1 before the
26249 shorten_branch pass, so checking the far_jump attribute before
26250 shorten_branch is not very useful.
26251
26252 The following heuristic tries to estimate more accurately whether a far
26253 jump may finally be used.  The heuristic is very conservative, as there
26254 is no chance to roll back a decision not to use far jumps.
26255
26256 The Thumb1 long branch offset range is -2048 to 2046.  The worst case is
26257 each 2-byte insn being associated with a 4-byte constant pool.  Using
26258 function size 2048/3 as the threshold is conservative enough.  */
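/* For example, a 700-byte function containing a candidate far jump gives
   700 * 3 = 2100 >= 2048, so LR is conservatively saved.  */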
26259 if (far_jump)
26260 {
26261 if ((func_size * 3) >= 2048)
26262 {
26263 /* Record the fact that we have decided that
26264 the function does use far jumps. */
26265 cfun->machine->far_jump_used = 1;
26266 return 1;
26267 }
26268 }
26269
26270 return 0;
26271 }
26272
26273 /* Return nonzero if FUNC must be entered in ARM mode. */
26274 static bool
26275 is_called_in_ARM_mode (tree func)
26276 {
26277 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26278
26279 /* Ignore the problem of functions whose address is taken.  */
26280 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26281 return true;
26282
26283 #ifdef ARM_PE
26284 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26285 #else
26286 return false;
26287 #endif
26288 }
26289
26290 /* Given the stack offsets and register mask in OFFSETS, decide how
26291 many additional registers to push instead of subtracting a constant
26292 from SP. For epilogues the principle is the same except we use pop.
26293 FOR_PROLOGUE indicates which we're generating. */
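/* Each extra low register pushed (or popped) adjusts SP by four bytes
   for free, so when optimizing for size a 16-byte frame can be
   allocated by pushing four otherwise-unused registers instead of a
   separate "sub sp, #16".  */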
26294 static int
26295 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26296 {
26297 HOST_WIDE_INT amount;
26298 unsigned long live_regs_mask = offsets->saved_regs_mask;
26299 /* Extract a mask of the ones we can give to the Thumb's push/pop
26300 instruction. */
26301 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26302 /* Then count how many other high registers will need to be pushed. */
26303 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26304 int n_free, reg_base, size;
26305
26306 if (!for_prologue && frame_pointer_needed)
26307 amount = offsets->locals_base - offsets->saved_regs;
26308 else
26309 amount = offsets->outgoing_args - offsets->saved_regs;
26310
26311 /* If the stack frame size is 512 exactly, we can save one load
26312 instruction, which should make this a win even when optimizing
26313 for speed. */
26314 if (!optimize_size && amount != 512)
26315 return 0;
26316
26317 /* Can't do this if there are high registers to push. */
26318 if (high_regs_pushed != 0)
26319 return 0;
26320
26321 /* Shouldn't do it in the prologue if no registers would normally
26322 be pushed at all. In the epilogue, also allow it if we'll have
26323 a pop insn for the PC. */
26324 if (l_mask == 0
26325 && (for_prologue
26326 || TARGET_BACKTRACE
26327 || (live_regs_mask & 1 << LR_REGNUM) == 0
26328 || TARGET_INTERWORK
26329 || crtl->args.pretend_args_size != 0))
26330 return 0;
26331
26332 /* Don't do this if thumb_expand_prologue wants to emit instructions
26333 between the push and the stack frame allocation. */
26334 if (for_prologue
26335 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26336 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26337 return 0;
26338
26339 reg_base = 0;
26340 n_free = 0;
26341 if (!for_prologue)
26342 {
26343 size = arm_size_return_regs ();
26344 reg_base = ARM_NUM_INTS (size);
26345 live_regs_mask >>= reg_base;
26346 }
26347
26348 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26349 && (for_prologue || call_used_or_fixed_reg_p (reg_base + n_free)))
26350 {
26351 live_regs_mask >>= 1;
26352 n_free++;
26353 }
26354
26355 if (n_free == 0)
26356 return 0;
26357 gcc_assert (amount / 4 * 4 == amount);
26358
26359 if (amount >= 512 && (amount - n_free * 4) < 512)
26360 return (amount - 508) / 4;
26361 if (amount <= n_free * 4)
26362 return amount / 4;
26363 return 0;
26364 }
26365
26366 /* The bits which aren't usefully expanded as rtl. */
26367 const char *
26368 thumb1_unexpanded_epilogue (void)
26369 {
26370 arm_stack_offsets *offsets;
26371 int regno;
26372 unsigned long live_regs_mask = 0;
26373 int high_regs_pushed = 0;
26374 int extra_pop;
26375 int had_to_push_lr;
26376 int size;
26377
26378 if (cfun->machine->return_used_this_function != 0)
26379 return "";
26380
26381 if (IS_NAKED (arm_current_func_type ()))
26382 return "";
26383
26384 offsets = arm_get_frame_offsets ();
26385 live_regs_mask = offsets->saved_regs_mask;
26386 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26387
26388 /* Deduce the registers used from the function's return value.
26389 This is more reliable than examining df_regs_ever_live_p () because that
26390 will be set if the register is ever used in the function, not just if
26391 the register is used to hold a return value. */
26392 size = arm_size_return_regs ();
26393
26394 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26395 if (extra_pop > 0)
26396 {
26397 unsigned long extra_mask = (1 << extra_pop) - 1;
26398 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26399 }
26400
26401 /* The prologue may have pushed some high registers to use as
26402 work registers, e.g. the testsuite file:
26403 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26404 compiles to produce:
26405 push {r4, r5, r6, r7, lr}
26406 mov r7, r9
26407 mov r6, r8
26408 push {r6, r7}
26409 as part of the prologue.  We have to undo that pushing here.  */
26410
26411 if (high_regs_pushed)
26412 {
26413 unsigned long mask = live_regs_mask & 0xff;
26414 int next_hi_reg;
26415
26416 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
26417
26418 if (mask == 0)
26419 /* Oh dear! We have no low registers into which we can pop
26420 high registers! */
26421 internal_error
26422 ("no low registers available for popping high registers");
26423
26424 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26425 if (live_regs_mask & (1 << next_hi_reg))
26426 break;
26427
26428 while (high_regs_pushed)
26429 {
26430 /* Find lo register(s) into which the high register(s) can
26431 be popped. */
26432 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26433 {
26434 if (mask & (1 << regno))
26435 high_regs_pushed--;
26436 if (high_regs_pushed == 0)
26437 break;
26438 }
26439
26440 if (high_regs_pushed == 0 && regno >= 0)
26441 mask &= ~((1 << regno) - 1);
26442
26443 /* Pop the values into the low register(s). */
26444 thumb_pop (asm_out_file, mask);
26445
26446 /* Move the value(s) into the high registers. */
26447 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
26448 {
26449 if (mask & (1 << regno))
26450 {
26451 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26452 regno);
26453
26454 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
26455 next_hi_reg--)
26456 if (live_regs_mask & (1 << next_hi_reg))
26457 break;
26458 }
26459 }
26460 }
26461 live_regs_mask &= ~0x0f00;
26462 }
26463
26464 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26465 live_regs_mask &= 0xff;
26466
26467 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26468 {
26469 /* Pop the return address into the PC. */
26470 if (had_to_push_lr)
26471 live_regs_mask |= 1 << PC_REGNUM;
26472
26473 /* Either no argument registers were pushed or a backtrace
26474 structure was created which includes an adjusted stack
26475 pointer, so just pop everything. */
26476 if (live_regs_mask)
26477 thumb_pop (asm_out_file, live_regs_mask);
26478
26479 /* We have either just popped the return address into the
26480 PC or it was kept in LR for the entire function.
26481 Note that thumb_pop has already called thumb_exit if the
26482 PC was in the list. */
26483 if (!had_to_push_lr)
26484 thumb_exit (asm_out_file, LR_REGNUM);
26485 }
26486 else
26487 {
26488 /* Pop everything but the return address. */
26489 if (live_regs_mask)
26490 thumb_pop (asm_out_file, live_regs_mask);
26491
26492 if (had_to_push_lr)
26493 {
26494 if (size > 12)
26495 {
26496 /* We have no free low regs, so save one. */
26497 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26498 LAST_ARG_REGNUM);
26499 }
26500
26501 /* Get the return address into a temporary register. */
26502 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26503
26504 if (size > 12)
26505 {
26506 /* Move the return address to lr. */
26507 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26508 LAST_ARG_REGNUM);
26509 /* Restore the low register. */
26510 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26511 IP_REGNUM);
26512 regno = LR_REGNUM;
26513 }
26514 else
26515 regno = LAST_ARG_REGNUM;
26516 }
26517 else
26518 regno = LR_REGNUM;
26519
26520 /* Remove the argument registers that were pushed onto the stack. */
26521 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26522 SP_REGNUM, SP_REGNUM,
26523 crtl->args.pretend_args_size);
26524
26525 thumb_exit (asm_out_file, regno);
26526 }
26527
26528 return "";
26529 }
26530
26531 /* Functions to save and restore machine-specific function data. */
26532 static struct machine_function *
26533 arm_init_machine_status (void)
26534 {
26535 struct machine_function *machine;
26536 machine = ggc_cleared_alloc<machine_function> ();
26537
26538 #if ARM_FT_UNKNOWN != 0
26539 machine->func_type = ARM_FT_UNKNOWN;
26540 #endif
26541 machine->static_chain_stack_bytes = -1;
26542 return machine;
26543 }
26544
26545 /* Return an RTX indicating where the return address to the
26546 calling function can be found. */
26547 rtx
26548 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26549 {
26550 if (count != 0)
26551 return NULL_RTX;
26552
26553 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26554 }
26555
26556 /* Do anything needed before RTL is emitted for each function. */
26557 void
26558 arm_init_expanders (void)
26559 {
26560 /* Arrange to initialize and mark the machine per-function status. */
26561 init_machine_status = arm_init_machine_status;
26562
26563 /* This is to stop the combine pass optimizing away the alignment
26564 adjustment of va_arg. */
26565 /* ??? It is claimed that this should not be necessary. */
26566 if (cfun)
26567 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26568 }
26569
26570 /* Check that FUNC is called with a different mode. */
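/* For example, in a -mthumb compilation a callee declared with
   __attribute__((target("arm"))) is called with a different mode.  */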
26571
26572 bool
26573 arm_change_mode_p (tree func)
26574 {
26575 if (TREE_CODE (func) != FUNCTION_DECL)
26576 return false;
26577
26578 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
26579
26580 if (!callee_tree)
26581 callee_tree = target_option_default_node;
26582
26583 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
26584 int flags = callee_opts->x_target_flags;
26585
26586 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
26587 }
26588
26589 /* Like arm_compute_initial_elimination_offset.  Simpler because there
26590 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26591 to point at the base of the local variables after static stack
26592 space for a function has been allocated. */
26593
26594 HOST_WIDE_INT
26595 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26596 {
26597 arm_stack_offsets *offsets;
26598
26599 offsets = arm_get_frame_offsets ();
26600
26601 switch (from)
26602 {
26603 case ARG_POINTER_REGNUM:
26604 switch (to)
26605 {
26606 case STACK_POINTER_REGNUM:
26607 return offsets->outgoing_args - offsets->saved_args;
26608
26609 case FRAME_POINTER_REGNUM:
26610 return offsets->soft_frame - offsets->saved_args;
26611
26612 case ARM_HARD_FRAME_POINTER_REGNUM:
26613 return offsets->saved_regs - offsets->saved_args;
26614
26615 case THUMB_HARD_FRAME_POINTER_REGNUM:
26616 return offsets->locals_base - offsets->saved_args;
26617
26618 default:
26619 gcc_unreachable ();
26620 }
26621 break;
26622
26623 case FRAME_POINTER_REGNUM:
26624 switch (to)
26625 {
26626 case STACK_POINTER_REGNUM:
26627 return offsets->outgoing_args - offsets->soft_frame;
26628
26629 case ARM_HARD_FRAME_POINTER_REGNUM:
26630 return offsets->saved_regs - offsets->soft_frame;
26631
26632 case THUMB_HARD_FRAME_POINTER_REGNUM:
26633 return offsets->locals_base - offsets->soft_frame;
26634
26635 default:
26636 gcc_unreachable ();
26637 }
26638 break;
26639
26640 default:
26641 gcc_unreachable ();
26642 }
26643 }
26644
26645 /* Generate the function's prologue. */
26646
26647 void
26648 thumb1_expand_prologue (void)
26649 {
26650 rtx_insn *insn;
26651
26652 HOST_WIDE_INT amount;
26653 HOST_WIDE_INT size;
26654 arm_stack_offsets *offsets;
26655 unsigned long func_type;
26656 int regno;
26657 unsigned long live_regs_mask;
26658 unsigned long l_mask;
26659 unsigned high_regs_pushed = 0;
26660 bool lr_needs_saving;
26661
26662 func_type = arm_current_func_type ();
26663
26664 /* Naked functions don't have prologues. */
26665 if (IS_NAKED (func_type))
26666 {
26667 if (flag_stack_usage_info)
26668 current_function_static_stack_size = 0;
26669 return;
26670 }
26671
26672 if (IS_INTERRUPT (func_type))
26673 {
26674 error ("Interrupt Service Routines cannot be coded in Thumb-1 mode");
26675 return;
26676 }
26677
26678 if (is_called_in_ARM_mode (current_function_decl))
26679 emit_insn (gen_prologue_thumb1_interwork ());
26680
26681 offsets = arm_get_frame_offsets ();
26682 live_regs_mask = offsets->saved_regs_mask;
26683 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
26684
26685 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26686 l_mask = live_regs_mask & 0x40ff;
26687 /* Then count how many other high registers will need to be pushed. */
26688 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26689
26690 if (crtl->args.pretend_args_size)
26691 {
26692 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26693
26694 if (cfun->machine->uses_anonymous_args)
26695 {
26696 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26697 unsigned long mask;
26698
26699 mask = 1ul << (LAST_ARG_REGNUM + 1);
26700 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26701
26702 insn = thumb1_emit_multi_reg_push (mask, 0);
26703 }
26704 else
26705 {
26706 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26707 stack_pointer_rtx, x));
26708 }
26709 RTX_FRAME_RELATED_P (insn) = 1;
26710 }
26711
26712 if (TARGET_BACKTRACE)
26713 {
26714 HOST_WIDE_INT offset = 0;
26715 unsigned work_register;
26716 rtx work_reg, x, arm_hfp_rtx;
26717
26718 /* We have been asked to create a stack backtrace structure.
26719 The code looks like this:
26720
26721 0 .align 2
26722 0 func:
26723 0 sub SP, #16 Reserve space for 4 registers.
26724 2 push {R7} Push low registers.
26725 4 add R7, SP, #20 Get the stack pointer before the push.
26726 6 str R7, [SP, #8] Store the stack pointer
26727 (before reserving the space).
26728 8 mov R7, PC Get hold of the start of this code + 12.
26729 10 str R7, [SP, #16] Store it.
26730 12 mov R7, FP Get hold of the current frame pointer.
26731 14 str R7, [SP, #4] Store it.
26732 16 mov R7, LR Get hold of the current return address.
26733 18 str R7, [SP, #12] Store it.
26734 20 add R7, SP, #16 Point at the start of the
26735 backtrace structure.
26736 22 mov FP, R7 Put this value into the frame pointer. */
26737
26738 work_register = thumb_find_work_register (live_regs_mask);
26739 work_reg = gen_rtx_REG (SImode, work_register);
26740 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26741
26742 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26743 stack_pointer_rtx, GEN_INT (-16)));
26744 RTX_FRAME_RELATED_P (insn) = 1;
26745
26746 if (l_mask)
26747 {
26748 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26749 RTX_FRAME_RELATED_P (insn) = 1;
26750 lr_needs_saving = false;
26751
26752 offset = bit_count (l_mask) * UNITS_PER_WORD;
26753 }
26754
26755 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26756 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26757
26758 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26759 x = gen_frame_mem (SImode, x);
26760 emit_move_insn (x, work_reg);
26761
26762 /* Make sure that the instruction fetching the PC is in the right place
26763 to calculate "start of backtrace creation code + 12". */
26764 /* ??? The stores using the common WORK_REG ought to be enough to
26765 prevent the scheduler from doing anything weird. Failing that
26766 we could always move all of the following into an UNSPEC_VOLATILE. */
26767 if (l_mask)
26768 {
26769 x = gen_rtx_REG (SImode, PC_REGNUM);
26770 emit_move_insn (work_reg, x);
26771
26772 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26773 x = gen_frame_mem (SImode, x);
26774 emit_move_insn (x, work_reg);
26775
26776 emit_move_insn (work_reg, arm_hfp_rtx);
26777
26778 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26779 x = gen_frame_mem (SImode, x);
26780 emit_move_insn (x, work_reg);
26781 }
26782 else
26783 {
26784 emit_move_insn (work_reg, arm_hfp_rtx);
26785
26786 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26787 x = gen_frame_mem (SImode, x);
26788 emit_move_insn (x, work_reg);
26789
26790 x = gen_rtx_REG (SImode, PC_REGNUM);
26791 emit_move_insn (work_reg, x);
26792
26793 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26794 x = gen_frame_mem (SImode, x);
26795 emit_move_insn (x, work_reg);
26796 }
26797
26798 x = gen_rtx_REG (SImode, LR_REGNUM);
26799 emit_move_insn (work_reg, x);
26800
26801 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26802 x = gen_frame_mem (SImode, x);
26803 emit_move_insn (x, work_reg);
26804
26805 x = GEN_INT (offset + 12);
26806 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26807
26808 emit_move_insn (arm_hfp_rtx, work_reg);
26809 }
26810 /* Optimization: If we are not pushing any low registers but we are going
26811 to push some high registers then delay our first push. This will just
26812 be a push of LR and we can combine it with the push of the first high
26813 register. */
26814 else if ((l_mask & 0xff) != 0
26815 || (high_regs_pushed == 0 && lr_needs_saving))
26816 {
26817 unsigned long mask = l_mask;
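/* thumb1_extra_regs_pushed returns how many additional (currently dead)
   low registers can be pushed purely to pre-allocate stack space, so
   (1 << n) - 1 adds r0..r(n-1) to the push; the explicit stack adjustment
   later in this function is reduced by the same amount.  */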
26818 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
26819 insn = thumb1_emit_multi_reg_push (mask, mask);
26820 RTX_FRAME_RELATED_P (insn) = 1;
26821 lr_needs_saving = false;
26822 }
26823
26824 if (high_regs_pushed)
26825 {
26826 unsigned pushable_regs;
26827 unsigned next_hi_reg;
26828 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26829 : crtl->args.info.nregs;
26830 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26831
26832 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26833 if (live_regs_mask & (1 << next_hi_reg))
26834 break;
26835
26836 /* Here we need to mask out registers used for passing arguments
26837 even if they can be pushed. This is to avoid using them to
26838 stash the high registers; such a stash could clobber the
26839 incoming arguments. */
26840 pushable_regs = l_mask & (~arg_regs_mask);
26841 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
26842
26843 /* Normally, LR can be used as a scratch register once it has been
26844 saved; but if the function examines its own return address then
26845 the value is still live and we need to avoid using it. */
26846 bool return_addr_live
26847 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
26848 LR_REGNUM);
26849
26850 if (lr_needs_saving || return_addr_live)
26851 pushable_regs &= ~(1 << LR_REGNUM);
26852
26853 if (pushable_regs == 0)
26854 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26855
26856 while (high_regs_pushed > 0)
26857 {
26858 unsigned long real_regs_mask = 0;
26859 unsigned long push_mask = 0;
26860
26861 for (regno = LR_REGNUM; regno >= 0; regno --)
26862 {
26863 if (pushable_regs & (1 << regno))
26864 {
26865 emit_move_insn (gen_rtx_REG (SImode, regno),
26866 gen_rtx_REG (SImode, next_hi_reg));
26867
26868 high_regs_pushed --;
26869 real_regs_mask |= (1 << next_hi_reg);
26870 push_mask |= (1 << regno);
26871
26872 if (high_regs_pushed)
26873 {
26874 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26875 next_hi_reg --)
26876 if (live_regs_mask & (1 << next_hi_reg))
26877 break;
26878 }
26879 else
26880 break;
26881 }
26882 }
26883
26884 /* If we had to find a work register and we have not yet
26885 saved the LR then add it to the list of regs to push. */
26886 if (lr_needs_saving)
26887 {
26888 push_mask |= 1 << LR_REGNUM;
26889 real_regs_mask |= 1 << LR_REGNUM;
26890 lr_needs_saving = false;
26891 /* If the return address is not live at this point, we
26892 can add LR to the list of registers that we can use
26893 for pushes. */
26894 if (!return_addr_live)
26895 pushable_regs |= 1 << LR_REGNUM;
26896 }
26897
26898 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
26899 RTX_FRAME_RELATED_P (insn) = 1;
26900 }
26901 }
26902
26903 /* Load the pic register before setting the frame pointer,
26904 so we can use r7 as a temporary work register. */
26905 if (flag_pic && arm_pic_register != INVALID_REGNUM)
26906 arm_load_pic_register (live_regs_mask, NULL_RTX);
26907
26908 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26909 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26910 stack_pointer_rtx);
26911
26912 size = offsets->outgoing_args - offsets->saved_args;
26913 if (flag_stack_usage_info)
26914 current_function_static_stack_size = size;
26915
26916 /* If we have a frame, then do stack checking. FIXME: not implemented. */
26917 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
26918 || flag_stack_clash_protection)
26919 && size)
26920 sorry ("%<-fstack-check=specific%> for Thumb-1");
26921
26922 amount = offsets->outgoing_args - offsets->saved_regs;
26923 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26924 if (amount)
26925 {
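/* The 512 threshold below is presumably because a single Thumb-1
   SUB SP instruction can subtract at most 508 bytes (a 7-bit
   immediate scaled by 4) and frame adjustments are word-aligned.  */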
26926 if (amount < 512)
26927 {
26928 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26929 GEN_INT (- amount)));
26930 RTX_FRAME_RELATED_P (insn) = 1;
26931 }
26932 else
26933 {
26934 rtx reg, dwarf;
26935
26936 /* The stack decrement is too big for an immediate value in a single
26937 insn. In theory we could issue multiple subtracts, but after
26938 three of them it becomes more space efficient to place the full
26939 value in the constant pool and load into a register. (Also the
26940 ARM debugger really likes to see only one stack decrement per
26941 function). So instead we look for a scratch register into which
26942 we can load the decrement, and then we subtract this from the
26943 stack pointer. Unfortunately on the thumb the only available
26944 scratch registers are the argument registers, and we cannot use
26945 these as they may hold arguments to the function. Instead we
26946 attempt to locate a call preserved register which is used by this
26947 function. If we can find one, then we know that it will have
26948 been pushed at the start of the prologue and so we can corrupt
26949 it now. */
26950 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26951 if (live_regs_mask & (1 << regno))
26952 break;
26953
26954 gcc_assert (regno <= LAST_LO_REGNUM);
26955
26956 reg = gen_rtx_REG (SImode, regno);
26957
26958 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26959
26960 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26961 stack_pointer_rtx, reg));
26962
26963 dwarf = gen_rtx_SET (stack_pointer_rtx,
26964 plus_constant (Pmode, stack_pointer_rtx,
26965 -amount));
26966 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26967 RTX_FRAME_RELATED_P (insn) = 1;
26968 }
26969 }
26970
26971 if (frame_pointer_needed)
26972 thumb_set_frame_pointer (offsets);
26973
26974 /* If we are profiling, make sure no instructions are scheduled before
26975 the call to mcount. Similarly if the user has requested no
26976 scheduling in the prolog. Similarly if we want non-call exceptions
26977 using the EABI unwinder, to prevent faulting instructions from being
26978 swapped with a stack adjustment. */
26979 if (crtl->profile || !TARGET_SCHED_PROLOG
26980 || (arm_except_unwind_info (&global_options) == UI_TARGET
26981 && cfun->can_throw_non_call_exceptions))
26982 emit_insn (gen_blockage ());
26983
26984 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26985 if (live_regs_mask & 0xff)
26986 cfun->machine->lr_save_eliminated = 0;
26987 }
26988
26989 /* Clear caller saved registers not used to pass return values and leaked
26990 condition flags before exiting a cmse_nonsecure_entry function. */
26991
26992 void
26993 cmse_nonsecure_entry_clear_before_return (void)
26994 {
26995 bool clear_vfpregs = TARGET_HARD_FLOAT || TARGET_HAVE_FPCXT_CMSE;
26996 int regno, maxregno = clear_vfpregs ? LAST_VFP_REGNUM : IP_REGNUM;
26997 uint32_t padding_bits_to_clear = 0;
26998 auto_sbitmap to_clear_bitmap (maxregno + 1);
26999 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
27000 tree result_type;
27001
27002 bitmap_clear (to_clear_bitmap);
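/* Start from the AAPCS argument registers r0-r3 plus ip; registers used
   to return the result are removed from this set further down.  */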
27003 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
27004 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
27005
27006 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
27007 registers. */
27008 if (clear_vfpregs)
27009 {
27010 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
27011
27012 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
27013
27014 if (!TARGET_HAVE_FPCXT_CMSE)
27015 {
27016 /* Make sure we don't clear the two scratch registers used to clear
27017 the relevant FPSCR bits in output_return_instruction. */
27018 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
27019 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
27020 emit_use (gen_rtx_REG (SImode, 4));
27021 bitmap_clear_bit (to_clear_bitmap, 4);
27022 }
27023 }
27024
27025 /* If the user has defined registers to be caller saved, these are no longer
27026 restored by the function before returning and must thus be cleared for
27027 security purposes. */
27028 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
27029 {
27030 /* We do not touch registers that can be used to pass arguments as per
27031 the AAPCS, since these should never be made callee-saved by user
27032 options. */
27033 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
27034 continue;
27035 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
27036 continue;
27037 if (!callee_saved_reg_p (regno)
27038 && (!IN_RANGE (regno, FIRST_VFP_REGNUM, LAST_VFP_REGNUM)
27039 || TARGET_HARD_FLOAT))
27040 bitmap_set_bit (to_clear_bitmap, regno);
27041 }
27042
27043 /* Make sure we do not clear the registers used to return the result in. */
27044 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
27045 if (!VOID_TYPE_P (result_type))
27046 {
27047 uint64_t to_clear_return_mask;
27048 result_rtl = arm_function_value (result_type, current_function_decl, 0);
27049
27050 /* No need to check that we return in registers, because we don't
27051 support returning on stack yet. */
27052 gcc_assert (REG_P (result_rtl));
27053 to_clear_return_mask
27054 = compute_not_to_clear_mask (result_type, result_rtl, 0,
27055 &padding_bits_to_clear);
27056 if (to_clear_return_mask)
27057 {
27058 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
27059 for (regno = R0_REGNUM; regno <= maxregno; regno++)
27060 {
27061 if (to_clear_return_mask & (1ULL << regno))
27062 bitmap_clear_bit (to_clear_bitmap, regno);
27063 }
27064 }
27065 }
27066
27067 if (padding_bits_to_clear != 0)
27068 {
27069 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
27070 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
27071
27072 /* Padding_bits_to_clear is not 0, so we know we are dealing with
27073 returning a composite type, which only uses r0. Let's make sure that
27074 r1-r3 are cleared too. */
27075 bitmap_clear (to_clear_arg_regs_bitmap);
27076 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
27077 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
27078 }
27079
27080 /* Clear full registers that leak before returning. */
27081 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
27082 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
27083 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
27084 clearing_reg);
27085 }
27086
27087 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
27088 POP instruction can be generated. LR should be replaced by PC. All
27089 the checks required are already done by USE_RETURN_INSN (). Hence,
27090 all we really need to check here is whether a single register or
27091 multiple registers are to be returned. */
27092 void
27093 thumb2_expand_return (bool simple_return)
27094 {
27095 int i, num_regs;
27096 unsigned long saved_regs_mask;
27097 arm_stack_offsets *offsets;
27098
27099 offsets = arm_get_frame_offsets ();
27100 saved_regs_mask = offsets->saved_regs_mask;
27101
27102 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27103 if (saved_regs_mask & (1 << i))
27104 num_regs++;
27105
27106 if (!simple_return && saved_regs_mask)
27107 {
27108 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
27109 functions or adapt code to handle according to ACLE. This path should
27110 not be reachable for cmse_nonsecure_entry functions though we prefer
27111 to assert it for now to ensure that future code changes do not silently
27112 change this behavior. */
27113 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
27114 if (num_regs == 1)
27115 {
27116 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27117 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27118 rtx addr = gen_rtx_MEM (SImode,
27119 gen_rtx_POST_INC (SImode,
27120 stack_pointer_rtx));
27121 set_mem_alias_set (addr, get_frame_alias_set ());
27122 XVECEXP (par, 0, 0) = ret_rtx;
27123 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
27124 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27125 emit_jump_insn (par);
27126 }
27127 else
27128 {
27129 saved_regs_mask &= ~ (1 << LR_REGNUM);
27130 saved_regs_mask |= (1 << PC_REGNUM);
27131 arm_emit_multi_reg_pop (saved_regs_mask);
27132 }
27133 }
27134 else
27135 {
27136 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27137 cmse_nonsecure_entry_clear_before_return ();
27138 emit_jump_insn (simple_return_rtx);
27139 }
27140 }
27141
27142 void
27143 thumb1_expand_epilogue (void)
27144 {
27145 HOST_WIDE_INT amount;
27146 arm_stack_offsets *offsets;
27147 int regno;
27148
27149 /* Naked functions don't have epilogues. */
27150 if (IS_NAKED (arm_current_func_type ()))
27151 return;
27152
27153 offsets = arm_get_frame_offsets ();
27154 amount = offsets->outgoing_args - offsets->saved_regs;
27155
27156 if (frame_pointer_needed)
27157 {
27158 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27159 amount = offsets->locals_base - offsets->saved_regs;
27160 }
27161 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27162
27163 gcc_assert (amount >= 0);
27164 if (amount)
27165 {
27166 emit_insn (gen_blockage ());
27167
27168 if (amount < 512)
27169 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27170 GEN_INT (amount)));
27171 else
27172 {
27173 /* r3 is always free in the epilogue. */
27174 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27175
27176 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27177 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27178 }
27179 }
27180
27181 /* Emit a USE (stack_pointer_rtx), so that
27182 the stack adjustment will not be deleted. */
27183 emit_insn (gen_force_register_use (stack_pointer_rtx));
27184
27185 if (crtl->profile || !TARGET_SCHED_PROLOG)
27186 emit_insn (gen_blockage ());
27187
27188 /* Emit a clobber for each insn that will be restored in the epilogue,
27189 so that flow2 will get register lifetimes correct. */
27190 for (regno = 0; regno < 13; regno++)
27191 if (reg_needs_saving_p (regno))
27192 emit_clobber (gen_rtx_REG (SImode, regno));
27193
27194 if (! df_regs_ever_live_p (LR_REGNUM))
27195 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27196
27197 /* Clear all caller-saved regs that are not used to return. */
27198 if (IS_CMSE_ENTRY (arm_current_func_type ()))
27199 cmse_nonsecure_entry_clear_before_return ();
27200 }
27201
27202 /* Epilogue code for APCS frame. */
27203 static void
27204 arm_expand_epilogue_apcs_frame (bool really_return)
27205 {
27206 unsigned long func_type;
27207 unsigned long saved_regs_mask;
27208 int num_regs = 0;
27209 int i;
27210 int floats_from_frame = 0;
27211 arm_stack_offsets *offsets;
27212
27213 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27214 func_type = arm_current_func_type ();
27215
27216 /* Get frame offsets for ARM. */
27217 offsets = arm_get_frame_offsets ();
27218 saved_regs_mask = offsets->saved_regs_mask;
27219
27220 /* Find the offset of the floating-point save area in the frame. */
27221 floats_from_frame
27222 = (offsets->saved_args
27223 + arm_compute_static_chain_stack_bytes ()
27224 - offsets->frame);
27225
27226 /* Compute how many core registers saved and how far away the floats are. */
27227 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27228 if (saved_regs_mask & (1 << i))
27229 {
27230 num_regs++;
27231 floats_from_frame += 4;
27232 }
27233
27234 if (TARGET_VFP_BASE)
27235 {
27236 int start_reg;
27237 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27238
27239 /* The offset is from IP_REGNUM. */
27240 int saved_size = arm_get_vfp_saved_size ();
27241 if (saved_size > 0)
27242 {
27243 rtx_insn *insn;
27244 floats_from_frame += saved_size;
27245 insn = emit_insn (gen_addsi3 (ip_rtx,
27246 hard_frame_pointer_rtx,
27247 GEN_INT (-floats_from_frame)));
27248 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27249 ip_rtx, hard_frame_pointer_rtx);
27250 }
27251
27252 /* Generate VFP register multi-pop. */
27253 start_reg = FIRST_VFP_REGNUM;
27254
27255 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27256 /* Look for a case where a reg does not need restoring. */
27257 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27258 {
27259 if (start_reg != i)
27260 arm_emit_vfp_multi_reg_pop (start_reg,
27261 (i - start_reg) / 2,
27262 gen_rtx_REG (SImode,
27263 IP_REGNUM));
27264 start_reg = i + 2;
27265 }
27266
27267 /* Restore the remaining regs that we have discovered (or possibly
27268 even all of them, if the conditional in the for loop never
27269 fired). */
27270 if (start_reg != i)
27271 arm_emit_vfp_multi_reg_pop (start_reg,
27272 (i - start_reg) / 2,
27273 gen_rtx_REG (SImode, IP_REGNUM));
27274 }
27275
27276 if (TARGET_IWMMXT)
27277 {
27278 /* The frame pointer is guaranteed to be non-double-word aligned, as
27279 it is set to the double-word-aligned old_stack_pointer minus 4. */
27280 rtx_insn *insn;
27281 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27282
27283 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27284 if (reg_needs_saving_p (i))
27285 {
27286 rtx addr = gen_frame_mem (V2SImode,
27287 plus_constant (Pmode, hard_frame_pointer_rtx,
27288 - lrm_count * 4));
27289 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27290 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27291 gen_rtx_REG (V2SImode, i),
27292 NULL_RTX);
27293 lrm_count += 2;
27294 }
27295 }
27296
27297 /* saved_regs_mask should contain IP, which holds the old stack pointer
27298 from the time the activation record was created. Since SP and IP are
27299 adjacent registers, we can restore the value directly into SP. */
27300 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27301 saved_regs_mask &= ~(1 << IP_REGNUM);
27302 saved_regs_mask |= (1 << SP_REGNUM);
27303
27304 /* There are two registers left in saved_regs_mask - LR and PC. We
27305 only need to restore LR (the return address), but to
27306 save time we can load it directly into PC, unless we need a
27307 special function exit sequence, or we are not really returning. */
27308 if (really_return
27309 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27310 && !crtl->calls_eh_return)
27311 /* Delete LR from the register mask, so that LR on
27312 the stack is loaded into the PC in the register mask. */
27313 saved_regs_mask &= ~(1 << LR_REGNUM);
27314 else
27315 saved_regs_mask &= ~(1 << PC_REGNUM);
27316
27317 num_regs = bit_count (saved_regs_mask);
27318 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27319 {
27320 rtx_insn *insn;
27321 emit_insn (gen_blockage ());
27322 /* Unwind the stack to just below the saved registers. */
27323 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27324 hard_frame_pointer_rtx,
27325 GEN_INT (- 4 * num_regs)));
27326
27327 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27328 stack_pointer_rtx, hard_frame_pointer_rtx);
27329 }
27330
27331 arm_emit_multi_reg_pop (saved_regs_mask);
27332
27333 if (IS_INTERRUPT (func_type))
27334 {
27335 /* Interrupt handlers will have pushed the
27336 IP onto the stack, so restore it now. */
27337 rtx_insn *insn;
27338 rtx addr = gen_rtx_MEM (SImode,
27339 gen_rtx_POST_INC (SImode,
27340 stack_pointer_rtx));
27341 set_mem_alias_set (addr, get_frame_alias_set ());
27342 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27343 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27344 gen_rtx_REG (SImode, IP_REGNUM),
27345 NULL_RTX);
27346 }
27347
27348 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27349 return;
27350
27351 if (crtl->calls_eh_return)
27352 emit_insn (gen_addsi3 (stack_pointer_rtx,
27353 stack_pointer_rtx,
27354 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27355
27356 if (IS_STACKALIGN (func_type))
27357 /* Restore the original stack pointer. Before prologue, the stack was
27358 realigned and the original stack pointer saved in r0. For details,
27359 see comment in arm_expand_prologue. */
27360 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27361
27362 emit_jump_insn (simple_return_rtx);
27363 }
27364
27365 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27366 function is not a sibcall. */
27367 void
27368 arm_expand_epilogue (bool really_return)
27369 {
27370 unsigned long func_type;
27371 unsigned long saved_regs_mask;
27372 int num_regs = 0;
27373 int i;
27374 int amount;
27375 arm_stack_offsets *offsets;
27376
27377 func_type = arm_current_func_type ();
27378
27379 /* Naked functions don't have epilogues. Hence, generate the return pattern and
27380 let output_return_instruction take care of any instruction emission. */
27381 if (IS_NAKED (func_type)
27382 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27383 {
27384 if (really_return)
27385 emit_jump_insn (simple_return_rtx);
27386 return;
27387 }
27388
27389 /* If we are throwing an exception, then we really must be doing a
27390 return, so we can't tail-call. */
27391 gcc_assert (!crtl->calls_eh_return || really_return);
27392
27393 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27394 {
27395 arm_expand_epilogue_apcs_frame (really_return);
27396 return;
27397 }
27398
27399 /* Get frame offsets for ARM. */
27400 offsets = arm_get_frame_offsets ();
27401 saved_regs_mask = offsets->saved_regs_mask;
27402 num_regs = bit_count (saved_regs_mask);
27403
27404 if (frame_pointer_needed)
27405 {
27406 rtx_insn *insn;
27407 /* Restore stack pointer if necessary. */
27408 if (TARGET_ARM)
27409 {
27410 /* In ARM mode, frame pointer points to first saved register.
27411 Restore stack pointer to last saved register. */
27412 amount = offsets->frame - offsets->saved_regs;
27413
27414 /* Force out any pending memory operations that reference stacked data
27415 before stack de-allocation occurs. */
27416 emit_insn (gen_blockage ());
27417 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27418 hard_frame_pointer_rtx,
27419 GEN_INT (amount)));
27420 arm_add_cfa_adjust_cfa_note (insn, amount,
27421 stack_pointer_rtx,
27422 hard_frame_pointer_rtx);
27423
27424 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27425 deleted. */
27426 emit_insn (gen_force_register_use (stack_pointer_rtx));
27427 }
27428 else
27429 {
27430 /* In Thumb-2 mode, the frame pointer points to the last saved
27431 register. */
27432 amount = offsets->locals_base - offsets->saved_regs;
27433 if (amount)
27434 {
27435 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27436 hard_frame_pointer_rtx,
27437 GEN_INT (amount)));
27438 arm_add_cfa_adjust_cfa_note (insn, amount,
27439 hard_frame_pointer_rtx,
27440 hard_frame_pointer_rtx);
27441 }
27442
27443 /* Force out any pending memory operations that reference stacked data
27444 before stack de-allocation occurs. */
27445 emit_insn (gen_blockage ());
27446 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27447 hard_frame_pointer_rtx));
27448 arm_add_cfa_adjust_cfa_note (insn, 0,
27449 stack_pointer_rtx,
27450 hard_frame_pointer_rtx);
27451 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27452 deleted. */
27453 emit_insn (gen_force_register_use (stack_pointer_rtx));
27454 }
27455 }
27456 else
27457 {
27458 /* Pop off outgoing args and local frame to adjust stack pointer to
27459 last saved register. */
27460 amount = offsets->outgoing_args - offsets->saved_regs;
27461 if (amount)
27462 {
27463 rtx_insn *tmp;
27464 /* Force out any pending memory operations that reference stacked data
27465 before stack de-allocation occurs. */
27466 emit_insn (gen_blockage ());
27467 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27468 stack_pointer_rtx,
27469 GEN_INT (amount)));
27470 arm_add_cfa_adjust_cfa_note (tmp, amount,
27471 stack_pointer_rtx, stack_pointer_rtx);
27472 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27473 not deleted. */
27474 emit_insn (gen_force_register_use (stack_pointer_rtx));
27475 }
27476 }
27477
27478 if (TARGET_VFP_BASE)
27479 {
27480 /* Generate VFP register multi-pop. */
27481 int end_reg = LAST_VFP_REGNUM + 1;
27482
27483 /* Scan the registers in reverse order. We need to match
27484 any groupings made in the prologue and generate matching
27485 vldm operations. The need to match groups is because,
27486 unlike pop, vldm can only do consecutive regs. */
27487 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27488 /* Look for a case where a reg does not need restoring. */
27489 if (!reg_needs_saving_p (i) && !reg_needs_saving_p (i + 1))
27490 {
27491 /* Restore the regs discovered so far (from reg+2 to
27492 end_reg). */
27493 if (end_reg > i + 2)
27494 arm_emit_vfp_multi_reg_pop (i + 2,
27495 (end_reg - (i + 2)) / 2,
27496 stack_pointer_rtx);
27497 end_reg = i;
27498 }
27499
27500 /* Restore the remaining regs that we have discovered (or possibly
27501 even all of them, if the conditional in the for loop never
27502 fired). */
27503 if (end_reg > i + 2)
27504 arm_emit_vfp_multi_reg_pop (i + 2,
27505 (end_reg - (i + 2)) / 2,
27506 stack_pointer_rtx);
27507 }
27508
27509 if (TARGET_IWMMXT)
27510 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27511 if (reg_needs_saving_p (i))
27512 {
27513 rtx_insn *insn;
27514 rtx addr = gen_rtx_MEM (V2SImode,
27515 gen_rtx_POST_INC (SImode,
27516 stack_pointer_rtx));
27517 set_mem_alias_set (addr, get_frame_alias_set ());
27518 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27519 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27520 gen_rtx_REG (V2SImode, i),
27521 NULL_RTX);
27522 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27523 stack_pointer_rtx, stack_pointer_rtx);
27524 }
27525
27526 if (saved_regs_mask)
27527 {
27528 rtx insn;
27529 bool return_in_pc = false;
27530
27531 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27532 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27533 && !IS_CMSE_ENTRY (func_type)
27534 && !IS_STACKALIGN (func_type)
27535 && really_return
27536 && crtl->args.pretend_args_size == 0
27537 && saved_regs_mask & (1 << LR_REGNUM)
27538 && !crtl->calls_eh_return)
27539 {
27540 saved_regs_mask &= ~(1 << LR_REGNUM);
27541 saved_regs_mask |= (1 << PC_REGNUM);
27542 return_in_pc = true;
27543 }
27544
27545 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27546 {
27547 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27548 if (saved_regs_mask & (1 << i))
27549 {
27550 rtx addr = gen_rtx_MEM (SImode,
27551 gen_rtx_POST_INC (SImode,
27552 stack_pointer_rtx));
27553 set_mem_alias_set (addr, get_frame_alias_set ());
27554
27555 if (i == PC_REGNUM)
27556 {
27557 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27558 XVECEXP (insn, 0, 0) = ret_rtx;
27559 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
27560 addr);
27561 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27562 insn = emit_jump_insn (insn);
27563 }
27564 else
27565 {
27566 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27567 addr));
27568 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27569 gen_rtx_REG (SImode, i),
27570 NULL_RTX);
27571 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27572 stack_pointer_rtx,
27573 stack_pointer_rtx);
27574 }
27575 }
27576 }
27577 else
27578 {
27579 if (TARGET_LDRD
27580 && current_tune->prefer_ldrd_strd
27581 && !optimize_function_for_size_p (cfun))
27582 {
27583 if (TARGET_THUMB2)
27584 thumb2_emit_ldrd_pop (saved_regs_mask);
27585 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27586 arm_emit_ldrd_pop (saved_regs_mask);
27587 else
27588 arm_emit_multi_reg_pop (saved_regs_mask);
27589 }
27590 else
27591 arm_emit_multi_reg_pop (saved_regs_mask);
27592 }
27593
27594 if (return_in_pc)
27595 return;
27596 }
27597
27598 amount
27599 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
27600 if (amount)
27601 {
27602 int i, j;
27603 rtx dwarf = NULL_RTX;
27604 rtx_insn *tmp =
27605 emit_insn (gen_addsi3 (stack_pointer_rtx,
27606 stack_pointer_rtx,
27607 GEN_INT (amount)));
27608
27609 RTX_FRAME_RELATED_P (tmp) = 1;
27610
27611 if (cfun->machine->uses_anonymous_args)
27612 {
27613 /* Restore pretend args. See arm_expand_prologue for how the
27614 pretend args are saved on the stack. */
27615 int num_regs = crtl->args.pretend_args_size / 4;
27616 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
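/* E.g. pretend_args_size == 8 gives num_regs == 2 and a mask of
   {r2, r3}, matching the registers the prologue pushed.  */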
27617 for (j = 0, i = 0; j < num_regs; i++)
27618 if (saved_regs_mask & (1 << i))
27619 {
27620 rtx reg = gen_rtx_REG (SImode, i);
27621 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27622 j++;
27623 }
27624 REG_NOTES (tmp) = dwarf;
27625 }
27626 arm_add_cfa_adjust_cfa_note (tmp, amount,
27627 stack_pointer_rtx, stack_pointer_rtx);
27628 }
27629
27630 if (IS_CMSE_ENTRY (func_type))
27631 {
27632 /* CMSE_ENTRY always returns. */
27633 gcc_assert (really_return);
27634 /* Clear all caller-saved regs that are not used to return. */
27635 cmse_nonsecure_entry_clear_before_return ();
27636
27637 /* Armv8.1-M Mainline nonsecure entry: restore FPCXTNS from stack using
27638 VLDR. */
27639 if (TARGET_HAVE_FPCXT_CMSE)
27640 {
27641 rtx_insn *insn;
27642
27643 insn = emit_insn (gen_pop_fpsysreg_insn (stack_pointer_rtx,
27644 GEN_INT (FPCXTNS_ENUM)));
27645 rtx dwarf = gen_rtx_SET (stack_pointer_rtx,
27646 plus_constant (Pmode, stack_pointer_rtx, 4));
27647 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27648 RTX_FRAME_RELATED_P (insn) = 1;
27649 }
27650 }
27651
27652 if (!really_return)
27653 return;
27654
27655 if (crtl->calls_eh_return)
27656 emit_insn (gen_addsi3 (stack_pointer_rtx,
27657 stack_pointer_rtx,
27658 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27659
27660 if (IS_STACKALIGN (func_type))
27661 /* Restore the original stack pointer. Before prologue, the stack was
27662 realigned and the original stack pointer saved in r0. For details,
27663 see comment in arm_expand_prologue. */
27664 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
27665
27666 emit_jump_insn (simple_return_rtx);
27667 }
27668
27669 /* Implementation of insn prologue_thumb1_interwork. This is the first
27670 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27671
27672 const char *
27673 thumb1_output_interwork (void)
27674 {
27675 const char * name;
27676 FILE *f = asm_out_file;
27677
27678 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27679 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27680 == SYMBOL_REF);
27681 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27682
27683 /* Generate code sequence to switch us into Thumb mode. */
27684 /* The .code 32 directive has already been emitted by
27685 ASM_DECLARE_FUNCTION_NAME. */
27686 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27687 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27688
27689 /* Generate a label, so that the debugger will notice the
27690 change in instruction sets. This label is also used by
27691 the assembler to bypass the ARM code when this function
27692 is called from a Thumb encoded function elsewhere in the
27693 same file. Hence the definition of STUB_NAME here must
27694 agree with the definition in gas/config/tc-arm.c. */
27695
27696 #define STUB_NAME ".real_start_of"
27697
27698 fprintf (f, "\t.code\t16\n");
27699 #ifdef ARM_PE
27700 if (arm_dllexport_name_p (name))
27701 name = arm_strip_name_encoding (name);
27702 #endif
27703 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27704 fprintf (f, "\t.thumb_func\n");
27705 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27706
27707 return "";
27708 }
27709
27710 /* Handle the case of a double word load into a low register from
27711 a computed memory address. The computed address may involve a
27712 register which is overwritten by the load. */
27713 const char *
27714 thumb_load_double_from_address (rtx *operands)
27715 {
27716 rtx addr;
27717 rtx base;
27718 rtx offset;
27719 rtx arg1;
27720 rtx arg2;
27721
27722 gcc_assert (REG_P (operands[0]));
27723 gcc_assert (MEM_P (operands[1]));
27724
27725 /* Get the memory address. */
27726 addr = XEXP (operands[1], 0);
27727
27728 /* Work out how the memory address is computed. */
27729 switch (GET_CODE (addr))
27730 {
27731 case REG:
27732 operands[2] = adjust_address (operands[1], SImode, 4);
27733
27734 if (REGNO (operands[0]) == REGNO (addr))
27735 {
27736 output_asm_insn ("ldr\t%H0, %2", operands);
27737 output_asm_insn ("ldr\t%0, %1", operands);
27738 }
27739 else
27740 {
27741 output_asm_insn ("ldr\t%0, %1", operands);
27742 output_asm_insn ("ldr\t%H0, %2", operands);
27743 }
27744 break;
27745
27746 case CONST:
27747 /* Compute <address> + 4 for the high order load. */
27748 operands[2] = adjust_address (operands[1], SImode, 4);
27749
27750 output_asm_insn ("ldr\t%0, %1", operands);
27751 output_asm_insn ("ldr\t%H0, %2", operands);
27752 break;
27753
27754 case PLUS:
27755 arg1 = XEXP (addr, 0);
27756 arg2 = XEXP (addr, 1);
27757
27758 if (CONSTANT_P (arg1))
27759 base = arg2, offset = arg1;
27760 else
27761 base = arg1, offset = arg2;
27762
27763 gcc_assert (REG_P (base));
27764
27765 /* Catch the case of <address> = <reg> + <reg> */
27766 if (REG_P (offset))
27767 {
27768 int reg_offset = REGNO (offset);
27769 int reg_base = REGNO (base);
27770 int reg_dest = REGNO (operands[0]);
27771
27772 /* Add the base and offset registers together into the
27773 higher destination register. */
27774 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27775 reg_dest + 1, reg_base, reg_offset);
27776
27777 /* Load the lower destination register from the address in
27778 the higher destination register. */
27779 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27780 reg_dest, reg_dest + 1);
27781
27782 /* Load the higher destination register from its own address
27783 plus 4. */
27784 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27785 reg_dest + 1, reg_dest + 1);
27786 }
27787 else
27788 {
27789 /* Compute <address> + 4 for the high order load. */
27790 operands[2] = adjust_address (operands[1], SImode, 4);
27791
27792 /* If the computed address is held in the low order register
27793 then load the high order register first, otherwise always
27794 load the low order register first. */
27795 if (REGNO (operands[0]) == REGNO (base))
27796 {
27797 output_asm_insn ("ldr\t%H0, %2", operands);
27798 output_asm_insn ("ldr\t%0, %1", operands);
27799 }
27800 else
27801 {
27802 output_asm_insn ("ldr\t%0, %1", operands);
27803 output_asm_insn ("ldr\t%H0, %2", operands);
27804 }
27805 }
27806 break;
27807
27808 case LABEL_REF:
27809 /* With no registers to worry about we can just load the value
27810 directly. */
27811 operands[2] = adjust_address (operands[1], SImode, 4);
27812
27813 output_asm_insn ("ldr\t%H0, %2", operands);
27814 output_asm_insn ("ldr\t%0, %1", operands);
27815 break;
27816
27817 default:
27818 gcc_unreachable ();
27819 }
27820
27821 return "";
27822 }
27823
27824 const char *
27825 thumb_output_move_mem_multiple (int n, rtx *operands)
27826 {
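/* The scratch register operands are sorted into ascending order so that
   the ldmia/stmia register lists below are printed in the canonical
   (ascending) order the assembler expects.  */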
27827 switch (n)
27828 {
27829 case 2:
27830 if (REGNO (operands[4]) > REGNO (operands[5]))
27831 std::swap (operands[4], operands[5]);
27832
27833 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27834 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27835 break;
27836
27837 case 3:
27838 if (REGNO (operands[4]) > REGNO (operands[5]))
27839 std::swap (operands[4], operands[5]);
27840 if (REGNO (operands[5]) > REGNO (operands[6]))
27841 std::swap (operands[5], operands[6]);
27842 if (REGNO (operands[4]) > REGNO (operands[5]))
27843 std::swap (operands[4], operands[5]);
27844
27845 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27846 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27847 break;
27848
27849 default:
27850 gcc_unreachable ();
27851 }
27852
27853 return "";
27854 }
27855
27856 /* Output a call-via instruction for thumb state. */
27857 const char *
27858 thumb_call_via_reg (rtx reg)
27859 {
27860 int regno = REGNO (reg);
27861 rtx *labelp;
27862
27863 gcc_assert (regno < LR_REGNUM);
27864
27865 /* If we are in the normal text section we can use a single instance
27866 per compilation unit. If we are doing function sections, then we need
27867 an entry per section, since we can't rely on reachability. */
27868 if (in_section == text_section)
27869 {
27870 thumb_call_reg_needed = 1;
27871
27872 if (thumb_call_via_label[regno] == NULL)
27873 thumb_call_via_label[regno] = gen_label_rtx ();
27874 labelp = thumb_call_via_label + regno;
27875 }
27876 else
27877 {
27878 if (cfun->machine->call_via[regno] == NULL)
27879 cfun->machine->call_via[regno] = gen_label_rtx ();
27880 labelp = cfun->machine->call_via + regno;
27881 }
27882
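/* The "bl" targets a small per-register helper label; for the text
   section these helpers are emitted by arm_file_end below and simply
   do "bx rN".  */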
27883 output_asm_insn ("bl\t%a0", labelp);
27884 return "";
27885 }
27886
27887 /* Routines for generating rtl. */
27888 void
27889 thumb_expand_cpymemqi (rtx *operands)
27890 {
27891 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27892 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27893 HOST_WIDE_INT len = INTVAL (operands[2]);
27894 HOST_WIDE_INT offset = 0;
27895
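/* Copy in decreasing chunk sizes: as many 12-byte blocks as possible,
   then one 8-byte block, one word, one halfword and one final byte as
   needed (e.g. a 23-byte copy uses 12 + 8 + 2 + 1).  */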
27896 while (len >= 12)
27897 {
27898 emit_insn (gen_cpymem12b (out, in, out, in));
27899 len -= 12;
27900 }
27901
27902 if (len >= 8)
27903 {
27904 emit_insn (gen_cpymem8b (out, in, out, in));
27905 len -= 8;
27906 }
27907
27908 if (len >= 4)
27909 {
27910 rtx reg = gen_reg_rtx (SImode);
27911 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27912 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27913 len -= 4;
27914 offset += 4;
27915 }
27916
27917 if (len >= 2)
27918 {
27919 rtx reg = gen_reg_rtx (HImode);
27920 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27921 plus_constant (Pmode, in,
27922 offset))));
27923 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27924 offset)),
27925 reg));
27926 len -= 2;
27927 offset += 2;
27928 }
27929
27930 if (len)
27931 {
27932 rtx reg = gen_reg_rtx (QImode);
27933 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27934 plus_constant (Pmode, in,
27935 offset))));
27936 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27937 offset)),
27938 reg));
27939 }
27940 }
27941
27942 void
27943 thumb_reload_out_hi (rtx *operands)
27944 {
27945 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27946 }
27947
27948 /* Return the length of a function name prefix
27949 that starts with the character 'c'. */
27950 static int
27951 arm_get_strip_length (int c)
27952 {
27953 switch (c)
27954 {
27955 ARM_NAME_ENCODING_LENGTHS
27956 default: return 0;
27957 }
27958 }
27959
27960 /* Return a pointer to a function's name with any
27961 and all prefix encodings stripped from it. */
27962 const char *
27963 arm_strip_name_encoding (const char *name)
27964 {
27965 int skip;
27966
27967 while ((skip = arm_get_strip_length (* name)))
27968 name += skip;
27969
27970 return name;
27971 }
27972
27973 /* If there is a '*' anywhere in the name's prefix, then
27974 emit the stripped name verbatim, otherwise prepend an
27975 underscore if leading underscores are being used. */
27976 void
27977 arm_asm_output_labelref (FILE *stream, const char *name)
27978 {
27979 int skip;
27980 int verbatim = 0;
27981
27982 while ((skip = arm_get_strip_length (* name)))
27983 {
27984 verbatim |= (*name == '*');
27985 name += skip;
27986 }
27987
27988 if (verbatim)
27989 fputs (name, stream);
27990 else
27991 asm_fprintf (stream, "%U%s", name);
27992 }
27993
27994 /* This function is used to emit an EABI tag and its associated value.
27995 We emit the numerical value of the tag in case the assembler does not
27996 support textual tags (e.g. gas prior to 2.20). If requested we include
27997 the tag name in a comment so that anyone reading the assembler output
27998 will know which tag is being set.
27999
28000 This function is not static because arm-c.c needs it too. */
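/* For example, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 2) emits
   ".eabi_attribute 26, 2", followed by "@ Tag_ABI_enum_size" when
   -fverbose-asm or -dA is in effect.  */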
28001
28002 void
28003 arm_emit_eabi_attribute (const char *name, int num, int val)
28004 {
28005 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28006 if (flag_verbose_asm || flag_debug_asm)
28007 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28008 asm_fprintf (asm_out_file, "\n");
28009 }
28010
28011 /* This function is used to print CPU tuning information as comment
28012 in assembler file. Pointers are not printed for now. */
28013
28014 void
28015 arm_print_tune_info (void)
28016 {
28017 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
28018 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
28019 current_tune->constant_limit);
28020 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28021 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
28022 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28023 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
28024 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28025 "prefetch.l1_cache_size:\t%d\n",
28026 current_tune->prefetch.l1_cache_size);
28027 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28028 "prefetch.l1_cache_line_size:\t%d\n",
28029 current_tune->prefetch.l1_cache_line_size);
28030 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28031 "prefer_constant_pool:\t%d\n",
28032 (int) current_tune->prefer_constant_pool);
28033 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28034 "branch_cost:\t(s:speed, p:predictable)\n");
28035 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
28036 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
28037 current_tune->branch_cost (false, false));
28038 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
28039 current_tune->branch_cost (false, true));
28040 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
28041 current_tune->branch_cost (true, false));
28042 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
28043 current_tune->branch_cost (true, true));
28044 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28045 "prefer_ldrd_strd:\t%d\n",
28046 (int) current_tune->prefer_ldrd_strd);
28047 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28048 "logical_op_non_short_circuit:\t[%d,%d]\n",
28049 (int) current_tune->logical_op_non_short_circuit_thumb,
28050 (int) current_tune->logical_op_non_short_circuit_arm);
28051 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28052 "disparage_flag_setting_t16_encodings:\t%d\n",
28053 (int) current_tune->disparage_flag_setting_t16_encodings);
28054 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28055 "string_ops_prefer_neon:\t%d\n",
28056 (int) current_tune->string_ops_prefer_neon);
28057 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
28058 "max_insns_inline_memset:\t%d\n",
28059 current_tune->max_insns_inline_memset);
28060 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
28061 current_tune->fusible_ops);
28062 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
28063 (int) current_tune->sched_autopref);
28064 }
28065
28066 /* Print .arch and .arch_extension directives corresponding to the
28067 current architecture configuration. */
28068 static void
28069 arm_print_asm_arch_directives ()
28070 {
28071 const arch_option *arch
28072 = arm_parse_arch_option_name (all_architectures, "-march",
28073 arm_active_target.arch_name);
28074 auto_sbitmap opt_bits (isa_num_bits);
28075
28076 gcc_assert (arch);
28077
28078 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
28079 arm_last_printed_arch_string = arm_active_target.arch_name;
28080 if (!arch->common.extensions)
28081 return;
28082
28083 for (const struct cpu_arch_extension *opt = arch->common.extensions;
28084 opt->name != NULL;
28085 opt++)
28086 {
28087 if (!opt->remove)
28088 {
28089 arm_initialize_isa (opt_bits, opt->isa_bits);
28090
28091 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft" and
28092 "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and MVE with
28093 floating point instructions are disabled. So the following check
28094 restricts the printing of ".arch_extension mve" and
28095 ".arch_extension fp" (for mve.fp) in the assembly file. MVE needs
28096 this special behaviour because the feature bits "mve" and
28097 "mve_float" are not part of the "fpu bits", so they are not cleared
28098 when -mfloat-abi=soft (i.e. nofp), but the macros TARGET_HAVE_MVE and
28099 TARGET_HAVE_MVE_FLOAT are disabled. */
28100 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
28101 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
28102 && !TARGET_HAVE_MVE_FLOAT))
28103 continue;
28104
28105 /* If every feature bit of this option is set in the target
28106 ISA specification, print out the option name. However,
28107 don't print anything if all the bits are part of the
28108 FPU specification. */
28109 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
28110 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
28111 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
28112 }
28113 }
28114 }
28115
28116 static void
28117 arm_file_start (void)
28118 {
28119 int val;
28120
28121 if (TARGET_BPABI)
28122 {
28123 /* We don't have a specified CPU. Use the architecture to
28124 generate the tags.
28125
28126 Note: it might be better to do this unconditionally, then the
28127 assembler would not need to know about all new CPU names as
28128 they are added. */
28129 if (!arm_active_target.core_name)
28130 {
28131 /* armv7ve doesn't support any extensions. */
28132 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
28133 {
28134 /* Keep backward compatibility for assemblers
28135 which don't support armv7ve. */
28136 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28137 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28138 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28139 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28140 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28141 arm_last_printed_arch_string = "armv7ve";
28142 }
28143 else
28144 arm_print_asm_arch_directives ();
28145 }
28146 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
28147 {
28148 asm_fprintf (asm_out_file, "\t.arch %s\n",
28149 arm_active_target.core_name + 8);
28150 arm_last_printed_arch_string = arm_active_target.core_name + 8;
28151 }
28152 else
28153 {
28154 const char* truncated_name
28155 = arm_rewrite_selected_cpu (arm_active_target.core_name);
28156 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_asmcpu))
28157 asm_fprintf (asm_out_file, "\t.eabi_attribute 5, \"%s\"\n",
28158 truncated_name);
28159 else
28160 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28161 }
28162
28163 if (print_tune_info)
28164 arm_print_tune_info ();
28165
28166 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
28167 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
28168
28169 if (TARGET_HARD_FLOAT_ABI)
28170 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28171
28172 /* Some of these attributes only apply when the corresponding features
28173 are used. However we don't have any easy way of figuring this out.
28174 Conservatively record the setting that would have been used. */
28175
28176 if (flag_rounding_math)
28177 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28178
28179 if (!flag_unsafe_math_optimizations)
28180 {
28181 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28182 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28183 }
28184 if (flag_signaling_nans)
28185 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28186
28187 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28188 flag_finite_math_only ? 1 : 3);
28189
28190 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28191 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28192 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28193 flag_short_enums ? 1 : 2);
28194
28195 /* Tag_ABI_optimization_goals. */
28196 if (optimize_size)
28197 val = 4;
28198 else if (optimize >= 2)
28199 val = 2;
28200 else if (optimize)
28201 val = 1;
28202 else
28203 val = 6;
28204 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28205
28206 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28207 unaligned_access);
28208
28209 if (arm_fp16_format)
28210 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28211 (int) arm_fp16_format);
28212
28213 if (arm_lang_output_object_attributes_hook)
28214 arm_lang_output_object_attributes_hook();
28215 }
28216
28217 default_file_start ();
28218 }
28219
28220 static void
28221 arm_file_end (void)
28222 {
28223 int regno;
28224
28225 if (NEED_INDICATE_EXEC_STACK)
28226 /* Add .note.GNU-stack. */
28227 file_end_indicate_exec_stack ();
28228
28229 if (! thumb_call_reg_needed)
28230 return;
28231
28232 switch_to_section (text_section);
28233 asm_fprintf (asm_out_file, "\t.code 16\n");
28234 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28235
28236 for (regno = 0; regno < LR_REGNUM; regno++)
28237 {
28238 rtx label = thumb_call_via_label[regno];
28239
28240 if (label != 0)
28241 {
28242 targetm.asm_out.internal_label (asm_out_file, "L",
28243 CODE_LABEL_NUMBER (label));
28244 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28245 }
28246 }
28247 }
28248
28249 #ifndef ARM_PE
28250 /* Symbols in the text segment can be accessed without indirecting via the
28251 constant pool; it may take an extra binary operation, but this is still
28252 faster than indirecting via memory. Don't do this when not optimizing,
28253 since we won't be calculating all of the offsets necessary to do this
28254 simplification. */
28255
28256 static void
28257 arm_encode_section_info (tree decl, rtx rtl, int first)
28258 {
28259 if (optimize > 0 && TREE_CONSTANT (decl))
28260 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28261
28262 default_encode_section_info (decl, rtl, first);
28263 }
28264 #endif /* !ARM_PE */
28265
28266 static void
28267 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28268 {
28269 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28270 && !strcmp (prefix, "L"))
28271 {
28272 arm_ccfsm_state = 0;
28273 arm_target_insn = NULL;
28274 }
28275 default_internal_label (stream, prefix, labelno);
28276 }
28277
28278 /* Define classes to generate code as RTL or output asm to a file.
28279 Using templates then allows the same code to be used to output code
28280 sequences in the two formats. */
28281 class thumb1_const_rtl
28282 {
28283 public:
28284 thumb1_const_rtl (rtx dst) : dst (dst) {}
28285
28286 void mov (HOST_WIDE_INT val)
28287 {
28288 emit_set_insn (dst, GEN_INT (val));
28289 }
28290
28291 void add (HOST_WIDE_INT val)
28292 {
28293 emit_set_insn (dst, gen_rtx_PLUS (SImode, dst, GEN_INT (val)));
28294 }
28295
28296 void ashift (HOST_WIDE_INT shift)
28297 {
28298 emit_set_insn (dst, gen_rtx_ASHIFT (SImode, dst, GEN_INT (shift)));
28299 }
28300
28301 void neg ()
28302 {
28303 emit_set_insn (dst, gen_rtx_NEG (SImode, dst));
28304 }
28305
28306 private:
28307 rtx dst;
28308 };
28309
28310 class thumb1_const_print
28311 {
28312 public:
28313 thumb1_const_print (FILE *f, int regno)
28314 {
28315 t_file = f;
28316 dst_regname = reg_names[regno];
28317 }
28318
28319 void mov (HOST_WIDE_INT val)
28320 {
28321 asm_fprintf (t_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28322 dst_regname, val);
28323 }
28324
28325 void add (HOST_WIDE_INT val)
28326 {
28327 asm_fprintf (t_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28328 dst_regname, val);
28329 }
28330
28331 void ashift (HOST_WIDE_INT shift)
28332 {
28333 asm_fprintf (t_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
28334 dst_regname, shift);
28335 }
28336
28337 void neg ()
28338 {
28339 asm_fprintf (t_file, "\trsbs\t%s, #0\n", dst_regname);
28340 }
28341
28342 private:
28343 FILE *t_file;
28344 const char *dst_regname;
28345 };
28346
28347 /* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
28348 Avoid generating useless code when one of the bytes is zero. */
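/* As a worked illustration (not from the original sources): for
   op1 == 0x12345678 the general path below emits
     movs #0x12; lsls #8; adds #0x34; lsls #8; adds #0x56; lsls #8; adds #0x78
   -- the 7-instruction worst case described in the comments below.  */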
28349 template <class T>
28350 void
28351 thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
28352 {
28353 bool mov_done_p = false;
28354 unsigned HOST_WIDE_INT val = op1;
28355 int shift = 0;
28356 int i;
28357
28358 gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
28359
28360 if (val <= 255)
28361 {
28362 dst.mov (val);
28363 return;
28364 }
28365
28366 /* For negative numbers with the first nine bits set, build the
28367 opposite of OP1 and then negate it; the resulting sequence is
28368 generally shorter, and never longer. */
28369 if ((val & 0xFF800000) == 0xFF800000)
28370 {
28371 thumb1_gen_const_int_1 (dst, -op1);
28372 dst.neg ();
28373 return;
28374 }
28375
28376 /* In the general case, we need 7 instructions to build
28377 a 32-bit constant (1 movs, 3 lsls, 3 adds). We can
28378 do better if VAL is small enough, or
28379 right-shiftable by a suitable amount. If the
28380 right shift lets us encode at least one byte less,
28381 it's worth it: we save an adds and a lsls at the
28382 expense of a final lsls. */
28383 int final_shift = number_of_first_bit_set (val);
28384
28385 int leading_zeroes = clz_hwi (val);
28386 int number_of_bytes_needed
28387 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
28388 / BITS_PER_UNIT) + 1;
28389 int number_of_bytes_needed2
28390 = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
28391 / BITS_PER_UNIT) + 1;
28392
28393 if (number_of_bytes_needed2 < number_of_bytes_needed)
28394 val >>= final_shift;
28395 else
28396 final_shift = 0;
28397
28398 /* If we are in a very small range, we can use either a single movs
28399 or movs+adds. */
28400 if (val <= 510)
28401 {
28402 if (val > 255)
28403 {
28404 unsigned HOST_WIDE_INT high = val - 255;
28405
28406 dst.mov (high);
28407 dst.add (255);
28408 }
28409 else
28410 dst.mov (val);
28411
28412 if (final_shift > 0)
28413 dst.ashift (final_shift);
28414 }
28415 else
28416 {
28417 /* General case, emit upper 3 bytes as needed. */
28418 for (i = 0; i < 3; i++)
28419 {
28420 unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
28421
28422 if (byte)
28423 {
28424 /* We are about to emit new bits, stop accumulating a
28425 shift amount, and left-shift only if we have already
28426 emitted some upper bits. */
28427 if (mov_done_p)
28428 {
28429 dst.ashift (shift);
28430 dst.add (byte);
28431 }
28432 else
28433 dst.mov (byte);
28434
28435 /* Stop accumulating shift amount since we've just
28436 emitted some bits. */
28437 shift = 0;
28438
28439 mov_done_p = true;
28440 }
28441
28442 if (mov_done_p)
28443 shift += 8;
28444 }
28445
28446 /* Emit lower byte. */
28447 if (!mov_done_p)
28448 dst.mov (val & 0xff);
28449 else
28450 {
28451 dst.ashift (shift);
28452 if (val & 0xff)
28453 dst.add (val & 0xff);
28454 }
28455
28456 if (final_shift > 0)
28457 dst.ashift (final_shift);
28458 }
28459 }
28460
28461 /* Proxies for thumb1.md, since the thumb1_const_print and
28462 thumb1_const_rtl classes are not exported. */
28463 void
28464 thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
28465 {
28466 thumb1_const_rtl t (dst);
28467 thumb1_gen_const_int_1 (t, op1);
28468 }
28469
28470 void
28471 thumb1_gen_const_int_print (rtx dst, HOST_WIDE_INT op1)
28472 {
28473 thumb1_const_print t (asm_out_file, REGNO (dst));
28474 thumb1_gen_const_int_1 (t, op1);
28475 }
28476
28477 /* Output code to add DELTA to the first argument, and then jump
28478 to FUNCTION. Used for C++ multiple inheritance. */
28479
28480 static void
28481 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28482 HOST_WIDE_INT, tree function)
28483 {
28484 static int thunk_label = 0;
28485 char label[256];
28486 char labelpc[256];
28487 int mi_delta = delta;
28488 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28489 int shift = 0;
28490 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28491 ? 1 : 0);
28492 if (mi_delta < 0)
28493 mi_delta = - mi_delta;
28494
28495 final_start_function (emit_barrier (), file, 1);
28496
28497 if (TARGET_THUMB1)
28498 {
28499 int labelno = thunk_label++;
28500 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28501 /* Thunks are entered in arm mode when available. */
28502 if (TARGET_THUMB1_ONLY)
28503 {
28504 /* push r3 so we can use it as a temporary. */
28505 /* TODO: Omit this save if r3 is not used. */
28506 fputs ("\tpush {r3}\n", file);
28507
28508 /* With -mpure-code, we cannot load the address from the
28509 constant pool: we build it explicitly. */
28510 if (target_pure_code)
28511 {
28512 fputs ("\tmovs\tr3, #:upper8_15:#", file);
28513 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28514 fputc ('\n', file);
28515 fputs ("\tlsls r3, #8\n", file);
28516 fputs ("\tadds\tr3, #:upper0_7:#", file);
28517 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28518 fputc ('\n', file);
28519 fputs ("\tlsls r3, #8\n", file);
28520 fputs ("\tadds\tr3, #:lower8_15:#", file);
28521 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28522 fputc ('\n', file);
28523 fputs ("\tlsls r3, #8\n", file);
28524 fputs ("\tadds\tr3, #:lower0_7:#", file);
28525 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28526 fputc ('\n', file);
28527 }
28528 else
28529 fputs ("\tldr\tr3, ", file);
28530 }
28531 else
28532 {
28533 fputs ("\tldr\tr12, ", file);
28534 }
28535
28536 if (!target_pure_code)
28537 {
28538 assemble_name (file, label);
28539 fputc ('\n', file);
28540 }
28541
28542 if (flag_pic)
28543 {
28544 /* If we are generating PIC, the ldr instruction below loads
28545 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28546 the address of the add + 8, so we have:
28547
28548 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28549 = target + 1.
28550
28551 Note that we have "+ 1" because some versions of GNU ld
28552 don't set the low bit of the result for R_ARM_REL32
28553 relocations against thumb function symbols.
28554 On ARMv6M this is +4, not +8. */
28555 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28556 assemble_name (file, labelpc);
28557 fputs (":\n", file);
28558 if (TARGET_THUMB1_ONLY)
28559 {
28560 /* This is 2 insns after the start of the thunk, so we know it
28561 is 4-byte aligned. */
28562 fputs ("\tadd\tr3, pc, r3\n", file);
28563 fputs ("\tmov r12, r3\n", file);
28564 }
28565 else
28566 fputs ("\tadd\tr12, pc, r12\n", file);
28567 }
28568 else if (TARGET_THUMB1_ONLY)
28569 fputs ("\tmov r12, r3\n", file);
28570 }
28571 if (TARGET_THUMB1_ONLY)
28572 {
28573 if (mi_delta > 255)
28574 {
28575 /* With -mpure-code, we cannot load MI_DELTA from the
28576 constant pool: we build it explicitly. */
28577 if (target_pure_code)
28578 {
28579 thumb1_const_print r3 (file, 3);
28580 thumb1_gen_const_int_1 (r3, mi_delta);
28581 }
28582 else
28583 {
28584 fputs ("\tldr\tr3, ", file);
28585 assemble_name (file, label);
28586 fputs ("+4\n", file);
28587 }
28588 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
28589 mi_op, this_regno, this_regno);
28590 }
28591 else if (mi_delta != 0)
28592 {
28593 /* Thumb1 unified syntax requires the s suffix in the instruction name
28594 when one of the operands is an immediate. */
28595 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
28596 mi_op, this_regno, this_regno,
28597 mi_delta);
28598 }
28599 }
28600 else
28601 {
28602 /* TODO: Use movw/movt for large constants when available. */
28603 while (mi_delta != 0)
28604 {
28605 if ((mi_delta & (3 << shift)) == 0)
28606 shift += 2;
28607 else
28608 {
28609 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28610 mi_op, this_regno, this_regno,
28611 mi_delta & (0xff << shift));
28612 mi_delta &= ~(0xff << shift);
28613 shift += 8;
28614 }
28615 }
28616 }
28617 if (TARGET_THUMB1)
28618 {
28619 if (TARGET_THUMB1_ONLY)
28620 fputs ("\tpop\t{r3}\n", file);
28621
28622 fprintf (file, "\tbx\tr12\n");
28623
28624 /* With -mpure-code, we don't need to emit literals for the
28625 function address and delta since we emitted code to build
28626 them. */
28627 if (!target_pure_code)
28628 {
28629 ASM_OUTPUT_ALIGN (file, 2);
28630 assemble_name (file, label);
28631 fputs (":\n", file);
28632 if (flag_pic)
28633 {
28634 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
28635 rtx tem = XEXP (DECL_RTL (function), 0);
28636 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
28637 pipeline offset is four rather than eight. Adjust the offset
28638 accordingly. */
28639 tem = plus_constant (GET_MODE (tem), tem,
28640 TARGET_THUMB1_ONLY ? -3 : -7);
28641 tem = gen_rtx_MINUS (GET_MODE (tem),
28642 tem,
28643 gen_rtx_SYMBOL_REF (Pmode,
28644 ggc_strdup (labelpc)));
28645 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28646 }
28647 else
28648 /* Output ".word .LTHUNKn". */
28649 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28650
28651 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28652 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
28653 }
28654 }
28655 else
28656 {
28657 fputs ("\tb\t", file);
28658 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28659 if (NEED_PLT_RELOC)
28660 fputs ("(PLT)", file);
28661 fputc ('\n', file);
28662 }
28663
28664 final_end_function ();
28665 }
28666
28667 /* MI thunk handling for TARGET_32BIT. */
28668
28669 static void
28670 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
28671 HOST_WIDE_INT vcall_offset, tree function)
28672 {
28673 const bool long_call_p = arm_is_long_call_p (function);
28674
28675 /* On ARM, this_regno is R0 or R1 depending on
28676 whether the function returns an aggregate or not.
28677 */
28678 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
28679 function)
28680 ? R1_REGNUM : R0_REGNUM);
28681
28682 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
28683 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
28684 reload_completed = 1;
28685 emit_note (NOTE_INSN_PROLOGUE_END);
28686
28687 /* Add DELTA to THIS_RTX. */
28688 if (delta != 0)
28689 arm_split_constant (PLUS, Pmode, NULL_RTX,
28690 delta, this_rtx, this_rtx, false);
28691
28692 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
28693 if (vcall_offset != 0)
28694 {
28695 /* Load *THIS_RTX. */
28696 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
28697 /* Compute *THIS_RTX + VCALL_OFFSET. */
28698 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
28699 false);
28700 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
28701 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
28702 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
28703 }
28704
28705 /* Generate a tail call to the target function. */
28706 if (!TREE_USED (function))
28707 {
28708 assemble_external (function);
28709 TREE_USED (function) = 1;
28710 }
28711 rtx funexp = XEXP (DECL_RTL (function), 0);
28712 if (long_call_p)
28713 {
28714 emit_move_insn (temp, funexp);
28715 funexp = temp;
28716 }
28717 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
28718 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
28719 SIBLING_CALL_P (insn) = 1;
28720 emit_barrier ();
28721
28722 /* Indirect calls require a bit of fixup in PIC mode. */
28723 if (long_call_p)
28724 {
28725 split_all_insns_noflow ();
28726 arm_reorg ();
28727 }
28728
28729 insn = get_insns ();
28730 shorten_branches (insn);
28731 final_start_function (insn, file, 1);
28732 final (insn, file, 1);
28733 final_end_function ();
28734
28735 /* Stop pretending this is a post-reload pass. */
28736 reload_completed = 0;
28737 }
28738
28739 /* Output code to add DELTA to the first argument, and then jump
28740 to FUNCTION. Used for C++ multiple inheritance. */
28741
28742 static void
28743 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
28744 HOST_WIDE_INT vcall_offset, tree function)
28745 {
28746 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
28747
28748 assemble_start_function (thunk, fnname);
28749 if (TARGET_32BIT)
28750 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
28751 else
28752 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
28753 assemble_end_function (thunk, fnname);
28754 }
28755
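/* Output the CONST_VECTOR X to FILE as a single hexadecimal integer,
   printing the elements from the highest-numbered one down to element 0.
   Always returns 1.  */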
28756 int
28757 arm_emit_vector_const (FILE *file, rtx x)
28758 {
28759 int i;
28760 const char * pattern;
28761
28762 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28763
28764 switch (GET_MODE (x))
28765 {
28766 case E_V2SImode: pattern = "%08x"; break;
28767 case E_V4HImode: pattern = "%04x"; break;
28768 case E_V8QImode: pattern = "%02x"; break;
28769 default: gcc_unreachable ();
28770 }
28771
28772 fprintf (file, "0x");
28773 for (i = CONST_VECTOR_NUNITS (x); i--;)
28774 {
28775 rtx element;
28776
28777 element = CONST_VECTOR_ELT (x, i);
28778 fprintf (file, pattern, INTVAL (element));
28779 }
28780
28781 return 1;
28782 }
28783
28784 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28785 HFmode constant pool entries are actually loaded with ldr. */
28786 void
28787 arm_emit_fp16_const (rtx c)
28788 {
28789 long bits;
28790
28791 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
28792 if (WORDS_BIG_ENDIAN)
28793 assemble_zeros (2);
28794 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28795 if (!WORDS_BIG_ENDIAN)
28796 assemble_zeros (2);
28797 }
28798
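/* Output the assembly for loading the iWMMXt GR register OPERANDS[0]
   from the memory reference OPERANDS[1].  When the address offset is
   within range of wldrw (or the address is not a simple
   register-plus-offset form), a single wldrw suffices; otherwise the
   value is loaded into the base core register with ldr and transferred
   with tmcr, saving and restoring that core register on the stack
   around the sequence.  */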
28799 const char *
28800 arm_output_load_gr (rtx *operands)
28801 {
28802 rtx reg;
28803 rtx offset;
28804 rtx wcgr;
28805 rtx sum;
28806
28807 if (!MEM_P (operands [1])
28808 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28809 || !REG_P (reg = XEXP (sum, 0))
28810 || !CONST_INT_P (offset = XEXP (sum, 1))
28811 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28812 return "wldrw%?\t%0, %1";
28813
28814 /* Fix up an out-of-range load of a GR register. */
28815 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28816 wcgr = operands[0];
28817 operands[0] = reg;
28818 output_asm_insn ("ldr%?\t%0, %1", operands);
28819
28820 operands[0] = wcgr;
28821 operands[1] = reg;
28822 output_asm_insn ("tmcr%?\t%0, %1", operands);
28823 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28824
28825 return "";
28826 }
28827
28828 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28829
28830 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28831 named arg and all anonymous args onto the stack.
28832 XXX I know the prologue shouldn't be pushing registers, but it is faster
28833 that way. */
28834
28835 static void
28836 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28837 const function_arg_info &arg,
28838 int *pretend_size,
28839 int second_time ATTRIBUTE_UNUSED)
28840 {
28841 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28842 int nregs;
28843
28844 cfun->machine->uses_anonymous_args = 1;
28845 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28846 {
28847 nregs = pcum->aapcs_ncrn;
28848 if (nregs & 1)
28849 {
28850 int res = arm_needs_doubleword_align (arg.mode, arg.type);
28851 if (res < 0 && warn_psabi)
28852 inform (input_location, "parameter passing for argument of "
28853 "type %qT changed in GCC 7.1", arg.type);
28854 else if (res > 0)
28855 {
28856 nregs++;
28857 if (res > 1 && warn_psabi)
28858 inform (input_location,
28859 "parameter passing for argument of type "
28860 "%qT changed in GCC 9.1", arg.type);
28861 }
28862 }
28863 }
28864 else
28865 nregs = pcum->nregs;
28866
28867 if (nregs < NUM_ARG_REGS)
28868 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28869 }
28870
28871 /* We can't rely on the caller doing the proper promotion when
28872 using APCS or ATPCS. */
28873
28874 static bool
28875 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28876 {
28877 return !TARGET_AAPCS_BASED;
28878 }
28879
28880 static machine_mode
28881 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28882 machine_mode mode,
28883 int *punsignedp ATTRIBUTE_UNUSED,
28884 const_tree fntype ATTRIBUTE_UNUSED,
28885 int for_return ATTRIBUTE_UNUSED)
28886 {
28887 if (GET_MODE_CLASS (mode) == MODE_INT
28888 && GET_MODE_SIZE (mode) < 4)
28889 return SImode;
28890
28891 return mode;
28892 }
28893
28894
28895 static bool
28896 arm_default_short_enums (void)
28897 {
28898 return ARM_DEFAULT_SHORT_ENUMS;
28899 }
28900
28901
28902 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28903
28904 static bool
28905 arm_align_anon_bitfield (void)
28906 {
28907 return TARGET_AAPCS_BASED;
28908 }
28909
28910
28911 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28912
28913 static tree
28914 arm_cxx_guard_type (void)
28915 {
28916 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28917 }
28918
28919
28920 /* The EABI says test the least significant bit of a guard variable. */
28921
28922 static bool
28923 arm_cxx_guard_mask_bit (void)
28924 {
28925 return TARGET_AAPCS_BASED;
28926 }
28927
28928
28929 /* The EABI specifies that all array cookies are 8 bytes long. */
28930
28931 static tree
28932 arm_get_cookie_size (tree type)
28933 {
28934 tree size;
28935
28936 if (!TARGET_AAPCS_BASED)
28937 return default_cxx_get_cookie_size (type);
28938
28939 size = build_int_cst (sizetype, 8);
28940 return size;
28941 }
28942
28943
28944 /* The EABI says that array cookies should also contain the element size. */
28945
28946 static bool
28947 arm_cookie_has_size (void)
28948 {
28949 return TARGET_AAPCS_BASED;
28950 }
28951
28952
28953 /* The EABI says constructors and destructors should return a pointer to
28954 the object constructed/destroyed. */
28955
28956 static bool
28957 arm_cxx_cdtor_returns_this (void)
28958 {
28959 return TARGET_AAPCS_BASED;
28960 }
28961
28962 /* The EABI says that an inline function may never be the key
28963 method. */
28964
28965 static bool
28966 arm_cxx_key_method_may_be_inline (void)
28967 {
28968 return !TARGET_AAPCS_BASED;
28969 }
28970
28971 static void
28972 arm_cxx_determine_class_data_visibility (tree decl)
28973 {
28974 if (!TARGET_AAPCS_BASED
28975 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28976 return;
28977
28978 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28979 is exported. However, on systems without dynamic vague linkage,
28980 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28981 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28982 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28983 else
28984 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28985 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28986 }
28987
28988 static bool
28989 arm_cxx_class_data_always_comdat (void)
28990 {
28991 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28992 vague linkage if the class has no key function. */
28993 return !TARGET_AAPCS_BASED;
28994 }
28995
28996
28997 /* The EABI says __aeabi_atexit should be used to register static
28998 destructors. */
28999
29000 static bool
29001 arm_cxx_use_aeabi_atexit (void)
29002 {
29003 return TARGET_AAPCS_BASED;
29004 }
29005
29006
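/* Set the return address of the current function to SOURCE.  If LR was
   not saved on the stack, simply copy SOURCE into LR; otherwise store
   SOURCE into LR's save slot, using SCRATCH to build the address when
   the offset from the stack pointer is 4096 or more.  */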
29007 void
29008 arm_set_return_address (rtx source, rtx scratch)
29009 {
29010 arm_stack_offsets *offsets;
29011 HOST_WIDE_INT delta;
29012 rtx addr, mem;
29013 unsigned long saved_regs;
29014
29015 offsets = arm_get_frame_offsets ();
29016 saved_regs = offsets->saved_regs_mask;
29017
29018 if ((saved_regs & (1 << LR_REGNUM)) == 0)
29019 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29020 else
29021 {
29022 if (frame_pointer_needed)
29023 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
29024 else
29025 {
29026 /* LR will be the first saved register. */
29027 delta = offsets->outgoing_args - (offsets->frame + 4);
29028
29029
29030 if (delta >= 4096)
29031 {
29032 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
29033 GEN_INT (delta & ~4095)));
29034 addr = scratch;
29035 delta &= 4095;
29036 }
29037 else
29038 addr = stack_pointer_rtx;
29039
29040 addr = plus_constant (Pmode, addr, delta);
29041 }
29042
29043 /* The store needs to be marked to prevent DSE from deleting
29044 it as dead if it is based on fp. */
29045 mem = gen_frame_mem (Pmode, addr);
29046 MEM_VOLATILE_P (mem) = true;
29047 emit_move_insn (mem, source);
29048 }
29049 }
29050
29051
29052 void
29053 thumb_set_return_address (rtx source, rtx scratch)
29054 {
29055 arm_stack_offsets *offsets;
29056 HOST_WIDE_INT delta;
29057 HOST_WIDE_INT limit;
29058 int reg;
29059 rtx addr, mem;
29060 unsigned long mask;
29061
29062 emit_use (source);
29063
29064 offsets = arm_get_frame_offsets ();
29065 mask = offsets->saved_regs_mask;
29066 if (mask & (1 << LR_REGNUM))
29067 {
29068 limit = 1024;
29069 /* Find the saved regs. */
29070 if (frame_pointer_needed)
29071 {
29072 delta = offsets->soft_frame - offsets->saved_args;
29073 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
29074 if (TARGET_THUMB1)
29075 limit = 128;
29076 }
29077 else
29078 {
29079 delta = offsets->outgoing_args - offsets->saved_args;
29080 reg = SP_REGNUM;
29081 }
29082 /* Allow for the stack frame. */
29083 if (TARGET_THUMB1 && TARGET_BACKTRACE)
29084 delta -= 16;
29085 /* The link register is always the first saved register. */
29086 delta -= 4;
29087
29088 /* Construct the address. */
29089 addr = gen_rtx_REG (SImode, reg);
29090 if (delta > limit)
29091 {
29092 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
29093 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
29094 addr = scratch;
29095 }
29096 else
29097 addr = plus_constant (Pmode, addr, delta);
29098
29099 /* The store needs to be marked to prevent DSE from deleting
29100 it as dead if it is based on fp. */
29101 mem = gen_frame_mem (Pmode, addr);
29102 MEM_VOLATILE_P (mem) = true;
29103 emit_move_insn (mem, source);
29104 }
29105 else
29106 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
29107 }
29108
29109 /* Implements target hook vector_mode_supported_p. */
29110 bool
29111 arm_vector_mode_supported_p (machine_mode mode)
29112 {
29113 /* Neon also supports V2SImode, etc. listed in the clause below. */
29114 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
29115 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
29116 || mode == V2DImode || mode == V8HFmode || mode == V4BFmode
29117 || mode == V8BFmode))
29118 return true;
29119
29120 if ((TARGET_NEON || TARGET_IWMMXT)
29121 && ((mode == V2SImode)
29122 || (mode == V4HImode)
29123 || (mode == V8QImode)))
29124 return true;
29125
29126 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
29127 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
29128 || mode == V2HAmode))
29129 return true;
29130
29131 if (TARGET_HAVE_MVE
29132 && (mode == V2DImode || mode == V4SImode || mode == V8HImode
29133 || mode == V16QImode))
29134 return true;
29135
29136 if (TARGET_HAVE_MVE_FLOAT
29137 && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode))
29138 return true;
29139
29140 return false;
29141 }
29142
29143 /* Implements target hook array_mode_supported_p. */
29144
29145 static bool
29146 arm_array_mode_supported_p (machine_mode mode,
29147 unsigned HOST_WIDE_INT nelems)
29148 {
29149 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
29150 for now, as the lane-swapping logic needs to be extended in the expanders.
29151 See PR target/82518. */
29152 if (TARGET_NEON && !BYTES_BIG_ENDIAN
29153 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
29154 && (nelems >= 2 && nelems <= 4))
29155 return true;
29156
29157 if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
29158 && VALID_MVE_MODE (mode) && (nelems == 2 || nelems == 4))
29159 return true;
29160
29161 return false;
29162 }
29163
29164 /* Use the option -mvectorize-with-neon-double to override the use of quadword
29165 registers when autovectorizing for Neon, at least until multiple vector
29166 widths are supported properly by the middle-end. */
29167
29168 static machine_mode
29169 arm_preferred_simd_mode (scalar_mode mode)
29170 {
29171 if (TARGET_NEON)
29172 switch (mode)
29173 {
29174 case E_HFmode:
29175 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HFmode : V8HFmode;
29176 case E_SFmode:
29177 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
29178 case E_SImode:
29179 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
29180 case E_HImode:
29181 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
29182 case E_QImode:
29183 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
29184 case E_DImode:
29185 if (!TARGET_NEON_VECTORIZE_DOUBLE)
29186 return V2DImode;
29187 break;
29188
29189 default:;
29190 }
29191
29192 if (TARGET_REALLY_IWMMXT)
29193 switch (mode)
29194 {
29195 case E_SImode:
29196 return V2SImode;
29197 case E_HImode:
29198 return V4HImode;
29199 case E_QImode:
29200 return V8QImode;
29201
29202 default:;
29203 }
29204
29205 if (TARGET_HAVE_MVE)
29206 switch (mode)
29207 {
29208 case E_QImode:
29209 return V16QImode;
29210 case E_HImode:
29211 return V8HImode;
29212 case E_SImode:
29213 return V4SImode;
29214
29215 default:;
29216 }
29217
29218 if (TARGET_HAVE_MVE_FLOAT)
29219 switch (mode)
29220 {
29221 case E_HFmode:
29222 return V8HFmode;
29223 case E_SFmode:
29224 return V4SFmode;
29225
29226 default:;
29227 }
29228
29229 return word_mode;
29230 }
29231
29232 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
29233
29234 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
29235 using r0-r4 for function arguments, r7 for the stack frame, and then not
29236 having enough left over to do doubleword arithmetic. For Thumb-2 all the
29237 potentially problematic instructions accept high registers so this is not
29238 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
29239 that require many low registers. */
29240 static bool
29241 arm_class_likely_spilled_p (reg_class_t rclass)
29242 {
29243 if ((TARGET_THUMB1 && rclass == LO_REGS)
29244 || rclass == CC_REG)
29245 return true;
29246
29247 return false;
29248 }
29249
29250 /* Implements target hook small_register_classes_for_mode_p. */
29251 bool
29252 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
29253 {
29254 return TARGET_THUMB1;
29255 }
29256
29257 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
29258 ARM insns and therefore guarantee that the shift count is modulo 256.
29259 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
29260 guarantee no particular behavior for out-of-range counts. */
29261
29262 static unsigned HOST_WIDE_INT
29263 arm_shift_truncation_mask (machine_mode mode)
29264 {
29265 return mode == SImode ? 255 : 0;
29266 }
29267
29268
29269 /* Map internal gcc register numbers to DWARF2 register numbers. */
29270
29271 unsigned int
29272 arm_dbx_register_number (unsigned int regno)
29273 {
29274 if (regno < 16)
29275 return regno;
29276
29277 if (IS_VFP_REGNUM (regno))
29278 {
29279 /* See comment in arm_dwarf_register_span. */
29280 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29281 return 64 + regno - FIRST_VFP_REGNUM;
29282 else
29283 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
29284 }
29285
29286 if (IS_IWMMXT_GR_REGNUM (regno))
29287 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
29288
29289 if (IS_IWMMXT_REGNUM (regno))
29290 return 112 + regno - FIRST_IWMMXT_REGNUM;
29291
29292 return DWARF_FRAME_REGISTERS;
29293 }
29294
29295 /* Dwarf models VFPv3 registers as 32 64-bit registers.
29296 GCC models them as 64 32-bit registers, so we need to describe this to
29297 the DWARF generation code. Other registers can use the default. */
29298 static rtx
29299 arm_dwarf_register_span (rtx rtl)
29300 {
29301 machine_mode mode;
29302 unsigned regno;
29303 rtx parts[16];
29304 int nregs;
29305 int i;
29306
29307 regno = REGNO (rtl);
29308 if (!IS_VFP_REGNUM (regno))
29309 return NULL_RTX;
29310
29311 /* XXX FIXME: The EABI defines two VFP register ranges:
29312 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
29313 256-287: D0-D31
29314 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
29315 corresponding D register. Until GDB supports this, we shall use the
29316 legacy encodings. We also use these encodings for D0-D15 for
29317 compatibility with older debuggers. */
29318 mode = GET_MODE (rtl);
29319 if (GET_MODE_SIZE (mode) < 8)
29320 return NULL_RTX;
29321
29322 if (VFP_REGNO_OK_FOR_SINGLE (regno))
29323 {
29324 nregs = GET_MODE_SIZE (mode) / 4;
29325 for (i = 0; i < nregs; i += 2)
29326 if (TARGET_BIG_END)
29327 {
29328 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
29329 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
29330 }
29331 else
29332 {
29333 parts[i] = gen_rtx_REG (SImode, regno + i);
29334 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
29335 }
29336 }
29337 else
29338 {
29339 nregs = GET_MODE_SIZE (mode) / 8;
29340 for (i = 0; i < nregs; i++)
29341 parts[i] = gen_rtx_REG (DImode, regno + i);
29342 }
29343
29344 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
29345 }
29346
29347 #if ARM_UNWIND_INFO
29348 /* Emit unwind directives for a store-multiple instruction or stack pointer
29349 push during alignment.
29350 These should only ever be generated by the function prologue code, so
29351 expect them to have a particular form.
29352 The store-multiple instruction sometimes pushes pc as the last register,
29353 although it should not be tracked into unwind information, or for -Os
29354 sometimes pushes some dummy registers before the first register that needs
29355 to be tracked in unwind information; such dummy registers are there just
29356 to avoid a separate stack adjustment, and will not be restored in the
29357 epilogue. */
29358
29359 static void
29360 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
29361 {
29362 int i;
29363 HOST_WIDE_INT offset;
29364 HOST_WIDE_INT nregs;
29365 int reg_size;
29366 unsigned reg;
29367 unsigned lastreg;
29368 unsigned padfirst = 0, padlast = 0;
29369 rtx e;
29370
29371 e = XVECEXP (p, 0, 0);
29372 gcc_assert (GET_CODE (e) == SET);
29373
29374 /* First insn will adjust the stack pointer. */
29375 gcc_assert (GET_CODE (e) == SET
29376 && REG_P (SET_DEST (e))
29377 && REGNO (SET_DEST (e)) == SP_REGNUM
29378 && GET_CODE (SET_SRC (e)) == PLUS);
29379
29380 offset = -INTVAL (XEXP (SET_SRC (e), 1));
29381 nregs = XVECLEN (p, 0) - 1;
29382 gcc_assert (nregs);
29383
29384 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
29385 if (reg < 16)
29386 {
29387 /* For -Os dummy registers can be pushed at the beginning to
29388 avoid separate stack pointer adjustment. */
29389 e = XVECEXP (p, 0, 1);
29390 e = XEXP (SET_DEST (e), 0);
29391 if (GET_CODE (e) == PLUS)
29392 padfirst = INTVAL (XEXP (e, 1));
29393 gcc_assert (padfirst == 0 || optimize_size);
29394 /* The function prologue may also push pc, but not annotate it as it is
29395 never restored. We turn this into a stack pointer adjustment. */
29396 e = XVECEXP (p, 0, nregs);
29397 e = XEXP (SET_DEST (e), 0);
29398 if (GET_CODE (e) == PLUS)
29399 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
29400 else
29401 padlast = offset - 4;
29402 gcc_assert (padlast == 0 || padlast == 4);
29403 if (padlast == 4)
29404 fprintf (asm_out_file, "\t.pad #4\n");
29405 reg_size = 4;
29406 fprintf (asm_out_file, "\t.save {");
29407 }
29408 else if (IS_VFP_REGNUM (reg))
29409 {
29410 reg_size = 8;
29411 fprintf (asm_out_file, "\t.vsave {");
29412 }
29413 else
29414 /* Unknown register type. */
29415 gcc_unreachable ();
29416
29417 /* If the stack increment doesn't match the size of the saved registers,
29418 something has gone horribly wrong. */
29419 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
29420
29421 offset = padfirst;
29422 lastreg = 0;
29423 /* The remaining insns will describe the stores. */
29424 for (i = 1; i <= nregs; i++)
29425 {
29426 /* Expect (set (mem <addr>) (reg)).
29427 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
29428 e = XVECEXP (p, 0, i);
29429 gcc_assert (GET_CODE (e) == SET
29430 && MEM_P (SET_DEST (e))
29431 && REG_P (SET_SRC (e)));
29432
29433 reg = REGNO (SET_SRC (e));
29434 gcc_assert (reg >= lastreg);
29435
29436 if (i != 1)
29437 fprintf (asm_out_file, ", ");
29438 /* We can't use %r for vfp because we need to use the
29439 double precision register names. */
29440 if (IS_VFP_REGNUM (reg))
29441 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
29442 else
29443 asm_fprintf (asm_out_file, "%r", reg);
29444
29445 if (flag_checking)
29446 {
29447 /* Check that the addresses are consecutive. */
29448 e = XEXP (SET_DEST (e), 0);
29449 if (GET_CODE (e) == PLUS)
29450 gcc_assert (REG_P (XEXP (e, 0))
29451 && REGNO (XEXP (e, 0)) == SP_REGNUM
29452 && CONST_INT_P (XEXP (e, 1))
29453 && offset == INTVAL (XEXP (e, 1)));
29454 else
29455 gcc_assert (i == 1
29456 && REG_P (e)
29457 && REGNO (e) == SP_REGNUM);
29458 offset += reg_size;
29459 }
29460 }
29461 fprintf (asm_out_file, "}\n");
29462 if (padfirst)
29463 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
29464 }
29465
29466 /* Emit unwind directives for a SET. */
29467
29468 static void
29469 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
29470 {
29471 rtx e0;
29472 rtx e1;
29473 unsigned reg;
29474
29475 e0 = XEXP (p, 0);
29476 e1 = XEXP (p, 1);
29477 switch (GET_CODE (e0))
29478 {
29479 case MEM:
29480 /* Pushing a single register. */
29481 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29482 || !REG_P (XEXP (XEXP (e0, 0), 0))
29483 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29484 abort ();
29485
29486 asm_fprintf (asm_out_file, "\t.save ");
29487 if (IS_VFP_REGNUM (REGNO (e1)))
29488 asm_fprintf(asm_out_file, "{d%d}\n",
29489 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29490 else
29491 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29492 break;
29493
29494 case REG:
29495 if (REGNO (e0) == SP_REGNUM)
29496 {
29497 /* A stack increment. */
29498 if (GET_CODE (e1) != PLUS
29499 || !REG_P (XEXP (e1, 0))
29500 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29501 || !CONST_INT_P (XEXP (e1, 1)))
29502 abort ();
29503
29504 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29505 -INTVAL (XEXP (e1, 1)));
29506 }
29507 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29508 {
29509 HOST_WIDE_INT offset;
29510
29511 if (GET_CODE (e1) == PLUS)
29512 {
29513 if (!REG_P (XEXP (e1, 0))
29514 || !CONST_INT_P (XEXP (e1, 1)))
29515 abort ();
29516 reg = REGNO (XEXP (e1, 0));
29517 offset = INTVAL (XEXP (e1, 1));
29518 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29519 HARD_FRAME_POINTER_REGNUM, reg,
29520 offset);
29521 }
29522 else if (REG_P (e1))
29523 {
29524 reg = REGNO (e1);
29525 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29526 HARD_FRAME_POINTER_REGNUM, reg);
29527 }
29528 else
29529 abort ();
29530 }
29531 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29532 {
29533 /* Move from sp to reg. */
29534 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29535 }
29536 else if (GET_CODE (e1) == PLUS
29537 && REG_P (XEXP (e1, 0))
29538 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29539 && CONST_INT_P (XEXP (e1, 1)))
29540 {
29541 /* Set reg to offset from sp. */
29542 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29543 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29544 }
29545 else
29546 abort ();
29547 break;
29548
29549 default:
29550 abort ();
29551 }
29552 }
29553
29554
29555 /* Emit unwind directives for the given insn. */
29556
29557 static void
29558 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
29559 {
29560 rtx note, pat;
29561 bool handled_one = false;
29562
29563 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29564 return;
29565
29566 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29567 && (TREE_NOTHROW (current_function_decl)
29568 || crtl->all_throwers_are_sibcalls))
29569 return;
29570
29571 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29572 return;
29573
29574 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29575 {
29576 switch (REG_NOTE_KIND (note))
29577 {
29578 case REG_FRAME_RELATED_EXPR:
29579 pat = XEXP (note, 0);
29580 goto found;
29581
29582 case REG_CFA_REGISTER:
29583 pat = XEXP (note, 0);
29584 if (pat == NULL)
29585 {
29586 pat = PATTERN (insn);
29587 if (GET_CODE (pat) == PARALLEL)
29588 pat = XVECEXP (pat, 0, 0);
29589 }
29590
29591 /* Only emitted for IS_STACKALIGN re-alignment. */
29592 {
29593 rtx dest, src;
29594 unsigned reg;
29595
29596 src = SET_SRC (pat);
29597 dest = SET_DEST (pat);
29598
29599 gcc_assert (src == stack_pointer_rtx);
29600 reg = REGNO (dest);
29601 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29602 reg + 0x90, reg);
29603 }
29604 handled_one = true;
29605 break;
29606
29607 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29608 to get correct dwarf information for shrink-wrapping. We should not
29609 emit unwind information for it because these notes are used either for
29610 pretend arguments or to adjust sp and restore registers from the
29611 stack. */
29612 case REG_CFA_DEF_CFA:
29613 case REG_CFA_ADJUST_CFA:
29614 case REG_CFA_RESTORE:
29615 return;
29616
29617 case REG_CFA_EXPRESSION:
29618 case REG_CFA_OFFSET:
29619 /* ??? Only handling here what we actually emit. */
29620 gcc_unreachable ();
29621
29622 default:
29623 break;
29624 }
29625 }
29626 if (handled_one)
29627 return;
29628 pat = PATTERN (insn);
29629 found:
29630
29631 switch (GET_CODE (pat))
29632 {
29633 case SET:
29634 arm_unwind_emit_set (asm_out_file, pat);
29635 break;
29636
29637 case SEQUENCE:
29638 /* Store multiple. */
29639 arm_unwind_emit_sequence (asm_out_file, pat);
29640 break;
29641
29642 default:
29643 abort();
29644 }
29645 }
29646
29647
29648 /* Output a reference from a function exception table to the type_info
29649 object X. The EABI specifies that the symbol should be relocated by
29650 an R_ARM_TARGET2 relocation. */
29651
29652 static bool
29653 arm_output_ttype (rtx x)
29654 {
29655 fputs ("\t.word\t", asm_out_file);
29656 output_addr_const (asm_out_file, x);
29657 /* Use special relocations for symbol references. */
29658 if (!CONST_INT_P (x))
29659 fputs ("(TARGET2)", asm_out_file);
29660 fputc ('\n', asm_out_file);
29661
29662 return TRUE;
29663 }
29664
29665 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29666
29667 static void
29668 arm_asm_emit_except_personality (rtx personality)
29669 {
29670 fputs ("\t.personality\t", asm_out_file);
29671 output_addr_const (asm_out_file, personality);
29672 fputc ('\n', asm_out_file);
29673 }
29674 #endif /* ARM_UNWIND_INFO */
29675
29676 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29677
29678 static void
29679 arm_asm_init_sections (void)
29680 {
29681 #if ARM_UNWIND_INFO
29682 exception_section = get_unnamed_section (0, output_section_asm_op,
29683 "\t.handlerdata");
29684 #endif /* ARM_UNWIND_INFO */
29685
29686 #ifdef OBJECT_FORMAT_ELF
29687 if (target_pure_code)
29688 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
29689 #endif
29690 }
29691
29692 /* Output unwind directives for the start/end of a function. */
29693
29694 void
29695 arm_output_fn_unwind (FILE * f, bool prologue)
29696 {
29697 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29698 return;
29699
29700 if (prologue)
29701 fputs ("\t.fnstart\n", f);
29702 else
29703 {
29704 /* If this function will never be unwound, then mark it as such.
29705 The same condition is used in arm_unwind_emit to suppress
29706 the frame annotations. */
29707 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29708 && (TREE_NOTHROW (current_function_decl)
29709 || crtl->all_throwers_are_sibcalls))
29710 fputs("\t.cantunwind\n", f);
29711
29712 fputs ("\t.fnend\n", f);
29713 }
29714 }
29715
29716 static bool
29717 arm_emit_tls_decoration (FILE *fp, rtx x)
29718 {
29719 enum tls_reloc reloc;
29720 rtx val;
29721
29722 val = XVECEXP (x, 0, 0);
29723 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29724
29725 output_addr_const (fp, val);
29726
29727 switch (reloc)
29728 {
29729 case TLS_GD32:
29730 fputs ("(tlsgd)", fp);
29731 break;
29732 case TLS_GD32_FDPIC:
29733 fputs ("(tlsgd_fdpic)", fp);
29734 break;
29735 case TLS_LDM32:
29736 fputs ("(tlsldm)", fp);
29737 break;
29738 case TLS_LDM32_FDPIC:
29739 fputs ("(tlsldm_fdpic)", fp);
29740 break;
29741 case TLS_LDO32:
29742 fputs ("(tlsldo)", fp);
29743 break;
29744 case TLS_IE32:
29745 fputs ("(gottpoff)", fp);
29746 break;
29747 case TLS_IE32_FDPIC:
29748 fputs ("(gottpoff_fdpic)", fp);
29749 break;
29750 case TLS_LE32:
29751 fputs ("(tpoff)", fp);
29752 break;
29753 case TLS_DESCSEQ:
29754 fputs ("(tlsdesc)", fp);
29755 break;
29756 default:
29757 gcc_unreachable ();
29758 }
29759
29760 switch (reloc)
29761 {
29762 case TLS_GD32:
29763 case TLS_LDM32:
29764 case TLS_IE32:
29765 case TLS_DESCSEQ:
29766 fputs (" + (. - ", fp);
29767 output_addr_const (fp, XVECEXP (x, 0, 2));
29768 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
29769 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29770 output_addr_const (fp, XVECEXP (x, 0, 3));
29771 fputc (')', fp);
29772 break;
29773 default:
29774 break;
29775 }
29776
29777 return TRUE;
29778 }
29779
29780 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29781
29782 static void
29783 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29784 {
29785 gcc_assert (size == 4);
29786 fputs ("\t.word\t", file);
29787 output_addr_const (file, x);
29788 fputs ("(tlsldo)", file);
29789 }
29790
29791 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29792
29793 static bool
29794 arm_output_addr_const_extra (FILE *fp, rtx x)
29795 {
29796 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29797 return arm_emit_tls_decoration (fp, x);
29798 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29799 {
29800 char label[256];
29801 int labelno = INTVAL (XVECEXP (x, 0, 0));
29802
29803 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29804 assemble_name_raw (fp, label);
29805
29806 return TRUE;
29807 }
29808 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29809 {
29810 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29811 if (GOT_PCREL)
29812 fputs ("+.", fp);
29813 fputs ("-(", fp);
29814 output_addr_const (fp, XVECEXP (x, 0, 0));
29815 fputc (')', fp);
29816 return TRUE;
29817 }
29818 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29819 {
29820 output_addr_const (fp, XVECEXP (x, 0, 0));
29821 if (GOT_PCREL)
29822 fputs ("+.", fp);
29823 fputs ("-(", fp);
29824 output_addr_const (fp, XVECEXP (x, 0, 1));
29825 fputc (')', fp);
29826 return TRUE;
29827 }
29828 else if (GET_CODE (x) == CONST_VECTOR)
29829 return arm_emit_vector_const (fp, x);
29830
29831 return FALSE;
29832 }
29833
29834 /* Output assembly for a shift instruction.
29835 SET_FLAGS determines how the instruction modifies the condition codes.
29836 0 - Do not set condition codes.
29837 1 - Set condition codes.
29838 2 - Use smallest instruction. */
29839 const char *
29840 arm_output_shift(rtx * operands, int set_flags)
29841 {
29842 char pattern[100];
29843 static const char flag_chars[3] = {'?', '.', '!'};
29844 const char *shift;
29845 HOST_WIDE_INT val;
29846 char c;
29847
29848 c = flag_chars[set_flags];
29849 shift = shift_op(operands[3], &val);
29850 if (shift)
29851 {
29852 if (val != -1)
29853 operands[2] = GEN_INT(val);
29854 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29855 }
29856 else
29857 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29858
29859 output_asm_insn (pattern, operands);
29860 return "";
29861 }
29862
29863 /* Output assembly for a WMMX immediate shift instruction. */
29864 const char *
29865 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29866 {
29867 int shift = INTVAL (operands[2]);
29868 char templ[50];
29869 machine_mode opmode = GET_MODE (operands[0]);
29870
29871 gcc_assert (shift >= 0);
29872
29873 /* Handle the case where the shift value in the register versions is > 63
29874 (for D qualifier), 31 (for W qualifier) or 15 (for H qualifier). */
29875 if (((opmode == V4HImode) && (shift > 15))
29876 || ((opmode == V2SImode) && (shift > 31))
29877 || ((opmode == DImode) && (shift > 63)))
29878 {
29879 if (wror_or_wsra)
29880 {
29881 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29882 output_asm_insn (templ, operands);
29883 if (opmode == DImode)
29884 {
29885 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29886 output_asm_insn (templ, operands);
29887 }
29888 }
29889 else
29890 {
29891 /* The destination register will contain all zeros. */
29892 sprintf (templ, "wzero\t%%0");
29893 output_asm_insn (templ, operands);
29894 }
29895 return "";
29896 }
29897
29898 if ((opmode == DImode) && (shift > 32))
29899 {
29900 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29901 output_asm_insn (templ, operands);
29902 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29903 output_asm_insn (templ, operands);
29904 }
29905 else
29906 {
29907 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29908 output_asm_insn (templ, operands);
29909 }
29910 return "";
29911 }
29912
29913 /* Output assembly for a WMMX tinsr instruction. */
29914 const char *
29915 arm_output_iwmmxt_tinsr (rtx *operands)
29916 {
29917 int mask = INTVAL (operands[3]);
29918 int i;
29919 char templ[50];
29920 int units = mode_nunits[GET_MODE (operands[0])];
29921 gcc_assert ((mask & (mask - 1)) == 0);
29922 for (i = 0; i < units; ++i)
29923 {
29924 if ((mask & 0x01) == 1)
29925 {
29926 break;
29927 }
29928 mask >>= 1;
29929 }
29930 gcc_assert (i < units);
29931 {
29932 switch (GET_MODE (operands[0]))
29933 {
29934 case E_V8QImode:
29935 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29936 break;
29937 case E_V4HImode:
29938 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29939 break;
29940 case E_V2SImode:
29941 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29942 break;
29943 default:
29944 gcc_unreachable ();
29945 break;
29946 }
29947 output_asm_insn (templ, operands);
29948 }
29949 return "";
29950 }
29951
29952 /* Output a Thumb-1 casesi dispatch sequence. */
29953 const char *
29954 thumb1_output_casesi (rtx *operands)
29955 {
29956 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
29957
29958 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29959
29960 switch (GET_MODE(diff_vec))
29961 {
29962 case E_QImode:
29963 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29964 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29965 case E_HImode:
29966 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29967 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29968 case E_SImode:
29969 return "bl\t%___gnu_thumb1_case_si";
29970 default:
29971 gcc_unreachable ();
29972 }
29973 }
29974
29975 /* Output a Thumb-2 casesi instruction. */
29976 const char *
29977 thumb2_output_casesi (rtx *operands)
29978 {
29979 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
29980
29981 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29982
29983 output_asm_insn ("cmp\t%0, %1", operands);
29984 output_asm_insn ("bhi\t%l3", operands);
29985 switch (GET_MODE(diff_vec))
29986 {
29987 case E_QImode:
29988 return "tbb\t[%|pc, %0]";
29989 case E_HImode:
29990 return "tbh\t[%|pc, %0, lsl #1]";
29991 case E_SImode:
29992 if (flag_pic)
29993 {
29994 output_asm_insn ("adr\t%4, %l2", operands);
29995 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29996 output_asm_insn ("add\t%4, %4, %5", operands);
29997 return "bx\t%4";
29998 }
29999 else
30000 {
30001 output_asm_insn ("adr\t%4, %l2", operands);
30002 return "ldr\t%|pc, [%4, %0, lsl #2]";
30003 }
30004 default:
30005 gcc_unreachable ();
30006 }
30007 }
30008
30009 /* Implement TARGET_SCHED_ISSUE_RATE. Look up the issue rate in the
30010 per-core tuning structs. */
30011 static int
30012 arm_issue_rate (void)
30013 {
30014 return current_tune->issue_rate;
30015 }
30016
30017 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
30018 static int
30019 arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
30020 {
30021 if (DEBUG_INSN_P (insn))
30022 return more;
30023
30024 rtx_code code = GET_CODE (PATTERN (insn));
30025 if (code == USE || code == CLOBBER)
30026 return more;
30027
30028 if (get_attr_type (insn) == TYPE_NO_INSN)
30029 return more;
30030
30031 return more - 1;
30032 }
30033
30034 /* Return how many instructions the scheduler should look ahead to choose
30035 the best one. */
30036 static int
30037 arm_first_cycle_multipass_dfa_lookahead (void)
30038 {
30039 int issue_rate = arm_issue_rate ();
30040
30041 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
30042 }
30043
30044 /* Enable modeling of L2 auto-prefetcher. */
30045 static int
30046 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
30047 {
30048 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
30049 }
30050
30051 const char *
30052 arm_mangle_type (const_tree type)
30053 {
30054 /* The ARM ABI documents (10th October 2008) say that "__va_list"
30055 has to be mangled as if it is in the "std" namespace. */
30056 if (TARGET_AAPCS_BASED
30057 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
30058 return "St9__va_list";
30059
30060 /* Half-precision floating point types. */
30061 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
30062 {
30063 if (TYPE_MODE (type) == BFmode)
30064 return "u6__bf16";
30065 else
30066 return "Dh";
30067 }
30068
30069 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
30070 builtin type. */
30071 if (TYPE_NAME (type) != NULL)
30072 return arm_mangle_builtin_type (type);
30073
30074 /* Use the default mangling. */
30075 return NULL;
30076 }
30077
30078 /* Order of allocation of core registers for Thumb: this allocation is
30079 written over the corresponding initial entries of the array
30080 initialized with REG_ALLOC_ORDER. We allocate all low registers
30081 first. Saving and restoring a low register is usually cheaper than
30082 using a call-clobbered high register. */
30083
30084 static const int thumb_core_reg_alloc_order[] =
30085 {
30086 3, 2, 1, 0, 4, 5, 6, 7,
30087 12, 14, 8, 9, 10, 11
30088 };
30089
30090 /* Adjust register allocation order when compiling for Thumb. */
30091
30092 void
30093 arm_order_regs_for_local_alloc (void)
30094 {
30095 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
30096 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
30097 if (TARGET_THUMB)
30098 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
30099 sizeof (thumb_core_reg_alloc_order));
30100 }
30101
30102 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
30103
30104 bool
30105 arm_frame_pointer_required (void)
30106 {
30107 if (SUBTARGET_FRAME_POINTER_REQUIRED)
30108 return true;
30109
30110 /* If the function receives nonlocal gotos, it needs to save the frame
30111 pointer in the nonlocal_goto_save_area object. */
30112 if (cfun->has_nonlocal_label)
30113 return true;
30114
30115 /* The frame pointer is required for non-leaf APCS frames. */
30116 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
30117 return true;
30118
30119 /* If we are probing the stack in the prologue, we will have a faulting
30120 instruction prior to the stack adjustment and this requires a frame
30121 pointer if we want to catch the exception using the EABI unwinder. */
30122 if (!IS_INTERRUPT (arm_current_func_type ())
30123 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
30124 || flag_stack_clash_protection)
30125 && arm_except_unwind_info (&global_options) == UI_TARGET
30126 && cfun->can_throw_non_call_exceptions)
30127 {
30128 HOST_WIDE_INT size = get_frame_size ();
30129
30130 /* That's irrelevant if there is no stack adjustment. */
30131 if (size <= 0)
30132 return false;
30133
30134 /* That's relevant only if there is a stack probe. */
30135 if (crtl->is_leaf && !cfun->calls_alloca)
30136 {
30137 /* We don't have the final size of the frame so adjust. */
30138 size += 32 * UNITS_PER_WORD;
30139 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
30140 return true;
30141 }
30142 else
30143 return true;
30144 }
30145
30146 return false;
30147 }
30148
30149 /* Implement the TARGET_HAVE_CONDITIONAL_EXECUTION hook.
30150 All modes except THUMB1 have conditional execution.
30151 If we have conditional arithmetic, return false before reload to
30152 enable some ifcvt transformations. */
30153 static bool
30154 arm_have_conditional_execution (void)
30155 {
30156 bool has_cond_exec, enable_ifcvt_trans;
30157
30158 /* Only THUMB1 cannot support conditional execution. */
30159 has_cond_exec = !TARGET_THUMB1;
30160
30161 /* Enable ifcvt transformations if we have conditional arithmetic, but only
30162 before reload. */
30163 enable_ifcvt_trans = TARGET_COND_ARITH && !reload_completed;
30164
30165 return has_cond_exec && !enable_ifcvt_trans;
30166 }
30167
30168 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
30169 static HOST_WIDE_INT
30170 arm_vector_alignment (const_tree type)
30171 {
30172 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
30173
30174 if (TARGET_AAPCS_BASED)
30175 align = MIN (align, 64);
30176
30177 return align;
30178 }
30179
30180 static unsigned int
30181 arm_autovectorize_vector_modes (vector_modes *modes, bool)
30182 {
30183 if (!TARGET_NEON_VECTORIZE_DOUBLE)
30184 {
30185 modes->safe_push (V16QImode);
30186 modes->safe_push (V8QImode);
30187 }
30188 return 0;
30189 }
30190
30191 static bool
30192 arm_vector_alignment_reachable (const_tree type, bool is_packed)
30193 {
30194 /* Vectors which aren't in packed structures will not be less aligned than
30195 the natural alignment of their element type, so this is safe. */
30196 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30197 return !is_packed;
30198
30199 return default_builtin_vector_alignment_reachable (type, is_packed);
30200 }
30201
30202 static bool
30203 arm_builtin_support_vector_misalignment (machine_mode mode,
30204 const_tree type, int misalignment,
30205 bool is_packed)
30206 {
30207 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
30208 {
30209 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
30210
30211 if (is_packed)
30212 return align == 1;
30213
30214 /* If the misalignment is unknown, we should be able to handle the access
30215 so long as it is not to a member of a packed data structure. */
30216 if (misalignment == -1)
30217 return true;
30218
30219 /* Return true if the misalignment is a multiple of the natural alignment
30220 of the vector's element type. This is probably always going to be
30221 true in practice, since we've already established that this isn't a
30222 packed access. */
30223 return ((misalignment % align) == 0);
30224 }
30225
30226 return default_builtin_support_vector_misalignment (mode, type, misalignment,
30227 is_packed);
30228 }
30229
30230 static void
30231 arm_conditional_register_usage (void)
30232 {
30233 int regno;
30234
30235 if (TARGET_THUMB1 && optimize_size)
30236 {
30237 /* When optimizing for size on Thumb-1, it's better not
30238 to use the HI regs, because of the overhead of
30239 stacking them. */
30240 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
30241 fixed_regs[regno] = call_used_regs[regno] = 1;
30242 }
30243
30244 /* The link register can be clobbered by any branch insn,
30245 but we have no way to track that at present, so mark
30246 it as unavailable. */
30247 if (TARGET_THUMB1)
30248 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
30249
30250 if (TARGET_32BIT && TARGET_VFP_BASE)
30251 {
30252 /* VFPv3 registers are disabled when earlier VFP
30253 versions are selected due to the definition of
30254 LAST_VFP_REGNUM. */
30255 for (regno = FIRST_VFP_REGNUM;
30256 regno <= LAST_VFP_REGNUM; ++ regno)
30257 {
30258 fixed_regs[regno] = 0;
30259 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
30260 || regno >= FIRST_VFP_REGNUM + 32;
30261 }
30262 if (TARGET_HAVE_MVE)
30263 fixed_regs[VPR_REGNUM] = 0;
30264 }
30265
30266 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
30267 {
30268 regno = FIRST_IWMMXT_GR_REGNUM;
30269 /* The 2002/10/09 revision of the XScale ABI has wCG0
30270 and wCG1 as call-preserved registers. The 2002/11/21
30271 revision changed this so that all wCG registers are
30272 scratch registers. */
30273 for (regno = FIRST_IWMMXT_GR_REGNUM;
30274 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
30275 fixed_regs[regno] = 0;
30276 /* The XScale ABI has wR0 - wR9 as scratch registers,
30277 the rest as call-preserved registers. */
30278 for (regno = FIRST_IWMMXT_REGNUM;
30279 regno <= LAST_IWMMXT_REGNUM; ++ regno)
30280 {
30281 fixed_regs[regno] = 0;
30282 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
30283 }
30284 }
30285
30286 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
30287 {
30288 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30289 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
30290 }
30291 else if (TARGET_APCS_STACK)
30292 {
30293 fixed_regs[10] = 1;
30294 call_used_regs[10] = 1;
30295 }
30296 /* -mcaller-super-interworking reserves r11 for calls to
30297 _interwork_r11_call_via_rN(). Making the register global
30298 is an easy way of ensuring that it remains valid for all
30299 calls. */
30300 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
30301 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
30302 {
30303 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30304 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30305 if (TARGET_CALLER_INTERWORKING)
30306 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
30307 }
30308
30309 /* The Q and GE bits are only accessed via special ACLE patterns. */
30310 CLEAR_HARD_REG_BIT (operand_reg_set, APSRQ_REGNUM);
30311 CLEAR_HARD_REG_BIT (operand_reg_set, APSRGE_REGNUM);
30312
30313 SUBTARGET_CONDITIONAL_REGISTER_USAGE
30314 }
30315
30316 static reg_class_t
30317 arm_preferred_rename_class (reg_class_t rclass)
30318 {
30319 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
30320 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
30321 which can reduce code size. */
30322 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
30323 return LO_REGS;
30324 else
30325 return NO_REGS;
30326 }
30327
30328 /* Compute the attribute "length" of insn "*push_multi".
30329 So this function MUST be kept in sync with that insn pattern. */
30330 int
30331 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
30332 {
30333 int i, regno, hi_reg;
30334 int num_saves = XVECLEN (parallel_op, 0);
30335
30336 /* ARM mode. */
30337 if (TARGET_ARM)
30338 return 4;
30339 /* Thumb1 mode. */
30340 if (TARGET_THUMB1)
30341 return 2;
30342
30343 /* Thumb2 mode. */
30344 regno = REGNO (first_op);
30345 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
30346 list fits in the 8-bit register list field. Normally this means all registers
30347 in the list must be LO_REGS, that is (R0-R7). If any HI_REGS are used, then
30348 we must use 32-bit encodings. There is one exception: for PUSH, LR (a HI_REG)
30349 can also be used with the 16-bit encoding. */
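/* For instance, push {r0-r7, lr} can use the 16-bit encoding, so its
   length is 2, while push {r0, r8} contains a high register other than
   LR and therefore needs the 32-bit encoding, giving a length of 4.  */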
30350 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30351 for (i = 1; i < num_saves && !hi_reg; i++)
30352 {
30353 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
30354 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
30355 }
30356
30357 if (!hi_reg)
30358 return 2;
30359 return 4;
30360 }
30361
30362 /* Compute the attribute "length" of an insn. Currently, this function is used
30363 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
30364 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
30365 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
30366 true if OPERANDS contains an insn which explicitly updates the base register. */
30367
30368 int
30369 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
30370 {
30371 /* ARM mode. */
30372 if (TARGET_ARM)
30373 return 4;
30374 /* Thumb1 mode. */
30375 if (TARGET_THUMB1)
30376 return 2;
30377
30378 rtx parallel_op = operands[0];
30379 /* Index of the last element of the PARALLEL.  */
30380 unsigned indx = XVECLEN (parallel_op, 0) - 1;
30381 /* Start with the base register.  */
30382 unsigned regno = REGNO (operands[1]);
30383 /* Skip the return and write-back patterns.
30384 Only the register pop patterns are needed for the analysis below.  */
30385 unsigned first_indx = 0;
30386 first_indx += return_pc ? 1 : 0;
30387 first_indx += write_back_p ? 1 : 0;
30388
30389 /* A pop operation can be done through LDM or POP.  If the base register is SP
30390 and write-back is used, then LDM is an alias of POP.  */
30391 bool pop_p = (regno == SP_REGNUM && write_back_p);
30392 bool ldm_p = !pop_p;
30393
30394 /* Check base register for LDM. */
30395 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
30396 return 4;
30397
30398 /* Check each register in the list. */
30399 for (; indx >= first_indx; indx--)
30400 {
30401 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
30402 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
30403 comment in arm_attr_length_push_multi. */
30404 if (REGNO_REG_CLASS (regno) == HI_REGS
30405 && (regno != PC_REGNUM || ldm_p))
30406 return 4;
30407 }
30408
30409 return 2;
30410 }
30411
30412 /* Compute the number of instructions emitted by output_move_double. */
30413 int
30414 arm_count_output_move_double_insns (rtx *operands)
30415 {
30416 int count;
30417 rtx ops[2];
30418 /* output_move_double may modify the operands array, so call it
30419 here on a copy of the array. */
30420 ops[0] = operands[0];
30421 ops[1] = operands[1];
30422 output_move_double (ops, false, &count);
30423 return count;
30424 }
30425
30426 /* Same as above, but the operands are a register/memory pair in SImode.
30427 Assumes OPERANDS has the base register in position 0 and the memory operand
30428 in position 2 (which is the order provided by the arm_{ldrd,strd} patterns).  */
30429 int
30430 arm_count_ldrdstrd_insns (rtx *operands, bool load)
30431 {
30432 int count;
30433 rtx ops[2];
30434 int regnum, memnum;
30435 if (load)
30436 regnum = 0, memnum = 1;
30437 else
30438 regnum = 1, memnum = 0;
30439 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
30440 ops[memnum] = adjust_address (operands[2], DImode, 0);
30441 output_move_double (ops, false, &count);
30442 return count;
30443 }
30444
30445
30446 int
30447 vfp3_const_double_for_fract_bits (rtx operand)
30448 {
30449 REAL_VALUE_TYPE r0;
30450
30451 if (!CONST_DOUBLE_P (operand))
30452 return 0;
30453
30454 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
30455 if (exact_real_inverse (DFmode, &r0)
30456 && !REAL_VALUE_NEGATIVE (r0))
30457 {
30458 if (exact_real_truncate (DFmode, &r0))
30459 {
30460 HOST_WIDE_INT value = real_to_integer (&r0);
30461 value = value & 0xffffffff;
30462 if ((value != 0) && ((value & (value - 1)) == 0))
30463 {
30464 int ret = exact_log2 (value);
30465 gcc_assert (IN_RANGE (ret, 0, 31));
30466 return ret;
30467 }
30468 }
30469 }
30470 return 0;
30471 }
30472
30473 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
30474 log2 is in [1, 32], return that log2. Otherwise return -1.
30475 This is used in the patterns for vcvt.s32.f32 floating-point to
30476 fixed-point conversions. */
30477
30478 int
30479 vfp3_const_double_for_bits (rtx x)
30480 {
30481 const REAL_VALUE_TYPE *r;
30482
30483 if (!CONST_DOUBLE_P (x))
30484 return -1;
30485
30486 r = CONST_DOUBLE_REAL_VALUE (x);
30487
30488 if (REAL_VALUE_NEGATIVE (*r)
30489 || REAL_VALUE_ISNAN (*r)
30490 || REAL_VALUE_ISINF (*r)
30491 || !real_isinteger (r, SFmode))
30492 return -1;
30493
30494 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
30495
30496 /* The exact_log2 above will have returned -1 if this is
30497 not an exact log2. */
30498 if (!IN_RANGE (hwint, 1, 32))
30499 return -1;
30500
30501 return hwint;
30502 }
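/* A small worked example of the two helpers above: for a CONST_DOUBLE of
   0.25, vfp3_const_double_for_fract_bits returns 2 (its exact inverse, 4.0,
   is 1 << 2), and for a CONST_DOUBLE of 4.0, vfp3_const_double_for_bits
   likewise returns 2; both feed the fraction-bits immediate of the vcvt
   fixed-point conversion patterns.  */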
30503
30504 \f
30505 /* Emit a memory barrier around an atomic sequence according to MODEL. */
30506
30507 static void
30508 arm_pre_atomic_barrier (enum memmodel model)
30509 {
30510 if (need_atomic_barrier_p (model, true))
30511 emit_insn (gen_memory_barrier ());
30512 }
30513
30514 static void
30515 arm_post_atomic_barrier (enum memmodel model)
30516 {
30517 if (need_atomic_barrier_p (model, false))
30518 emit_insn (gen_memory_barrier ());
30519 }
30520
30521 /* Emit the load-exclusive and store-exclusive instructions.
30522 Use acquire and release versions if necessary. */
30523
30524 static void
30525 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
30526 {
30527 rtx (*gen) (rtx, rtx);
30528
30529 if (acq)
30530 {
30531 switch (mode)
30532 {
30533 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
30534 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
30535 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
30536 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
30537 default:
30538 gcc_unreachable ();
30539 }
30540 }
30541 else
30542 {
30543 switch (mode)
30544 {
30545 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
30546 case E_HImode: gen = gen_arm_load_exclusivehi; break;
30547 case E_SImode: gen = gen_arm_load_exclusivesi; break;
30548 case E_DImode: gen = gen_arm_load_exclusivedi; break;
30549 default:
30550 gcc_unreachable ();
30551 }
30552 }
30553
30554 emit_insn (gen (rval, mem));
30555 }
30556
30557 static void
30558 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
30559 rtx mem, bool rel)
30560 {
30561 rtx (*gen) (rtx, rtx, rtx);
30562
30563 if (rel)
30564 {
30565 switch (mode)
30566 {
30567 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
30568 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
30569 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
30570 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
30571 default:
30572 gcc_unreachable ();
30573 }
30574 }
30575 else
30576 {
30577 switch (mode)
30578 {
30579 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
30580 case E_HImode: gen = gen_arm_store_exclusivehi; break;
30581 case E_SImode: gen = gen_arm_store_exclusivesi; break;
30582 case E_DImode: gen = gen_arm_store_exclusivedi; break;
30583 default:
30584 gcc_unreachable ();
30585 }
30586 }
30587
30588 emit_insn (gen (bval, rval, mem));
30589 }
30590
30591 /* Mark the previous jump instruction as unlikely. */
30592
30593 static void
30594 emit_unlikely_jump (rtx insn)
30595 {
30596 rtx_insn *jump = emit_jump_insn (insn);
30597 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
30598 }
30599
30600 /* Expand a compare and swap pattern. */
30601
30602 void
30603 arm_expand_compare_and_swap (rtx operands[])
30604 {
30605 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30606 machine_mode mode, cmp_mode;
30607
30608 bval = operands[0];
30609 rval = operands[1];
30610 mem = operands[2];
30611 oldval = operands[3];
30612 newval = operands[4];
30613 is_weak = operands[5];
30614 mod_s = operands[6];
30615 mod_f = operands[7];
30616 mode = GET_MODE (mem);
30617
30618 /* Normally the succ memory model must be stronger than fail, but in the
30619 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30620 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30621
30622 if (TARGET_HAVE_LDACQ
30623 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
30624 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
30625 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30626
30627 switch (mode)
30628 {
30629 case E_QImode:
30630 case E_HImode:
30631 /* For narrow modes, we're going to perform the comparison in SImode,
30632 so do the zero-extension now. */
30633 rval = gen_reg_rtx (SImode);
30634 oldval = convert_modes (SImode, mode, oldval, true);
30635 /* FALLTHRU */
30636
30637 case E_SImode:
30638 /* Force the value into a register if needed. We waited until after
30639 the zero-extension above to do this properly. */
30640 if (!arm_add_operand (oldval, SImode))
30641 oldval = force_reg (SImode, oldval);
30642 break;
30643
30644 case E_DImode:
30645 if (!cmpdi_operand (oldval, mode))
30646 oldval = force_reg (mode, oldval);
30647 break;
30648
30649 default:
30650 gcc_unreachable ();
30651 }
30652
30653 if (TARGET_THUMB1)
30654 cmp_mode = E_SImode;
30655 else
30656 cmp_mode = CC_Zmode;
30657
30658 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
30659 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
30660 oldval, newval, is_weak, mod_s, mod_f));
30661
30662 if (mode == QImode || mode == HImode)
30663 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30664
30665 /* In all cases, we arrange for success to be signaled by Z being set.
30666 This arrangement allows the boolean result to be used directly
30667 in a subsequent branch, post optimization.  For Thumb-1 targets, the
30668 boolean negation of the result is also stored in bval because the
30669 Thumb-1 backend lacks dependency tracking for the CC flag, flag-setting
30670 not being represented at the RTL level.  */
30671 if (TARGET_THUMB1)
30672 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
30673 else
30674 {
30675 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
30676 emit_insn (gen_rtx_SET (bval, x));
30677 }
30678 }
30679
30680 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30681 another memory store between the load-exclusive and store-exclusive can
30682 reset the monitor from Exclusive to Open state. This means we must wait
30683 until after reload to split the pattern, lest we get a register spill in
30684 the middle of the atomic sequence. Success of the compare and swap is
30685 indicated by the Z flag being set for 32-bit targets and by neg_bval being zero
30686 for Thumb-1 targets (i.e. the negation of the boolean value returned by
30687 atomic_compare_and_swapmode standard pattern in operand 0). */
30688
30689 void
30690 arm_split_compare_and_swap (rtx operands[])
30691 {
30692 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
30693 machine_mode mode;
30694 enum memmodel mod_s, mod_f;
30695 bool is_weak;
30696 rtx_code_label *label1, *label2;
30697 rtx x, cond;
30698
30699 rval = operands[1];
30700 mem = operands[2];
30701 oldval = operands[3];
30702 newval = operands[4];
30703 is_weak = (operands[5] != const0_rtx);
30704 mod_s_rtx = operands[6];
30705 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
30706 mod_f = memmodel_from_int (INTVAL (operands[7]));
30707 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
30708 mode = GET_MODE (mem);
30709
30710 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
30711
30712 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
30713 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
30714
30715 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
30716 a full barrier is emitted after the store-release. */
30717 if (is_armv8_sync)
30718 use_acquire = false;
30719
30720 /* Checks whether a barrier is needed and emits one accordingly. */
30721 if (!(use_acquire || use_release))
30722 arm_pre_atomic_barrier (mod_s);
30723
30724 label1 = NULL;
30725 if (!is_weak)
30726 {
30727 label1 = gen_label_rtx ();
30728 emit_label (label1);
30729 }
30730 label2 = gen_label_rtx ();
30731
30732 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30733
30734 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1 for Thumb-1) if
30735 oldval != rval, as required to communicate with arm_expand_compare_and_swap.  */
30736 if (TARGET_32BIT)
30737 {
30738 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
30739 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30740 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30741 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30742 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30743 }
30744 else
30745 {
30746 cond = gen_rtx_NE (VOIDmode, rval, oldval);
30747 if (thumb1_cmpneg_operand (oldval, SImode))
30748 {
30749 rtx src = rval;
30750 if (!satisfies_constraint_L (oldval))
30751 {
30752 gcc_assert (satisfies_constraint_J (oldval));
30753
30754 /* For such immediates, ADDS needs the source and destination regs
30755 to be the same.
30756
30757 Normally this would be handled by RA, but this is all happening
30758 after RA. */
30759 emit_move_insn (neg_bval, rval);
30760 src = neg_bval;
30761 }
30762
30763 emit_unlikely_jump (gen_cbranchsi4_neg_late (neg_bval, src, oldval,
30764 label2, cond));
30765 }
30766 else
30767 {
30768 emit_move_insn (neg_bval, const1_rtx);
30769 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
30770 }
30771 }
30772
30773 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
30774
30775 /* Weak or strong, we want EQ to be true for success, so that we
30776 match the flags that we got from the compare above. */
30777 if (TARGET_32BIT)
30778 {
30779 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30780 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
30781 emit_insn (gen_rtx_SET (cond, x));
30782 }
30783
30784 if (!is_weak)
30785 {
30786 /* Z is set to boolean value of !neg_bval, as required to communicate
30787 with arm_expand_compare_and_swap. */
30788 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
30789 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
30790 }
30791
30792 if (!is_mm_relaxed (mod_f))
30793 emit_label (label2);
30794
30795 /* Checks whether a barrier is needed and emits one accordingly. */
30796 if (is_armv8_sync
30797 || !(use_acquire || use_release))
30798 arm_post_atomic_barrier (mod_s);
30799
30800 if (is_mm_relaxed (mod_f))
30801 emit_label (label2);
30802 }
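/* A conceptual sketch of the sequence emitted above for a strong
   compare-and-swap on a 32-bit target (barriers and acquire/release
   variants omitted; register names are illustrative only):

     retry:
       ldrex   rval, [mem]
       cmp     rval, oldval           @ Z := (rval == oldval)
       bne     done                   @ unlikely mismatch path
       strex   neg_bval, newval, [mem]
       cmp     neg_bval, #0
       bne     retry                  @ reservation lost; try again
     done:

   A weak compare-and-swap omits the retry loop and simply reports failure
   when the store-exclusive does not succeed.  */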
30803
30804 /* Split an atomic operation pattern. Operation is given by CODE and is one
30805 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a NAND
30806 operation).  The operation is performed on the content at MEM and on VALUE
30807 following the memory model MODEL_RTX. The content at MEM before and after
30808 the operation is returned in OLD_OUT and NEW_OUT respectively while the
30809 success of the operation is returned in COND. Using a scratch register or
30810 an operand register for these determines what result is returned for that
30811 pattern. */
30812
30813 void
30814 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30815 rtx value, rtx model_rtx, rtx cond)
30816 {
30817 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
30818 machine_mode mode = GET_MODE (mem);
30819 machine_mode wmode = (mode == DImode ? DImode : SImode);
30820 rtx_code_label *label;
30821 bool all_low_regs, bind_old_new;
30822 rtx x;
30823
30824 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
30825
30826 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
30827 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
30828
30829 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
30830 a full barrier is emitted after the store-release. */
30831 if (is_armv8_sync)
30832 use_acquire = false;
30833
30834 /* Checks whether a barrier is needed and emits one accordingly. */
30835 if (!(use_acquire || use_release))
30836 arm_pre_atomic_barrier (model);
30837
30838 label = gen_label_rtx ();
30839 emit_label (label);
30840
30841 if (new_out)
30842 new_out = gen_lowpart (wmode, new_out);
30843 if (old_out)
30844 old_out = gen_lowpart (wmode, old_out);
30845 else
30846 old_out = new_out;
30847 value = simplify_gen_subreg (wmode, value, mode, 0);
30848
30849 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30850
30851 /* Does the operation require destination and first operand to use the same
30852 register? This is decided by register constraints of relevant insn
30853 patterns in thumb1.md. */
30854 gcc_assert (!new_out || REG_P (new_out));
30855 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
30856 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
30857 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
30858 bind_old_new =
30859 (TARGET_THUMB1
30860 && code != SET
30861 && code != MINUS
30862 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
30863
30864 /* We want to return the old value while putting the result of the operation
30865 in the same register as the old value so copy the old value over to the
30866 destination register and use that register for the operation. */
30867 if (old_out && bind_old_new)
30868 {
30869 emit_move_insn (new_out, old_out);
30870 old_out = new_out;
30871 }
30872
30873 switch (code)
30874 {
30875 case SET:
30876 new_out = value;
30877 break;
30878
30879 case NOT:
30880 x = gen_rtx_AND (wmode, old_out, value);
30881 emit_insn (gen_rtx_SET (new_out, x));
30882 x = gen_rtx_NOT (wmode, new_out);
30883 emit_insn (gen_rtx_SET (new_out, x));
30884 break;
30885
30886 case MINUS:
30887 if (CONST_INT_P (value))
30888 {
30889 value = gen_int_mode (-INTVAL (value), wmode);
30890 code = PLUS;
30891 }
30892 /* FALLTHRU */
30893
30894 case PLUS:
30895 if (mode == DImode)
30896 {
30897 /* DImode plus/minus need to clobber flags. */
30898 /* The adddi3 and subdi3 patterns are incorrectly written so that
30899 they require matching operands, even when we could easily support
30900 three operands. Thankfully, this can be fixed up post-splitting,
30901 as the individual add+adc patterns do accept three operands and
30902 post-reload cprop can make these moves go away. */
30903 emit_move_insn (new_out, old_out);
30904 if (code == PLUS)
30905 x = gen_adddi3 (new_out, new_out, value);
30906 else
30907 x = gen_subdi3 (new_out, new_out, value);
30908 emit_insn (x);
30909 break;
30910 }
30911 /* FALLTHRU */
30912
30913 default:
30914 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30915 emit_insn (gen_rtx_SET (new_out, x));
30916 break;
30917 }
30918
30919 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30920 use_release);
30921
30922 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30923 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30924
30925 /* Checks whether a barrier is needed and emits one accordingly. */
30926 if (is_armv8_sync
30927 || !(use_acquire || use_release))
30928 arm_post_atomic_barrier (model);
30929 }
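/* Conceptually, for an operation such as PLUS the sequence emitted above is
   equivalent to the following retry loop (the intrinsic-style names are used
   purely for illustration):

     do
       {
         old_out = load_exclusive (mem);      /* LDREX  */
         new_out = old_out + value;           /* the requested operation  */
       }
     while (store_exclusive (new_out, mem));  /* STREX: nonzero on failure  */

   with barriers or acquire/release accesses added around the loop as
   required by MODEL_RTX.  */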
30930 \f
30931 /* Expand code to compare vectors OP0 and OP1 using condition CODE.
30932 If CAN_INVERT, store either the result or its inverse in TARGET
30933 and return true if TARGET contains the inverse. If !CAN_INVERT,
30934 always store the result in TARGET, never its inverse.
30935
30936 Note that the handling of floating-point comparisons is not
30937 IEEE compliant. */
30938
30939 bool
30940 arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
30941 bool can_invert)
30942 {
30943 machine_mode cmp_result_mode = GET_MODE (target);
30944 machine_mode cmp_mode = GET_MODE (op0);
30945
30946 bool inverted;
30947 switch (code)
30948 {
30949 /* For these we need to compute the inverse of the requested
30950 comparison. */
30951 case UNORDERED:
30952 case UNLT:
30953 case UNLE:
30954 case UNGT:
30955 case UNGE:
30956 case UNEQ:
30957 case NE:
30958 code = reverse_condition_maybe_unordered (code);
30959 if (!can_invert)
30960 {
30961 /* Recursively emit the inverted comparison into a temporary
30962 and then store its inverse in TARGET. This avoids reusing
30963 TARGET (which for integer NE could be one of the inputs). */
30964 rtx tmp = gen_reg_rtx (cmp_result_mode);
30965 if (arm_expand_vector_compare (tmp, code, op0, op1, true))
30966 gcc_unreachable ();
30967 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
30968 return false;
30969 }
30970 inverted = true;
30971 break;
30972
30973 default:
30974 inverted = false;
30975 break;
30976 }
30977
30978 switch (code)
30979 {
30980 /* These are natively supported for zero comparisons, but otherwise
30981 require the operands to be swapped. */
30982 case LE:
30983 case LT:
30984 if (op1 != CONST0_RTX (cmp_mode))
30985 {
30986 code = swap_condition (code);
30987 std::swap (op0, op1);
30988 }
30989 /* Fall through. */
30990
30991 /* These are natively supported for both register and zero operands. */
30992 case EQ:
30993 case GE:
30994 case GT:
30995 emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
30996 return inverted;
30997
30998 /* These are natively supported for register operands only.
30999 Comparisons with zero aren't useful and should be folded
31000 or canonicalized by target-independent code. */
31001 case GEU:
31002 case GTU:
31003 emit_insn (gen_neon_vc (code, cmp_mode, target,
31004 op0, force_reg (cmp_mode, op1)));
31005 return inverted;
31006
31007 /* These require the operands to be swapped and likewise do not
31008 support comparisons with zero. */
31009 case LEU:
31010 case LTU:
31011 emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
31012 target, force_reg (cmp_mode, op1), op0));
31013 return inverted;
31014
31015 /* These need a combination of two comparisons. */
31016 case LTGT:
31017 case ORDERED:
31018 {
31019 /* Operands are LTGT iff (a > b || a < b).
31020 Operands are ORDERED iff (a > b || a <= b). */
31021 rtx gt_res = gen_reg_rtx (cmp_result_mode);
31022 rtx alt_res = gen_reg_rtx (cmp_result_mode);
31023 rtx_code alt_code = (code == LTGT ? LT : LE);
31024 if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
31025 || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
31026 gcc_unreachable ();
31027 emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
31028 gt_res, alt_res)));
31029 return inverted;
31030 }
31031
31032 default:
31033 gcc_unreachable ();
31034 }
31035 }
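/* Example of the inversion handling above: a V4SF UNLT comparison is first
   reversed to GE; with CAN_INVERT the function emits a single VCGE and
   returns true (the caller must invert the mask), otherwise it emits VCGE
   into a temporary followed by a NOT (VMVN) into TARGET and returns false.  */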
31036
31037 /* Expand a vcond or vcondu pattern with operands OPERANDS.
31038 CMP_RESULT_MODE is the mode of the comparison result. */
31039
31040 void
31041 arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
31042 {
31043 rtx mask = gen_reg_rtx (cmp_result_mode);
31044 bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
31045 operands[4], operands[5], true);
31046 if (inverted)
31047 std::swap (operands[1], operands[2]);
31048 emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
31049 mask, operands[1], operands[2]));
31050 }
31051 \f
31052 #define MAX_VECT_LEN 16
31053
31054 struct expand_vec_perm_d
31055 {
31056 rtx target, op0, op1;
31057 vec_perm_indices perm;
31058 machine_mode vmode;
31059 bool one_vector_p;
31060 bool testing_p;
31061 };
31062
31063 /* Generate a variable permutation. */
31064
31065 static void
31066 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
31067 {
31068 machine_mode vmode = GET_MODE (target);
31069 bool one_vector_p = rtx_equal_p (op0, op1);
31070
31071 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
31072 gcc_checking_assert (GET_MODE (op0) == vmode);
31073 gcc_checking_assert (GET_MODE (op1) == vmode);
31074 gcc_checking_assert (GET_MODE (sel) == vmode);
31075 gcc_checking_assert (TARGET_NEON);
31076
31077 if (one_vector_p)
31078 {
31079 if (vmode == V8QImode)
31080 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
31081 else
31082 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
31083 }
31084 else
31085 {
31086 rtx pair;
31087
31088 if (vmode == V8QImode)
31089 {
31090 pair = gen_reg_rtx (V16QImode);
31091 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
31092 pair = gen_lowpart (TImode, pair);
31093 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
31094 }
31095 else
31096 {
31097 pair = gen_reg_rtx (OImode);
31098 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
31099 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
31100 }
31101 }
31102 }
31103
31104 void
31105 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
31106 {
31107 machine_mode vmode = GET_MODE (target);
31108 unsigned int nelt = GET_MODE_NUNITS (vmode);
31109 bool one_vector_p = rtx_equal_p (op0, op1);
31110 rtx mask;
31111
31112 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31113 numbering of elements for big-endian, we must reverse the order. */
31114 gcc_checking_assert (!BYTES_BIG_ENDIAN);
31115
31116 /* The VTBL instruction does not use a modulo index, so we must take care
31117 of that ourselves. */
31118 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
31119 mask = gen_const_vec_duplicate (vmode, mask);
31120 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
31121
31122 arm_expand_vec_perm_1 (target, op0, op1, sel);
31123 }
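/* For example, with two V8QImode inputs the mask above is 15, so a selector
   element of 19 becomes 19 & 15 = 3, giving the wrap-around (modulo)
   semantics required by VEC_PERM_EXPR; a raw VTBL would instead produce 0
   for any out-of-range index.  */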
31124
31125 /* Map between the architectural lane order and GCC's lane order,
31126 taking the ABI into account.  See the comment above output_move_neon for details.  */
31127
31128 static int
31129 neon_endian_lane_map (machine_mode mode, int lane)
31130 {
31131 if (BYTES_BIG_ENDIAN)
31132 {
31133 int nelems = GET_MODE_NUNITS (mode);
31134 /* Reverse lane order. */
31135 lane = (nelems - 1 - lane);
31136 /* Reverse D register order, to match ABI. */
31137 if (GET_MODE_SIZE (mode) == 16)
31138 lane = lane ^ (nelems / 2);
31139 }
31140 return lane;
31141 }
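/* For example, on a big-endian target with V4SImode (a Q register of four
   lanes), GCC lanes 0, 1, 2, 3 map to architectural lanes 1, 0, 3, 2: the
   lane order is reversed and then the two D-register halves are swapped
   back to match the ABI.  */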
31142
31143 /* Some permutations index into pairs of vectors; this is a helper function
31144 for mapping indexes into those pairs of vectors.  */
31145
31146 static int
31147 neon_pair_endian_lane_map (machine_mode mode, int lane)
31148 {
31149 int nelem = GET_MODE_NUNITS (mode);
31150 if (BYTES_BIG_ENDIAN)
31151 lane =
31152 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
31153 return lane;
31154 }
31155
31156 /* Generate or test for an insn that supports a constant permutation. */
31157
31158 /* Recognize patterns for the VUZP insns. */
31159
31160 static bool
31161 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
31162 {
31163 unsigned int i, odd, mask, nelt = d->perm.length ();
31164 rtx out0, out1, in0, in1;
31165 int first_elem;
31166 int swap_nelt;
31167
31168 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31169 return false;
31170
31171 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
31172 big-endian pattern on 64-bit vectors, so we correct for that.  */
31173 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
31174 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
31175
31176 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
31177
31178 if (first_elem == neon_endian_lane_map (d->vmode, 0))
31179 odd = 0;
31180 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
31181 odd = 1;
31182 else
31183 return false;
31184 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31185
31186 for (i = 0; i < nelt; i++)
31187 {
31188 unsigned elt =
31189 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
31190 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
31191 return false;
31192 }
31193
31194 /* Success! */
31195 if (d->testing_p)
31196 return true;
31197
31198 in0 = d->op0;
31199 in1 = d->op1;
31200 if (swap_nelt != 0)
31201 std::swap (in0, in1);
31202
31203 out0 = d->target;
31204 out1 = gen_reg_rtx (d->vmode);
31205 if (odd)
31206 std::swap (out0, out1);
31207
31208 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
31209 return true;
31210 }
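/* For instance, with two V8QImode operands the selector
   { 0, 2, 4, 6, 8, 10, 12, 14 } (the even lanes of the concatenation)
   matches this pattern with odd == 0 and is emitted as a single VUZP;
   the companion odd-lane output goes to an unused fresh register.  */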
31211
31212 /* Recognize patterns for the VZIP insns. */
31213
31214 static bool
31215 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
31216 {
31217 unsigned int i, high, mask, nelt = d->perm.length ();
31218 rtx out0, out1, in0, in1;
31219 int first_elem;
31220 bool is_swapped;
31221
31222 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31223 return false;
31224
31225 is_swapped = BYTES_BIG_ENDIAN;
31226
31227 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
31228
31229 high = nelt / 2;
31230 if (first_elem == neon_endian_lane_map (d->vmode, high))
31231 ;
31232 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
31233 high = 0;
31234 else
31235 return false;
31236 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31237
31238 for (i = 0; i < nelt / 2; i++)
31239 {
31240 unsigned elt =
31241 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
31242 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
31243 != elt)
31244 return false;
31245 elt =
31246 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
31247 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
31248 != elt)
31249 return false;
31250 }
31251
31252 /* Success! */
31253 if (d->testing_p)
31254 return true;
31255
31256 in0 = d->op0;
31257 in1 = d->op1;
31258 if (is_swapped)
31259 std::swap (in0, in1);
31260
31261 out0 = d->target;
31262 out1 = gen_reg_rtx (d->vmode);
31263 if (high)
31264 std::swap (out0, out1);
31265
31266 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
31267 return true;
31268 }
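/* For instance, with two V8QImode operands the selector
   { 0, 8, 1, 9, 2, 10, 3, 11 } (interleaving the low halves of the two
   inputs) matches this pattern with high == 0 and is emitted as a single
   VZIP; the companion output interleaving the high halves goes to an
   unused fresh register.  */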
31269
31270 /* Recognize patterns for the VREV insns. */
31271 static bool
31272 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
31273 {
31274 unsigned int i, j, diff, nelt = d->perm.length ();
31275 rtx (*gen) (machine_mode, rtx, rtx);
31276
31277 if (!d->one_vector_p)
31278 return false;
31279
31280 diff = d->perm[0];
31281 switch (diff)
31282 {
31283 case 7:
31284 switch (d->vmode)
31285 {
31286 case E_V16QImode:
31287 case E_V8QImode:
31288 gen = gen_neon_vrev64;
31289 break;
31290 default:
31291 return false;
31292 }
31293 break;
31294 case 3:
31295 switch (d->vmode)
31296 {
31297 case E_V16QImode:
31298 case E_V8QImode:
31299 gen = gen_neon_vrev32;
31300 break;
31301 case E_V8HImode:
31302 case E_V4HImode:
31303 case E_V8HFmode:
31304 case E_V4HFmode:
31305 gen = gen_neon_vrev64;
31306 break;
31307 default:
31308 return false;
31309 }
31310 break;
31311 case 1:
31312 switch (d->vmode)
31313 {
31314 case E_V16QImode:
31315 case E_V8QImode:
31316 gen = gen_neon_vrev16;
31317 break;
31318 case E_V8HImode:
31319 case E_V4HImode:
31320 gen = gen_neon_vrev32;
31321 break;
31322 case E_V4SImode:
31323 case E_V2SImode:
31324 case E_V4SFmode:
31325 case E_V2SFmode:
31326 gen = gen_neon_vrev64;
31327 break;
31328 default:
31329 return false;
31330 }
31331 break;
31332 default:
31333 return false;
31334 }
31335
31336 for (i = 0; i < nelt ; i += diff + 1)
31337 for (j = 0; j <= diff; j += 1)
31338 {
31339 /* This is guaranteed to hold, since diff is 7, 3 or 1
31340 and there are enough elements in the permutation to
31341 generate this index.  Getting a vector mask with a
31342 value of diff other than these values implies that
31343 something has gone wrong by the time we get here.  */
31344 gcc_assert (i + j < nelt);
31345 if (d->perm[i + j] != i + diff - j)
31346 return false;
31347 }
31348
31349 /* Success! */
31350 if (d->testing_p)
31351 return true;
31352
31353 emit_insn (gen (d->vmode, d->target, d->op0));
31354 return true;
31355 }
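/* For instance, on V8HImode the selector { 1, 0, 3, 2, 5, 4, 7, 6 }
   (diff == 1, swapping adjacent halfwords) is emitted as VREV32.16,
   while on V8QImode the selector { 7, 6, 5, 4, 3, 2, 1, 0 } (diff == 7)
   is emitted as VREV64.8.  */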
31356
31357 /* Recognize patterns for the VTRN insns. */
31358
31359 static bool
31360 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
31361 {
31362 unsigned int i, odd, mask, nelt = d->perm.length ();
31363 rtx out0, out1, in0, in1;
31364
31365 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
31366 return false;
31367
31368 /* Note that these are little-endian tests. Adjust for big-endian later. */
31369 if (d->perm[0] == 0)
31370 odd = 0;
31371 else if (d->perm[0] == 1)
31372 odd = 1;
31373 else
31374 return false;
31375 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
31376
31377 for (i = 0; i < nelt; i += 2)
31378 {
31379 if (d->perm[i] != i + odd)
31380 return false;
31381 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
31382 return false;
31383 }
31384
31385 /* Success! */
31386 if (d->testing_p)
31387 return true;
31388
31389 in0 = d->op0;
31390 in1 = d->op1;
31391 if (BYTES_BIG_ENDIAN)
31392 {
31393 std::swap (in0, in1);
31394 odd = !odd;
31395 }
31396
31397 out0 = d->target;
31398 out1 = gen_reg_rtx (d->vmode);
31399 if (odd)
31400 std::swap (out0, out1);
31401
31402 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
31403 return true;
31404 }
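/* For instance, with two V8QImode operands the selector
   { 0, 8, 2, 10, 4, 12, 6, 14 } matches this pattern with odd == 0 and is
   emitted as a single VTRN; the transposed companion output goes to an
   unused fresh register.  */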
31405
31406 /* Recognize patterns for the VEXT insns. */
31407
31408 static bool
31409 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
31410 {
31411 unsigned int i, nelt = d->perm.length ();
31412 rtx offset;
31413
31414 unsigned int location;
31415
31416 unsigned int next = d->perm[0] + 1;
31417
31418 /* TODO: Handle GCC's numbering of elements for big-endian. */
31419 if (BYTES_BIG_ENDIAN)
31420 return false;
31421
31422 /* Check if the extracted indexes are increasing by one. */
31423 for (i = 1; i < nelt; next++, i++)
31424 {
31425 /* If we hit the most significant element of the 2nd vector in
31426 the previous iteration, no need to test further. */
31427 if (next == 2 * nelt)
31428 return false;
31429
31430 /* If we are operating on only one vector, it could be a
31431 rotation.  If there are only two elements of size < 64, let
31432 arm_evpc_neon_vrev catch it.  */
31433 if (d->one_vector_p && (next == nelt))
31434 {
31435 if ((nelt == 2) && (d->vmode != V2DImode))
31436 return false;
31437 else
31438 next = 0;
31439 }
31440
31441 if (d->perm[i] != next)
31442 return false;
31443 }
31444
31445 location = d->perm[0];
31446
31447 /* Success! */
31448 if (d->testing_p)
31449 return true;
31450
31451 offset = GEN_INT (location);
31452
31453 if (d->vmode == E_DImode)
31454 return false;
31455
31456 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
31457 return true;
31458 }
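/* For instance, with two V8QImode operands the selector
   { 3, 4, 5, 6, 7, 8, 9, 10 } extracts eight consecutive bytes starting at
   index 3 of the concatenated inputs and is emitted as VEXT.8 with an
   immediate of 3.  */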
31459
31460 /* The NEON VTBL instruction is a fully variable permutation that's even
31461 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
31462 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
31463 can do slightly better by expanding this as a constant where we don't
31464 have to apply a mask. */
31465
31466 static bool
31467 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
31468 {
31469 rtx rperm[MAX_VECT_LEN], sel;
31470 machine_mode vmode = d->vmode;
31471 unsigned int i, nelt = d->perm.length ();
31472
31473 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
31474 numbering of elements for big-endian, we must reverse the order. */
31475 if (BYTES_BIG_ENDIAN)
31476 return false;
31477
31478 if (d->testing_p)
31479 return true;
31480
31481 /* Generic code will try constant permutation twice: once with the
31482 original mode and again with the elements lowered to QImode.
31483 So wait and don't do the selector expansion ourselves. */
31484 if (vmode != V8QImode && vmode != V16QImode)
31485 return false;
31486
31487 for (i = 0; i < nelt; ++i)
31488 rperm[i] = GEN_INT (d->perm[i]);
31489 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
31490 sel = force_reg (vmode, sel);
31491
31492 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
31493 return true;
31494 }
31495
31496 static bool
31497 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
31498 {
31499 /* Check if the input mask matches vext before reordering the
31500 operands. */
31501 if (TARGET_NEON)
31502 if (arm_evpc_neon_vext (d))
31503 return true;
31504
31505 /* The pattern matching functions above are written to look for a small
31506 number to begin the sequence (0, 1, N/2). If we begin with an index
31507 from the second operand, we can swap the operands. */
31508 unsigned int nelt = d->perm.length ();
31509 if (d->perm[0] >= nelt)
31510 {
31511 d->perm.rotate_inputs (1);
31512 std::swap (d->op0, d->op1);
31513 }
31514
31515 if (TARGET_NEON)
31516 {
31517 if (arm_evpc_neon_vuzp (d))
31518 return true;
31519 if (arm_evpc_neon_vzip (d))
31520 return true;
31521 if (arm_evpc_neon_vrev (d))
31522 return true;
31523 if (arm_evpc_neon_vtrn (d))
31524 return true;
31525 return arm_evpc_neon_vtbl (d);
31526 }
31527 return false;
31528 }
31529
31530 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
31531
31532 static bool
31533 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
31534 const vec_perm_indices &sel)
31535 {
31536 struct expand_vec_perm_d d;
31537 int i, nelt, which;
31538
31539 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
31540 return false;
31541
31542 d.target = target;
31543 if (op0)
31544 {
31545 rtx nop0 = force_reg (vmode, op0);
31546 if (op0 == op1)
31547 op1 = nop0;
31548 op0 = nop0;
31549 }
31550 if (op1)
31551 op1 = force_reg (vmode, op1);
31552 d.op0 = op0;
31553 d.op1 = op1;
31554
31555 d.vmode = vmode;
31556 gcc_assert (VECTOR_MODE_P (d.vmode));
31557 d.testing_p = !target;
31558
31559 nelt = GET_MODE_NUNITS (d.vmode);
31560 for (i = which = 0; i < nelt; ++i)
31561 {
31562 int ei = sel[i] & (2 * nelt - 1);
31563 which |= (ei < nelt ? 1 : 2);
31564 }
31565
31566 switch (which)
31567 {
31568 default:
31569 gcc_unreachable ();
31570
31571 case 3:
31572 d.one_vector_p = false;
31573 if (d.testing_p || !rtx_equal_p (op0, op1))
31574 break;
31575
31576 /* The elements of PERM do not suggest that only the first operand
31577 is used, but both operands are identical. Allow easier matching
31578 of the permutation by folding the permutation into the single
31579 input vector. */
31580 /* FALLTHRU */
31581 case 2:
31582 d.op0 = op1;
31583 d.one_vector_p = true;
31584 break;
31585
31586 case 1:
31587 d.op1 = op0;
31588 d.one_vector_p = true;
31589 break;
31590 }
31591
31592 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
31593
31594 if (!d.testing_p)
31595 return arm_expand_vec_perm_const_1 (&d);
31596
31597 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31598 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31599 if (!d.one_vector_p)
31600 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31601
31602 start_sequence ();
31603 bool ret = arm_expand_vec_perm_const_1 (&d);
31604 end_sequence ();
31605
31606 return ret;
31607 }
31608
31609 bool
31610 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
31611 {
31612 /* If we are soft float and either have LDRD or the mode fits in a
31613 single word, then all auto-increment forms are OK.  */
31614 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
31615 return true;
31616
31617 switch (code)
31618 {
31619 /* Post-increment and pre-decrement are supported for all
31620 instruction forms except for vector forms.  */
31621 case ARM_POST_INC:
31622 case ARM_PRE_DEC:
31623 if (VECTOR_MODE_P (mode))
31624 {
31625 if (code != ARM_PRE_DEC)
31626 return true;
31627 else
31628 return false;
31629 }
31630
31631 return true;
31632
31633 case ARM_POST_DEC:
31634 case ARM_PRE_INC:
31635 /* Without LDRD, and with a mode size greater than the
31636 word size, there is no point in auto-incrementing
31637 because ldm and stm do not have these forms.  */
31638 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
31639 return false;
31640
31641 /* Vector and floating point modes do not support
31642 these auto increment forms. */
31643 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
31644 return false;
31645
31646 return true;
31647
31648 default:
31649 return false;
31650
31651 }
31652
31653 return false;
31654 }
31655
31656 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
31657 on ARM, since we know that shifts by negative amounts are no-ops.
31658 Additionally, the default expansion code is not available or suitable
31659 for post-reload insn splits (this can occur when the register allocator
31660 chooses not to do a shift in NEON).
31661
31662 This function is used in both initial expand and post-reload splits, and
31663 handles all kinds of 64-bit shifts.
31664
31665 Input requirements:
31666 - It is safe for the input and output to be the same register, but
31667 early-clobber rules apply for the shift amount and scratch registers.
31668 - Shift by register requires both scratch registers. In all other cases
31669 the scratch registers may be NULL.
31670 - Ashiftrt by a register also clobbers the CC register. */
31671 void
31672 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
31673 rtx amount, rtx scratch1, rtx scratch2)
31674 {
31675 rtx out_high = gen_highpart (SImode, out);
31676 rtx out_low = gen_lowpart (SImode, out);
31677 rtx in_high = gen_highpart (SImode, in);
31678 rtx in_low = gen_lowpart (SImode, in);
31679
31680 /* Terminology:
31681 in = the register pair containing the input value.
31682 out = the destination register pair.
31683 up = the high- or low-part of each pair.
31684 down = the opposite part to "up".
31685 In a shift, we can consider bits to shift from "up"-stream to
31686 "down"-stream, so in a left-shift "up" is the low-part and "down"
31687 is the high-part of each register pair. */
31688
31689 rtx out_up = code == ASHIFT ? out_low : out_high;
31690 rtx out_down = code == ASHIFT ? out_high : out_low;
31691 rtx in_up = code == ASHIFT ? in_low : in_high;
31692 rtx in_down = code == ASHIFT ? in_high : in_low;
31693
31694 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
31695 gcc_assert (out
31696 && (REG_P (out) || SUBREG_P (out))
31697 && GET_MODE (out) == DImode);
31698 gcc_assert (in
31699 && (REG_P (in) || SUBREG_P (in))
31700 && GET_MODE (in) == DImode);
31701 gcc_assert (amount
31702 && (((REG_P (amount) || SUBREG_P (amount))
31703 && GET_MODE (amount) == SImode)
31704 || CONST_INT_P (amount)));
31705 gcc_assert (scratch1 == NULL
31706 || (GET_CODE (scratch1) == SCRATCH)
31707 || (GET_MODE (scratch1) == SImode
31708 && REG_P (scratch1)));
31709 gcc_assert (scratch2 == NULL
31710 || (GET_CODE (scratch2) == SCRATCH)
31711 || (GET_MODE (scratch2) == SImode
31712 && REG_P (scratch2)));
31713 gcc_assert (!REG_P (out) || !REG_P (amount)
31714 || !HARD_REGISTER_P (out)
31715 || (REGNO (out) != REGNO (amount)
31716 && REGNO (out) + 1 != REGNO (amount)));
31717
31718 /* Macros to make following code more readable. */
31719 #define SUB_32(DEST,SRC) \
31720 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31721 #define RSB_32(DEST,SRC) \
31722 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31723 #define SUB_S_32(DEST,SRC) \
31724 gen_addsi3_compare0 ((DEST), (SRC), \
31725 GEN_INT (-32))
31726 #define SET(DEST,SRC) \
31727 gen_rtx_SET ((DEST), (SRC))
31728 #define SHIFT(CODE,SRC,AMOUNT) \
31729 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31730 #define LSHIFT(CODE,SRC,AMOUNT) \
31731 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31732 SImode, (SRC), (AMOUNT))
31733 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31734 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31735 SImode, (SRC), (AMOUNT))
31736 #define ORR(A,B) \
31737 gen_rtx_IOR (SImode, (A), (B))
31738 #define BRANCH(COND,LABEL) \
31739 gen_arm_cond_branch ((LABEL), \
31740 gen_rtx_ ## COND (CCmode, cc_reg, \
31741 const0_rtx), \
31742 cc_reg)
31743
31744 /* Shifts by register and shifts by constant are handled separately. */
31745 if (CONST_INT_P (amount))
31746 {
31747 /* We have a shift-by-constant. */
31748
31749 /* First, handle out-of-range shift amounts.
31750 In both cases we try to match the result an ARM instruction in a
31751 shift-by-register would give. This helps reduce execution
31752 differences between optimization levels, but it won't stop other
31753 parts of the compiler doing different things. This is "undefined
31754 behavior", in any case.  */
31755 if (INTVAL (amount) <= 0)
31756 emit_insn (gen_movdi (out, in));
31757 else if (INTVAL (amount) >= 64)
31758 {
31759 if (code == ASHIFTRT)
31760 {
31761 rtx const31_rtx = GEN_INT (31);
31762 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31763 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31764 }
31765 else
31766 emit_insn (gen_movdi (out, const0_rtx));
31767 }
31768
31769 /* Now handle valid shifts. */
31770 else if (INTVAL (amount) < 32)
31771 {
31772 /* Shifts by a constant less than 32. */
31773 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31774
31775 /* Clearing the out register in DImode first avoids lots
31776 of spilling and results in less stack usage.
31777 Later this redundant insn is completely removed.
31778 Do that only if "in" and "out" are different registers. */
31779 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
31780 emit_insn (SET (out, const0_rtx));
31781 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31782 emit_insn (SET (out_down,
31783 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31784 out_down)));
31785 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31786 }
31787 else
31788 {
31789 /* Shifts by a constant greater than 31. */
31790 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31791
31792 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
31793 emit_insn (SET (out, const0_rtx));
31794 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31795 if (code == ASHIFTRT)
31796 emit_insn (gen_ashrsi3 (out_up, in_up,
31797 GEN_INT (31)));
31798 else
31799 emit_insn (SET (out_up, const0_rtx));
31800 }
31801 }
31802 else
31803 {
31804 /* We have a shift-by-register. */
31805 rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
31806
31807 /* This alternative requires the scratch registers. */
31808 gcc_assert (scratch1 && REG_P (scratch1));
31809 gcc_assert (scratch2 && REG_P (scratch2));
31810
31811 /* We will need the values "amount-32" and "32-amount" later.
31812 Swapping them around now allows the later code to be more general. */
31813 switch (code)
31814 {
31815 case ASHIFT:
31816 emit_insn (SUB_32 (scratch1, amount));
31817 emit_insn (RSB_32 (scratch2, amount));
31818 break;
31819 case ASHIFTRT:
31820 emit_insn (RSB_32 (scratch1, amount));
31821 /* Also set CC = amount > 32. */
31822 emit_insn (SUB_S_32 (scratch2, amount));
31823 break;
31824 case LSHIFTRT:
31825 emit_insn (RSB_32 (scratch1, amount));
31826 emit_insn (SUB_32 (scratch2, amount));
31827 break;
31828 default:
31829 gcc_unreachable ();
31830 }
31831
31832 /* Emit code like this:
31833
31834 arithmetic-left:
31835 out_down = in_down << amount;
31836 out_down = (in_up << (amount - 32)) | out_down;
31837 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31838 out_up = in_up << amount;
31839
31840 arithmetic-right:
31841 out_down = in_down >> amount;
31842 out_down = (in_up << (32 - amount)) | out_down;
31843 if (amount < 32)
31844 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31845 out_up = in_up << amount;
31846
31847 logical-right:
31848 out_down = in_down >> amount;
31849 out_down = (in_up << (32 - amount)) | out_down;
31850 if (amount < 32)
31851 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31852 out_up = in_up << amount;
31853
31854 The ARM and Thumb2 variants are the same but implemented slightly
31855 differently. If this were only called during expand we could just
31856 use the Thumb2 case and let combine do the right thing, but this
31857 can also be called from post-reload splitters. */
31858
31859 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31860
31861 if (!TARGET_THUMB2)
31862 {
31863 /* Emit code for ARM mode. */
31864 emit_insn (SET (out_down,
31865 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31866 if (code == ASHIFTRT)
31867 {
31868 rtx_code_label *done_label = gen_label_rtx ();
31869 emit_jump_insn (BRANCH (LT, done_label));
31870 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31871 out_down)));
31872 emit_label (done_label);
31873 }
31874 else
31875 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31876 out_down)));
31877 }
31878 else
31879 {
31880 /* Emit code for Thumb2 mode.
31881 Thumb2 can't do shift and or in one insn. */
31882 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31883 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31884
31885 if (code == ASHIFTRT)
31886 {
31887 rtx_code_label *done_label = gen_label_rtx ();
31888 emit_jump_insn (BRANCH (LT, done_label));
31889 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31890 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31891 emit_label (done_label);
31892 }
31893 else
31894 {
31895 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31896 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31897 }
31898 }
31899
31900 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31901 }
31902
31903 #undef SUB_32
31904 #undef RSB_32
31905 #undef SUB_S_32
31906 #undef SET
31907 #undef SHIFT
31908 #undef LSHIFT
31909 #undef REV_LSHIFT
31910 #undef ORR
31911 #undef BRANCH
31912 }
31913
31914 /* Returns true if the pattern is a valid symbolic address, which is either a
31915 symbol_ref or (symbol_ref + addend).
31916
31917 According to the ARM ELF ABI, the initial addend of REL-type relocations
31918 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
31919 literal field of the instruction as a 16-bit signed value in the range
31920 -32768 <= A < 32768.
31921
31922 In Thumb-1 mode, we use upper/lower relocations which have an 8-bit
31923 unsigned range of 0 <= A < 256 as described in the AAELF32
31924 relocation handling documentation: REL-type relocations are encoded
31925 as unsigned in this case. */
31926
31927 bool
31928 arm_valid_symbolic_address_p (rtx addr)
31929 {
31930 rtx xop0, xop1 = NULL_RTX;
31931 rtx tmp = addr;
31932
31933 if (target_word_relocations)
31934 return false;
31935
31936 if (SYMBOL_REF_P (tmp) || LABEL_REF_P (tmp))
31937 return true;
31938
31939 /* (const (plus: symbol_ref const_int)) */
31940 if (GET_CODE (addr) == CONST)
31941 tmp = XEXP (addr, 0);
31942
31943 if (GET_CODE (tmp) == PLUS)
31944 {
31945 xop0 = XEXP (tmp, 0);
31946 xop1 = XEXP (tmp, 1);
31947
31948 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
31949 {
31950 if (TARGET_THUMB1 && !TARGET_HAVE_MOVT)
31951 return IN_RANGE (INTVAL (xop1), 0, 0xff);
31952 else
31953 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
31954 }
31955 }
31956
31957 return false;
31958 }
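/* For example, the following address forms are accepted (subject to the
   addend ranges described above):
     (symbol_ref "foo")
     (label_ref ...)
     (const (plus (symbol_ref "foo") (const_int 42)))
   whereas (const (plus (symbol_ref "foo") (const_int 0x12345))) is
   rejected because the addend does not fit the relocation's range.  */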
31959
31960 /* Returns true if *COMPARISON is a valid comparison operation, and
31961 puts the operands into a form that is valid for it.  */
31962 bool
31963 arm_validize_comparison (rtx *comparison, rtx *op1, rtx *op2)
31964 {
31965 enum rtx_code code = GET_CODE (*comparison);
31966 int code_int;
31967 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31968 ? GET_MODE (*op2) : GET_MODE (*op1);
31969
31970 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31971
31972 if (code == UNEQ || code == LTGT)
31973 return false;
31974
31975 code_int = (int)code;
31976 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31977 PUT_CODE (*comparison, (enum rtx_code)code_int);
31978
31979 switch (mode)
31980 {
31981 case E_SImode:
31982 if (!arm_add_operand (*op1, mode))
31983 *op1 = force_reg (mode, *op1);
31984 if (!arm_add_operand (*op2, mode))
31985 *op2 = force_reg (mode, *op2);
31986 return true;
31987
31988 case E_DImode:
31989 /* gen_compare_reg() will sort out any invalid operands. */
31990 return true;
31991
31992 case E_HFmode:
31993 if (!TARGET_VFP_FP16INST)
31994 break;
31995 /* FP16 comparisons are done in SF mode. */
31996 mode = SFmode;
31997 *op1 = convert_to_mode (mode, *op1, 1);
31998 *op2 = convert_to_mode (mode, *op2, 1);
31999 /* Fall through. */
32000 case E_SFmode:
32001 case E_DFmode:
32002 if (!vfp_compare_operand (*op1, mode))
32003 *op1 = force_reg (mode, *op1);
32004 if (!vfp_compare_operand (*op2, mode))
32005 *op2 = force_reg (mode, *op2);
32006 return true;
32007 default:
32008 break;
32009 }
32010
32011 return false;
32012
32013 }
32014
32015 /* Maximum number of instructions to set block of memory. */
32016 static int
32017 arm_block_set_max_insns (void)
32018 {
32019 if (optimize_function_for_size_p (cfun))
32020 return 4;
32021 else
32022 return current_tune->max_insns_inline_memset;
32023 }
32024
32025 /* Return TRUE if it's profitable to set a block of memory for the
32026 non-vectorized case.  VAL is the value to set the memory
32027 with. LENGTH is the number of bytes to set. ALIGN is the
32028 alignment of the destination memory in bytes. UNALIGNED_P
32029 is TRUE if we can only set the memory with instructions
32030 meeting alignment requirements. USE_STRD_P is TRUE if we
32031 can use strd to set the memory. */
32032 static bool
32033 arm_block_set_non_vect_profit_p (rtx val,
32034 unsigned HOST_WIDE_INT length,
32035 unsigned HOST_WIDE_INT align,
32036 bool unaligned_p, bool use_strd_p)
32037 {
32038 int num = 0;
32039 /* For a leftover of 0-7 bytes, we can set the memory block using
32040 strb/strh/str with the minimum number of instructions.  */
32041 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
32042
32043 if (unaligned_p)
32044 {
32045 num = arm_const_inline_cost (SET, val);
32046 num += length / align + length % align;
32047 }
32048 else if (use_strd_p)
32049 {
32050 num = arm_const_double_inline_cost (val);
32051 num += (length >> 3) + leftover[length & 7];
32052 }
32053 else
32054 {
32055 num = arm_const_inline_cost (SET, val);
32056 num += (length >> 2) + leftover[length & 3];
32057 }
32058
32059 /* We may be able to combine last pair STRH/STRB into a single STR
32060 by shifting one byte back. */
32061 if (unaligned_access && length > 3 && (length & 3) == 3)
32062 num--;
32063
32064 return (num <= arm_block_set_max_insns ());
32065 }
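/* A rough worked example of the non-vectorized costing above: for a 15-byte
   memset with word alignment, no STRD and a constant that loads in one
   instruction, num = 1 (constant) + (15 >> 2) word stores + leftover[3]
   = 1 + 3 + 2 = 6, reduced to 5 when unaligned access lets the final
   STRH/STRB pair be merged into one STR; this is then compared against
   arm_block_set_max_insns ().  */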
32066
32067 /* Return TRUE if it's profitable to set a block of memory for the
32068 vectorized case.  LENGTH is the number of bytes to set.
32069 ALIGN is the alignment of destination memory in bytes.
32070 MODE is the vector mode used to set the memory. */
32071 static bool
32072 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
32073 unsigned HOST_WIDE_INT align,
32074 machine_mode mode)
32075 {
32076 int num;
32077 bool unaligned_p = ((align & 3) != 0);
32078 unsigned int nelt = GET_MODE_NUNITS (mode);
32079
32080 /* Instruction loading constant value. */
32081 num = 1;
32082 /* Instructions storing the memory. */
32083 num += (length + nelt - 1) / nelt;
32084 /* Instructions adjusting the address expression. Only need to
32085 adjust address expression if it's 4 bytes aligned and bytes
32086 leftover can only be stored by mis-aligned store instruction. */
32087 if (!unaligned_p && (length & 3) != 0)
32088 num++;
32089
32090 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
32091 if (!unaligned_p && mode == V16QImode)
32092 num--;
32093
32094 return (num <= arm_block_set_max_insns ());
32095 }
32096
32097 /* Set a block of memory using vectorization instructions for the
32098 unaligned case. We fill the first LENGTH bytes of the memory
32099 area starting from DSTBASE with byte constant VALUE. ALIGN is
32100 the alignment requirement of memory. Return TRUE if succeeded. */
32101 static bool
32102 arm_block_set_unaligned_vect (rtx dstbase,
32103 unsigned HOST_WIDE_INT length,
32104 unsigned HOST_WIDE_INT value,
32105 unsigned HOST_WIDE_INT align)
32106 {
32107 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
32108 rtx dst, mem;
32109 rtx val_vec, reg;
32110 rtx (*gen_func) (rtx, rtx);
32111 machine_mode mode;
32112 unsigned HOST_WIDE_INT v = value;
32113 unsigned int offset = 0;
32114 gcc_assert ((align & 0x3) != 0);
32115 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32116 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32117 if (length >= nelt_v16)
32118 {
32119 mode = V16QImode;
32120 gen_func = gen_movmisalignv16qi;
32121 }
32122 else
32123 {
32124 mode = V8QImode;
32125 gen_func = gen_movmisalignv8qi;
32126 }
32127 nelt_mode = GET_MODE_NUNITS (mode);
32128 gcc_assert (length >= nelt_mode);
32129 /* Skip if it isn't profitable. */
32130 if (!arm_block_set_vect_profit_p (length, align, mode))
32131 return false;
32132
32133 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32134 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32135
32136 v = sext_hwi (v, BITS_PER_WORD);
32137
32138 reg = gen_reg_rtx (mode);
32139 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
32140 /* Emit instruction loading the constant value. */
32141 emit_move_insn (reg, val_vec);
32142
32143 /* Handle nelt_mode bytes in a vector. */
32144 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
32145 {
32146 emit_insn ((*gen_func) (mem, reg));
32147 if (i + 2 * nelt_mode <= length)
32148 {
32149 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
32150 offset += nelt_mode;
32151 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32152 }
32153 }
32154
32155 /* If at least nelt_v8 bytes are left over, we must be in
32156 V16QImode.  */
32157 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
32158
32159 /* Handle (8, 16) bytes leftover. */
32160 if (i + nelt_v8 < length)
32161 {
32162 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
32163 offset += length - i;
32164 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32165
32166 /* We are shifting bytes back, set the alignment accordingly. */
32167 if ((length & 1) != 0 && align >= 2)
32168 set_mem_align (mem, BITS_PER_UNIT);
32169
32170 emit_insn (gen_movmisalignv16qi (mem, reg));
32171 }
32172 /* Handle (0, 8] bytes leftover. */
32173 else if (i < length && i + nelt_v8 >= length)
32174 {
32175 if (mode == V16QImode)
32176 reg = gen_lowpart (V8QImode, reg);
32177
32178 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
32179 + (nelt_mode - nelt_v8))));
32180 offset += (length - i) + (nelt_mode - nelt_v8);
32181 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
32182
32183 /* We are shifting bytes back, set the alignment accordingly. */
32184 if ((length & 1) != 0 && align >= 2)
32185 set_mem_align (mem, BITS_PER_UNIT);
32186
32187 emit_insn (gen_movmisalignv8qi (mem, reg));
32188 }
32189
32190 return true;
32191 }
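
/* For instance, LENGTH == 23 with a misaligned destination (when the
   sequence is deemed profitable) is handled by one misaligned V16QI store
   at offset 0, after which the address is advanced by
   (23 - 16) + (16 - 8) == 15 and a misaligned V8QI store covers bytes
   15-22; re-writing byte 15 is harmless since the value is a constant. */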
32192
32193 /* Set a block of memory using vectorization instructions for the
32194 aligned case. We fill the first LENGTH bytes of the memory area
32195 starting from DSTBASE with byte constant VALUE. ALIGN is the
32196 alignment requirement of memory. Return TRUE if succeeded. */
32197 static bool
32198 arm_block_set_aligned_vect (rtx dstbase,
32199 unsigned HOST_WIDE_INT length,
32200 unsigned HOST_WIDE_INT value,
32201 unsigned HOST_WIDE_INT align)
32202 {
32203 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
32204 rtx dst, addr, mem;
32205 rtx val_vec, reg;
32206 machine_mode mode;
32207 unsigned int offset = 0;
32208
32209 gcc_assert ((align & 0x3) == 0);
32210 nelt_v8 = GET_MODE_NUNITS (V8QImode);
32211 nelt_v16 = GET_MODE_NUNITS (V16QImode);
32212 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
32213 mode = V16QImode;
32214 else
32215 mode = V8QImode;
32216
32217 nelt_mode = GET_MODE_NUNITS (mode);
32218 gcc_assert (length >= nelt_mode);
32219 /* Skip if it isn't profitable. */
32220 if (!arm_block_set_vect_profit_p (length, align, mode))
32221 return false;
32222
32223 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32224
32225 reg = gen_reg_rtx (mode);
32226 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
32227 /* Emit instruction loading the constant value. */
32228 emit_move_insn (reg, val_vec);
32229
32230 i = 0;
32231 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
32232 if (mode == V16QImode)
32233 {
32234 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32235 emit_insn (gen_movmisalignv16qi (mem, reg));
32236 i += nelt_mode;
32237 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
32238 if (i + nelt_v8 < length && i + nelt_v16 > length)
32239 {
32240 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32241 offset += length - nelt_mode;
32242 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32243 /* We are shifting bytes back, set the alignment accordingly. */
32244 if ((length & 0x3) == 0)
32245 set_mem_align (mem, BITS_PER_UNIT * 4);
32246 else if ((length & 0x1) == 0)
32247 set_mem_align (mem, BITS_PER_UNIT * 2);
32248 else
32249 set_mem_align (mem, BITS_PER_UNIT);
32250
32251 emit_insn (gen_movmisalignv16qi (mem, reg));
32252 return true;
32253 }
32254 /* Fall through for bytes leftover. */
32255 mode = V8QImode;
32256 nelt_mode = GET_MODE_NUNITS (mode);
32257 reg = gen_lowpart (V8QImode, reg);
32258 }
32259
32260 /* Handle 8 bytes in a vector. */
32261 for (; (i + nelt_mode <= length); i += nelt_mode)
32262 {
32263 addr = plus_constant (Pmode, dst, i);
32264 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
32265 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32266 emit_move_insn (mem, reg);
32267 else
32268 emit_insn (gen_unaligned_storev8qi (mem, reg));
32269 }
32270
32271 /* Handle single word leftover by shifting 4 bytes back. We can
32272 use aligned access for this case. */
32273 if (i + UNITS_PER_WORD == length)
32274 {
32275 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
32276 offset += i - UNITS_PER_WORD;
32277 mem = adjust_automodify_address (dstbase, mode, addr, offset);
32278 /* We are shifting 4 bytes back, set the alignment accordingly. */
32279 if (align > UNITS_PER_WORD)
32280 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
32281
32282 emit_insn (gen_unaligned_storev8qi (mem, reg));
32283 }
32284 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
32285 We have to use unaligned access for this case. */
32286 else if (i < length)
32287 {
32288 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
32289 offset += length - nelt_mode;
32290 mem = adjust_automodify_address (dstbase, mode, dst, offset);
32291 /* We are shifting bytes back, set the alignment accordingly. */
32292 if ((length & 1) == 0)
32293 set_mem_align (mem, BITS_PER_UNIT * 2);
32294 else
32295 set_mem_align (mem, BITS_PER_UNIT);
32296
32297 emit_insn (gen_movmisalignv8qi (mem, reg));
32298 }
32299
32300 return true;
32301 }
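
/* For instance, LENGTH == 20 with ALIGN == 4 (and unaligned access
   available, so the V16QImode path is taken) emits one V16QI store at
   offset 0 and then handles the remaining word with a V8QI store shifted
   back to offset 12, re-writing bytes 12-19 with the same constant. */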
32302
32303 /* Set a block of memory using plain strh/strb instructions, only
32304 using instructions permitted by the alignment ALIGN on the processor.
32305 We fill the first LENGTH bytes of the memory area starting from
32306 DSTBASE with byte constant VALUE. ALIGN is the alignment requirement
32307 of memory. */
32308 static bool
32309 arm_block_set_unaligned_non_vect (rtx dstbase,
32310 unsigned HOST_WIDE_INT length,
32311 unsigned HOST_WIDE_INT value,
32312 unsigned HOST_WIDE_INT align)
32313 {
32314 unsigned int i;
32315 rtx dst, addr, mem;
32316 rtx val_exp, val_reg, reg;
32317 machine_mode mode;
32318 HOST_WIDE_INT v = value;
32319
32320 gcc_assert (align == 1 || align == 2);
32321
32322 if (align == 2)
32323 v |= (value << BITS_PER_UNIT);
32324
32325 v = sext_hwi (v, BITS_PER_WORD);
32326 val_exp = GEN_INT (v);
32327 /* Skip if it isn't profitable. */
32328 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32329 align, true, false))
32330 return false;
32331
32332 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32333 mode = (align == 2 ? HImode : QImode);
32334 val_reg = force_reg (SImode, val_exp);
32335 reg = gen_lowpart (mode, val_reg);
32336
32337 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
32338 {
32339 addr = plus_constant (Pmode, dst, i);
32340 mem = adjust_automodify_address (dstbase, mode, addr, i);
32341 emit_move_insn (mem, reg);
32342 }
32343
32344 /* Handle single byte leftover. */
32345 if (i + 1 == length)
32346 {
32347 reg = gen_lowpart (QImode, val_reg);
32348 addr = plus_constant (Pmode, dst, i);
32349 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32350 emit_move_insn (mem, reg);
32351 i++;
32352 }
32353
32354 gcc_assert (i == length);
32355 return true;
32356 }
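
/* For instance, LENGTH == 5 with ALIGN == 2 (when profitable) duplicates
   the byte into both halves of a halfword and emits two strh stores at
   offsets 0 and 2 followed by a single strb for the final byte. */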
32357
32358 /* Set a block of memory using plain strd/str/strh/strb instructions,
32359 to permit unaligned stores on processors which support unaligned
32360 semantics for those instructions. We fill the first LENGTH bytes
32361 of the memory area starting from DSTBASE with byte constant VALUE.
32362 ALIGN is the alignment requirement of memory. */
32363 static bool
32364 arm_block_set_aligned_non_vect (rtx dstbase,
32365 unsigned HOST_WIDE_INT length,
32366 unsigned HOST_WIDE_INT value,
32367 unsigned HOST_WIDE_INT align)
32368 {
32369 unsigned int i;
32370 rtx dst, addr, mem;
32371 rtx val_exp, val_reg, reg;
32372 unsigned HOST_WIDE_INT v;
32373 bool use_strd_p;
32374
32375 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
32376 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
32377
32378 v = (value | (value << 8) | (value << 16) | (value << 24));
32379 if (length < UNITS_PER_WORD)
32380 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
32381
32382 if (use_strd_p)
32383 v |= (v << BITS_PER_WORD);
32384 else
32385 v = sext_hwi (v, BITS_PER_WORD);
32386
32387 val_exp = GEN_INT (v);
32388 /* Skip if it isn't profitable. */
32389 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32390 align, false, use_strd_p))
32391 {
32392 if (!use_strd_p)
32393 return false;
32394
32395 /* Try without strd. */
32396 v = (v >> BITS_PER_WORD);
32397 v = sext_hwi (v, BITS_PER_WORD);
32398 val_exp = GEN_INT (v);
32399 use_strd_p = false;
32400 if (!arm_block_set_non_vect_profit_p (val_exp, length,
32401 align, false, use_strd_p))
32402 return false;
32403 }
32404
32405 i = 0;
32406 dst = copy_addr_to_reg (XEXP (dstbase, 0));
32407 /* Handle double words using strd if possible. */
32408 if (use_strd_p)
32409 {
32410 val_reg = force_reg (DImode, val_exp);
32411 reg = val_reg;
32412 for (; (i + 8 <= length); i += 8)
32413 {
32414 addr = plus_constant (Pmode, dst, i);
32415 mem = adjust_automodify_address (dstbase, DImode, addr, i);
32416 if (MEM_ALIGN (mem) >= 2 * BITS_PER_WORD)
32417 emit_move_insn (mem, reg);
32418 else
32419 emit_insn (gen_unaligned_storedi (mem, reg));
32420 }
32421 }
32422 else
32423 val_reg = force_reg (SImode, val_exp);
32424
32425 /* Handle words. */
32426 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
32427 for (; (i + 4 <= length); i += 4)
32428 {
32429 addr = plus_constant (Pmode, dst, i);
32430 mem = adjust_automodify_address (dstbase, SImode, addr, i);
32431 if ((align & 3) == 0)
32432 emit_move_insn (mem, reg);
32433 else
32434 emit_insn (gen_unaligned_storesi (mem, reg));
32435 }
32436
32437 /* Merge last pair of STRH and STRB into a STR if possible. */
32438 if (unaligned_access && i > 0 && (i + 3) == length)
32439 {
32440 addr = plus_constant (Pmode, dst, i - 1);
32441 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
32442 /* We are shifting one byte back, set the alignment accordingly. */
32443 if ((align & 1) == 0)
32444 set_mem_align (mem, BITS_PER_UNIT);
32445
32446 /* Most likely this is an unaligned access, and we can't tell at
32447 compilation time. */
32448 emit_insn (gen_unaligned_storesi (mem, reg));
32449 return true;
32450 }
32451
32452 /* Handle half word leftover. */
32453 if (i + 2 <= length)
32454 {
32455 reg = gen_lowpart (HImode, val_reg);
32456 addr = plus_constant (Pmode, dst, i);
32457 mem = adjust_automodify_address (dstbase, HImode, addr, i);
32458 if ((align & 1) == 0)
32459 emit_move_insn (mem, reg);
32460 else
32461 emit_insn (gen_unaligned_storehi (mem, reg));
32462
32463 i += 2;
32464 }
32465
32466 /* Handle single byte leftover. */
32467 if (i + 1 == length)
32468 {
32469 reg = gen_lowpart (QImode, val_reg);
32470 addr = plus_constant (Pmode, dst, i);
32471 mem = adjust_automodify_address (dstbase, QImode, addr, i);
32472 emit_move_insn (mem, reg);
32473 }
32474
32475 return true;
32476 }
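
/* For instance, LENGTH == 7 with ALIGN == 4 (assuming the profitability
   check passes and unaligned access is enabled) emits one str at offset 0
   and then merges the trailing strh/strb pair into a single unaligned str
   at offset 3, so bytes 3-6 are written by one instruction. */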
32477
32478 /* Set a block of memory using vectorization instructions for both
32479 aligned and unaligned cases. We fill the first LENGTH bytes of
32480 the memory area starting from DSTBASE with byte constant VALUE.
32481 ALIGN is the alignment requirement of memory. */
32482 static bool
32483 arm_block_set_vect (rtx dstbase,
32484 unsigned HOST_WIDE_INT length,
32485 unsigned HOST_WIDE_INT value,
32486 unsigned HOST_WIDE_INT align)
32487 {
32488 /* Check whether we need to use unaligned store instruction. */
32489 if (((align & 3) != 0 || (length & 3) != 0)
32490 /* Check whether unaligned store instruction is available. */
32491 && (!unaligned_access || BYTES_BIG_ENDIAN))
32492 return false;
32493
32494 if ((align & 3) == 0)
32495 return arm_block_set_aligned_vect (dstbase, length, value, align);
32496 else
32497 return arm_block_set_unaligned_vect (dstbase, length, value, align);
32498 }
32499
32500 /* Expand a string store operation. First we try to do it using
32501 vectorization instructions, then with ARM unaligned access and
32502 double-word stores if that is profitable. OPERANDS[0] is the destination,
32503 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
32504 initialize the memory with, OPERANDS[3] is the known alignment of the
32505 destination. */
32506 bool
32507 arm_gen_setmem (rtx *operands)
32508 {
32509 rtx dstbase = operands[0];
32510 unsigned HOST_WIDE_INT length;
32511 unsigned HOST_WIDE_INT value;
32512 unsigned HOST_WIDE_INT align;
32513
32514 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
32515 return false;
32516
32517 length = UINTVAL (operands[1]);
32518 if (length > 64)
32519 return false;
32520
32521 value = (UINTVAL (operands[2]) & 0xFF);
32522 align = UINTVAL (operands[3]);
32523 if (TARGET_NEON && length >= 8
32524 && current_tune->string_ops_prefer_neon
32525 && arm_block_set_vect (dstbase, length, value, align))
32526 return true;
32527
32528 if (!unaligned_access && (align & 3) != 0)
32529 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
32530
32531 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
32532 }
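
/* arm_gen_setmem is the external entry point for constant-length memory
   sets of at most 64 bytes: it tries the NEON paths above when the tuning
   prefers them, then falls back to the strd/str/strh/strb sequences, and
   returns false when no expansion was emitted. */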
32533
32534
32535 static bool
32536 arm_macro_fusion_p (void)
32537 {
32538 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
32539 }
32540
32541 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
32542 for MOVW / MOVT macro fusion. */
32543
32544 static bool
32545 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
32546 {
32547 /* We are trying to fuse
32548 movw imm / movt imm
32549 instructions as a group that gets scheduled together. */
32550
32551 rtx set_dest = SET_DEST (curr_set);
32552
32553 if (GET_MODE (set_dest) != SImode)
32554 return false;
32555
32556 /* We are trying to match:
32557 prev (movw) == (set (reg r0) (const_int imm16))
32558 curr (movt) == (set (zero_extract (reg r0)
32559 (const_int 16)
32560 (const_int 16))
32561 (const_int imm16_1))
32562 or
32563 prev (movw) == (set (reg r1)
32564 (high (symbol_ref ("SYM"))))
32565 curr (movt) == (set (reg r0)
32566 (lo_sum (reg r1)
32567 (symbol_ref ("SYM")))) */
32568
32569 if (GET_CODE (set_dest) == ZERO_EXTRACT)
32570 {
32571 if (CONST_INT_P (SET_SRC (curr_set))
32572 && CONST_INT_P (SET_SRC (prev_set))
32573 && REG_P (XEXP (set_dest, 0))
32574 && REG_P (SET_DEST (prev_set))
32575 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
32576 return true;
32577
32578 }
32579 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
32580 && REG_P (SET_DEST (curr_set))
32581 && REG_P (SET_DEST (prev_set))
32582 && GET_CODE (SET_SRC (prev_set)) == HIGH
32583 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
32584 return true;
32585
32586 return false;
32587 }
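
/* A typical pair matched by the function above is the address-building
   sequence
     movw r0, #:lower16:SYM
     movt r0, #:upper16:SYM
   which macro fusion keeps back to back in the schedule. */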
32588
32589 static bool
32590 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
32591 {
32592 rtx prev_set = single_set (prev);
32593 rtx curr_set = single_set (curr);
32594
32595 if (!prev_set
32596 || !curr_set)
32597 return false;
32598
32599 if (any_condjump_p (curr))
32600 return false;
32601
32602 if (!arm_macro_fusion_p ())
32603 return false;
32604
32605 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
32606 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
32607 return true;
32608
32609 return false;
32610 }
32611
32612 /* Return true iff the instruction fusion described by OP is enabled. */
32613 bool
32614 arm_fusion_enabled_p (tune_params::fuse_ops op)
32615 {
32616 return current_tune->fusible_ops & op;
32617 }
32618
32619 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
32620 scheduled for speculative execution. Reject the long-running division
32621 and square-root instructions. */
32622
32623 static bool
32624 arm_sched_can_speculate_insn (rtx_insn *insn)
32625 {
32626 switch (get_attr_type (insn))
32627 {
32628 case TYPE_SDIV:
32629 case TYPE_UDIV:
32630 case TYPE_FDIVS:
32631 case TYPE_FDIVD:
32632 case TYPE_FSQRTS:
32633 case TYPE_FSQRTD:
32634 case TYPE_NEON_FP_SQRT_S:
32635 case TYPE_NEON_FP_SQRT_D:
32636 case TYPE_NEON_FP_SQRT_S_Q:
32637 case TYPE_NEON_FP_SQRT_D_Q:
32638 case TYPE_NEON_FP_DIV_S:
32639 case TYPE_NEON_FP_DIV_D:
32640 case TYPE_NEON_FP_DIV_S_Q:
32641 case TYPE_NEON_FP_DIV_D_Q:
32642 return false;
32643 default:
32644 return true;
32645 }
32646 }
32647
32648 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32649
32650 static unsigned HOST_WIDE_INT
32651 arm_asan_shadow_offset (void)
32652 {
32653 return HOST_WIDE_INT_1U << 29;
32654 }
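
/* With the default ASan shadow scale of 3, the shadow byte for address
   ADDR therefore lives at (ADDR >> 3) + 0x20000000. */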
32655
32656
32657 /* This is a temporary fix for PR60655. Ideally we need
32658 to handle most of these cases in the generic part but
32659 currently we reject minus (..) (sym_ref). We try to
32660 ameliorate the case with minus (sym_ref1) (sym_ref2)
32661 where they are in the same section. */
32662
32663 static bool
32664 arm_const_not_ok_for_debug_p (rtx p)
32665 {
32666 tree decl_op0 = NULL;
32667 tree decl_op1 = NULL;
32668
32669 if (GET_CODE (p) == UNSPEC)
32670 return true;
32671 if (GET_CODE (p) == MINUS)
32672 {
32673 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
32674 {
32675 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
32676 if (decl_op1
32677 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
32678 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
32679 {
32680 if ((VAR_P (decl_op1)
32681 || TREE_CODE (decl_op1) == CONST_DECL)
32682 && (VAR_P (decl_op0)
32683 || TREE_CODE (decl_op0) == CONST_DECL))
32684 return (get_variable_section (decl_op1, false)
32685 != get_variable_section (decl_op0, false));
32686
32687 if (TREE_CODE (decl_op1) == LABEL_DECL
32688 && TREE_CODE (decl_op0) == LABEL_DECL)
32689 return (DECL_CONTEXT (decl_op1)
32690 != DECL_CONTEXT (decl_op0));
32691 }
32692
32693 return true;
32694 }
32695 }
32696
32697 return false;
32698 }
32699
32700 /* Return TRUE if X is a reference to a value in a constant pool. */
32701 extern bool
32702 arm_is_constant_pool_ref (rtx x)
32703 {
32704 return (MEM_P (x)
32705 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
32706 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
32707 }
32708
32709 /* Remember the last target of arm_set_current_function. */
32710 static GTY(()) tree arm_previous_fndecl;
32711
32712 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
32713
32714 void
32715 save_restore_target_globals (tree new_tree)
32716 {
32717 /* If we have a previous state, use it. */
32718 if (TREE_TARGET_GLOBALS (new_tree))
32719 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32720 else if (new_tree == target_option_default_node)
32721 restore_target_globals (&default_target_globals);
32722 else
32723 {
32724 /* Call target_reinit and save the state for TARGET_GLOBALS. */
32725 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
32726 }
32727
32728 arm_option_params_internal ();
32729 }
32730
32731 /* Invalidate arm_previous_fndecl. */
32732
32733 void
32734 arm_reset_previous_fndecl (void)
32735 {
32736 arm_previous_fndecl = NULL_TREE;
32737 }
32738
32739 /* Establish appropriate back-end context for processing the function
32740 FNDECL. The argument might be NULL to indicate processing at top
32741 level, outside of any function scope. */
32742
32743 static void
32744 arm_set_current_function (tree fndecl)
32745 {
32746 if (!fndecl || fndecl == arm_previous_fndecl)
32747 return;
32748
32749 tree old_tree = (arm_previous_fndecl
32750 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
32751 : NULL_TREE);
32752
32753 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
32754
32755 /* If current function has no attributes but previous one did,
32756 use the default node. */
32757 if (! new_tree && old_tree)
32758 new_tree = target_option_default_node;
32759
32760 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
32761 the default have been handled by save_restore_target_globals from
32762 arm_pragma_target_parse. */
32763 if (old_tree == new_tree)
32764 return;
32765
32766 arm_previous_fndecl = fndecl;
32767
32768 /* First set the target options. */
32769 cl_target_option_restore (&global_options, &global_options_set,
32770 TREE_TARGET_OPTION (new_tree));
32771
32772 save_restore_target_globals (new_tree);
32773
32774 arm_override_options_after_change_1 (&global_options, &global_options_set);
32775 }
32776
32777 /* Implement TARGET_OPTION_PRINT. */
32778
32779 static void
32780 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
32781 {
32782 int flags = ptr->x_target_flags;
32783 const char *fpu_name;
32784
32785 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
32786 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
32787
32788 fprintf (file, "%*sselected isa %s\n", indent, "",
32789 TARGET_THUMB2_P (flags) ? "thumb2" :
32790 TARGET_THUMB_P (flags) ? "thumb1" :
32791 "arm");
32792
32793 if (ptr->x_arm_arch_string)
32794 fprintf (file, "%*sselected architecture %s\n", indent, "",
32795 ptr->x_arm_arch_string);
32796
32797 if (ptr->x_arm_cpu_string)
32798 fprintf (file, "%*sselected CPU %s\n", indent, "",
32799 ptr->x_arm_cpu_string);
32800
32801 if (ptr->x_arm_tune_string)
32802 fprintf (file, "%*sselected tune %s\n", indent, "",
32803 ptr->x_arm_tune_string);
32804
32805 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
32806 }
32807
32808 /* Hook to determine if one function can safely inline another. */
32809
32810 static bool
32811 arm_can_inline_p (tree caller, tree callee)
32812 {
32813 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32814 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32815 bool can_inline = true;
32816
32817 struct cl_target_option *caller_opts
32818 = TREE_TARGET_OPTION (caller_tree ? caller_tree
32819 : target_option_default_node);
32820
32821 struct cl_target_option *callee_opts
32822 = TREE_TARGET_OPTION (callee_tree ? callee_tree
32823 : target_option_default_node);
32824
32825 if (callee_opts == caller_opts)
32826 return true;
32827
32828 /* Callee's ISA features should be a subset of the caller's. */
32829 struct arm_build_target caller_target;
32830 struct arm_build_target callee_target;
32831 caller_target.isa = sbitmap_alloc (isa_num_bits);
32832 callee_target.isa = sbitmap_alloc (isa_num_bits);
32833
32834 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
32835 false);
32836 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
32837 false);
32838 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
32839 can_inline = false;
32840
32841 sbitmap_free (caller_target.isa);
32842 sbitmap_free (callee_target.isa);
32843
32844 /* OK to inline between different modes.
32845 Function with mode specific instructions, e.g using asm,
32846 must be explicitly protected with noinline. */
32847 return can_inline;
32848 }
32849
32850 /* Hook to fix function's alignment affected by target attribute. */
32851
32852 static void
32853 arm_relayout_function (tree fndecl)
32854 {
32855 if (DECL_USER_ALIGN (fndecl))
32856 return;
32857
32858 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
32859
32860 if (!callee_tree)
32861 callee_tree = target_option_default_node;
32862
32863 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
32864 SET_DECL_ALIGN
32865 (fndecl,
32866 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
32867 }
32868
32869 /* Inner function to process the attribute((target(...))), take an argument and
32870 set the current options from the argument. If we have a list, recursively
32871 go over the list. */
32872
32873 static bool
32874 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
32875 {
32876 if (TREE_CODE (args) == TREE_LIST)
32877 {
32878 bool ret = true;
32879
32880 for (; args; args = TREE_CHAIN (args))
32881 if (TREE_VALUE (args)
32882 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
32883 ret = false;
32884 return ret;
32885 }
32886
32887 else if (TREE_CODE (args) != STRING_CST)
32888 {
32889 error ("attribute %<target%> argument not a string");
32890 return false;
32891 }
32892
32893 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
32894 char *q;
32895
32896 while ((q = strtok (argstr, ",")) != NULL)
32897 {
32898 argstr = NULL;
32899 if (!strcmp (q, "thumb"))
32900 {
32901 opts->x_target_flags |= MASK_THUMB;
32902 if (TARGET_FDPIC && !arm_arch_thumb2)
32903 sorry ("FDPIC mode is not supported in Thumb-1 mode");
32904 }
32905
32906 else if (!strcmp (q, "arm"))
32907 opts->x_target_flags &= ~MASK_THUMB;
32908
32909 else if (!strcmp (q, "general-regs-only"))
32910 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
32911
32912 else if (!strncmp (q, "fpu=", 4))
32913 {
32914 int fpu_index;
32915 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
32916 &fpu_index, CL_TARGET))
32917 {
32918 error ("invalid fpu for target attribute or pragma %qs", q);
32919 return false;
32920 }
32921 if (fpu_index == TARGET_FPU_auto)
32922 {
32923 /* This doesn't really make sense until we support
32924 general dynamic selection of the architecture and all
32925 sub-features. */
32926 sorry ("auto fpu selection not currently permitted here");
32927 return false;
32928 }
32929 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
32930 }
32931 else if (!strncmp (q, "arch=", 5))
32932 {
32933 char *arch = q + 5;
32934 const arch_option *arm_selected_arch
32935 = arm_parse_arch_option_name (all_architectures, "arch", arch);
32936
32937 if (!arm_selected_arch)
32938 {
32939 error ("invalid architecture for target attribute or pragma %qs",
32940 q);
32941 return false;
32942 }
32943
32944 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
32945 }
32946 else if (q[0] == '+')
32947 {
32948 opts->x_arm_arch_string
32949 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
32950 }
32951 else
32952 {
32953 error ("unknown target attribute or pragma %qs", q);
32954 return false;
32955 }
32956 }
32957
32958 return true;
32959 }
32960
32961 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
32962
32963 tree
32964 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
32965 struct gcc_options *opts_set)
32966 {
32967 struct cl_target_option cl_opts;
32968
32969 if (!arm_valid_target_attribute_rec (args, opts))
32970 return NULL_TREE;
32971
32972 cl_target_option_save (&cl_opts, opts, opts_set);
32973 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
32974 arm_option_check_internal (opts);
32975 /* Do any overrides, such as global options arch=xxx.
32976 We do this since arm_active_target was overridden. */
32977 arm_option_reconfigure_globals ();
32978 arm_options_perform_arch_sanity_checks ();
32979 arm_option_override_internal (opts, opts_set);
32980
32981 return build_target_option_node (opts, opts_set);
32982 }
32983
32984 static void
32985 add_attribute (const char * mode, tree *attributes)
32986 {
32987 size_t len = strlen (mode);
32988 tree value = build_string (len, mode);
32989
32990 TREE_TYPE (value) = build_array_type (char_type_node,
32991 build_index_type (size_int (len)));
32992
32993 *attributes = tree_cons (get_identifier ("target"),
32994 build_tree_list (NULL_TREE, value),
32995 *attributes);
32996 }
32997
32998 /* For testing. Insert thumb or arm modes alternatively on functions. */
32999
33000 static void
33001 arm_insert_attributes (tree fndecl, tree * attributes)
33002 {
33003 const char *mode;
33004
33005 if (! TARGET_FLIP_THUMB)
33006 return;
33007
33008 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
33009 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
33010 return;
33011
33012 /* Nested definitions must inherit mode. */
33013 if (current_function_decl)
33014 {
33015 mode = TARGET_THUMB ? "thumb" : "arm";
33016 add_attribute (mode, attributes);
33017 return;
33018 }
33019
33020 /* If there is already a setting don't change it. */
33021 if (lookup_attribute ("target", *attributes) != NULL)
33022 return;
33023
33024 mode = thumb_flipper ? "thumb" : "arm";
33025 add_attribute (mode, attributes);
33026
33027 thumb_flipper = !thumb_flipper;
33028 }
33029
33030 /* Hook to validate attribute((target("string"))). */
33031
33032 static bool
33033 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
33034 tree args, int ARG_UNUSED (flags))
33035 {
33036 bool ret = true;
33037 struct gcc_options func_options, func_options_set;
33038 tree cur_tree, new_optimize;
33039 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33040
33041 /* Get the optimization options of the current function. */
33042 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33043
33044 /* If the function changed the optimization levels as well as setting target
33045 options, start with the optimizations specified. */
33046 if (!func_optimize)
33047 func_optimize = optimization_default_node;
33048
33049 /* Init func_options. */
33050 memset (&func_options, 0, sizeof (func_options));
33051 init_options_struct (&func_options, NULL);
33052 lang_hooks.init_options_struct (&func_options);
33053 memset (&func_options_set, 0, sizeof (func_options_set));
33054
33055 /* Initialize func_options to the defaults. */
33056 cl_optimization_restore (&func_options, &func_options_set,
33057 TREE_OPTIMIZATION (func_optimize));
33058
33059 cl_target_option_restore (&func_options, &func_options_set,
33060 TREE_TARGET_OPTION (target_option_default_node));
33061
33062 /* Set func_options flags with new target mode. */
33063 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
33064 &func_options_set);
33065
33066 if (cur_tree == NULL_TREE)
33067 ret = false;
33068
33069 new_optimize = build_optimization_node (&func_options, &func_options_set);
33070
33071 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
33072
33073 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33074
33075 return ret;
33076 }
33077
33078 /* Match an ISA feature bitmap to a named FPU. We always use the
33079 first entry that exactly matches the feature set, so that we
33080 effectively canonicalize the FPU name for the assembler. */
33081 static const char*
33082 arm_identify_fpu_from_isa (sbitmap isa)
33083 {
33084 auto_sbitmap fpubits (isa_num_bits);
33085 auto_sbitmap cand_fpubits (isa_num_bits);
33086
33087 bitmap_and (fpubits, isa, isa_all_fpubits_internal);
33088
33089 /* If there are no ISA feature bits relating to the FPU, we must be
33090 doing soft-float. */
33091 if (bitmap_empty_p (fpubits))
33092 return "softvfp";
33093
33094 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33095 {
33096 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
33097 if (bitmap_equal_p (fpubits, cand_fpubits))
33098 return all_fpus[i].name;
33099 }
33100 /* We must find an entry, or things have gone wrong. */
33101 gcc_unreachable ();
33102 }
33103
33104 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
33105 by the function fndecl. */
33106 void
33107 arm_declare_function_name (FILE *stream, const char *name, tree decl)
33108 {
33109 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
33110
33111 struct cl_target_option *targ_options;
33112 if (target_parts)
33113 targ_options = TREE_TARGET_OPTION (target_parts);
33114 else
33115 targ_options = TREE_TARGET_OPTION (target_option_current_node);
33116 gcc_assert (targ_options);
33117
33118 /* Only update the assembler .arch string if it is distinct from the last
33119 such string we printed. arch_to_print is set conditionally in case
33120 targ_options->x_arm_arch_string is NULL, which can be the case
33121 when cc1 is invoked directly without passing the -march option.
33122 std::string arch_to_print;
33123 if (targ_options->x_arm_arch_string)
33124 arch_to_print = targ_options->x_arm_arch_string;
33125
33126 if (arch_to_print != arm_last_printed_arch_string)
33127 {
33128 std::string arch_name
33129 = arch_to_print.substr (0, arch_to_print.find ("+"));
33130 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
33131 const arch_option *arch
33132 = arm_parse_arch_option_name (all_architectures, "-march",
33133 targ_options->x_arm_arch_string);
33134 auto_sbitmap opt_bits (isa_num_bits);
33135
33136 gcc_assert (arch);
33137 if (arch->common.extensions)
33138 {
33139 for (const struct cpu_arch_extension *opt = arch->common.extensions;
33140 opt->name != NULL;
33141 opt++)
33142 {
33143 if (!opt->remove)
33144 {
33145 arm_initialize_isa (opt_bits, opt->isa_bits);
33146 /* For the cases "-march=armv8.1-m.main+mve -mfloat-abi=soft"
33147 and "-march=armv8.1-m.main+mve.fp -mfloat-abi=soft", MVE and
33148 MVE with floating point instructions are disabled. So the
33149 following check suppresses the printing of ".arch_extension
33150 mve" and ".arch_extension fp" (for mve.fp) in the assembly
33151 file. MVE needs this special behaviour because the
33152 feature bits "mve" and "mve_float" are not part of the
33153 "fpu bits", so they are not cleared when -mfloat-abi=soft
33154 (i.e. nofp), but the macros TARGET_HAVE_MVE and
33155 TARGET_HAVE_MVE_FLOAT are disabled. */
33156 if ((bitmap_bit_p (opt_bits, isa_bit_mve) && !TARGET_HAVE_MVE)
33157 || (bitmap_bit_p (opt_bits, isa_bit_mve_float)
33158 && !TARGET_HAVE_MVE_FLOAT))
33159 continue;
33160 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
33161 && !bitmap_subset_p (opt_bits, isa_all_fpubits_internal))
33162 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
33163 opt->name);
33164 }
33165 }
33166 }
33167
33168 arm_last_printed_arch_string = arch_to_print;
33169 }
33170
33171 fprintf (stream, "\t.syntax unified\n");
33172
33173 if (TARGET_THUMB)
33174 {
33175 if (is_called_in_ARM_mode (decl)
33176 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
33177 && cfun->is_thunk))
33178 fprintf (stream, "\t.code 32\n");
33179 else if (TARGET_THUMB1)
33180 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
33181 else
33182 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
33183 }
33184 else
33185 fprintf (stream, "\t.arm\n");
33186
33187 std::string fpu_to_print
33188 = TARGET_SOFT_FLOAT
33189 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
33190
33191 if (!(!strcmp (fpu_to_print.c_str (), "softvfp") && TARGET_VFP_BASE)
33192 && (fpu_to_print != arm_last_printed_fpu_string))
33193 {
33194 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
33195 arm_last_printed_fpu_string = fpu_to_print;
33196 }
33197
33198 if (TARGET_POKE_FUNCTION_NAME)
33199 arm_poke_function_name (stream, (const char *) name);
33200 }
33201
33202 /* If MEM is in the form of [base+offset], extract the two parts
33203 of the address and store them in BASE and OFFSET; otherwise return
33204 false after clearing BASE and OFFSET. */
33205
33206 static bool
33207 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
33208 {
33209 rtx addr;
33210
33211 gcc_assert (MEM_P (mem));
33212
33213 addr = XEXP (mem, 0);
33214
33215 /* Strip off const from addresses like (const (addr)). */
33216 if (GET_CODE (addr) == CONST)
33217 addr = XEXP (addr, 0);
33218
33219 if (REG_P (addr))
33220 {
33221 *base = addr;
33222 *offset = const0_rtx;
33223 return true;
33224 }
33225
33226 if (GET_CODE (addr) == PLUS
33227 && GET_CODE (XEXP (addr, 0)) == REG
33228 && CONST_INT_P (XEXP (addr, 1)))
33229 {
33230 *base = XEXP (addr, 0);
33231 *offset = XEXP (addr, 1);
33232 return true;
33233 }
33234
33235 *base = NULL_RTX;
33236 *offset = NULL_RTX;
33237
33238 return false;
33239 }
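
/* For example, (mem (plus (reg r1) (const_int 8))) yields BASE == r1 and
   OFFSET == (const_int 8), while a plain (mem (reg r1)) yields BASE == r1
   and OFFSET == const0_rtx. */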
33240
33241 /* If INSN is a load or store of an address in the form of [base+offset],
33242 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
33243 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
33244 otherwise return FALSE. */
33245
33246 static bool
33247 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
33248 {
33249 rtx x, dest, src;
33250
33251 gcc_assert (INSN_P (insn));
33252 x = PATTERN (insn);
33253 if (GET_CODE (x) != SET)
33254 return false;
33255
33256 src = SET_SRC (x);
33257 dest = SET_DEST (x);
33258 if (REG_P (src) && MEM_P (dest))
33259 {
33260 *is_load = false;
33261 extract_base_offset_in_addr (dest, base, offset);
33262 }
33263 else if (MEM_P (src) && REG_P (dest))
33264 {
33265 *is_load = true;
33266 extract_base_offset_in_addr (src, base, offset);
33267 }
33268 else
33269 return false;
33270
33271 return (*base != NULL_RTX && *offset != NULL_RTX);
33272 }
33273
33274 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
33275
33276 Currently we only support fusing ldr and str instructions, so FUSION_PRI
33277 and PRI are only calculated for these instructions. For other instructions,
33278 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
33279 instruction fusion can be supported by returning different priorities.
33280
33281 It's important that irrelevant instructions get the largest FUSION_PRI. */
33282
33283 static void
33284 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
33285 int *fusion_pri, int *pri)
33286 {
33287 int tmp, off_val;
33288 bool is_load;
33289 rtx base, offset;
33290
33291 gcc_assert (INSN_P (insn));
33292
33293 tmp = max_pri - 1;
33294 if (!fusion_load_store (insn, &base, &offset, &is_load))
33295 {
33296 *pri = tmp;
33297 *fusion_pri = tmp;
33298 return;
33299 }
33300
33301 /* Load goes first. */
33302 if (is_load)
33303 *fusion_pri = tmp - 1;
33304 else
33305 *fusion_pri = tmp - 2;
33306
33307 tmp /= 2;
33308
33309 /* INSN with smaller base register goes first. */
33310 tmp -= ((REGNO (base) & 0xff) << 20);
33311
33312 /* INSN with smaller offset goes first. */
33313 off_val = (int)(INTVAL (offset));
33314 if (off_val >= 0)
33315 tmp -= (off_val & 0xfffff);
33316 else
33317 tmp += ((- off_val) & 0xfffff);
33318
33319 *pri = tmp;
33320 return;
33321 }
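
/* For example, two loads from [r1, #4] and [r1, #8] get the same
   FUSION_PRI (loads are ranked above stores), while the load with the
   smaller offset gets the larger PRI, so the pair stays adjacent and is
   ordered by offset. */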
33322
33323
33324 /* Construct and return a PARALLEL RTX vector with elements numbering the
33325 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
33326 the vector - from the perspective of the architecture. This does not
33327 line up with GCC's perspective on lane numbers, so we end up with
33328 different masks depending on our target endian-ness. The diagram
33329 below may help. We must draw the distinction when building masks
33330 which select one half of the vector. An instruction selecting
33331 architectural low-lanes for a big-endian target must be described using
33332 a mask selecting GCC high-lanes.
33333
33334 Big-Endian Little-Endian
33335
33336 GCC 0 1 2 3 3 2 1 0
33337 | x | x | x | x | | x | x | x | x |
33338 Architecture 3 2 1 0 3 2 1 0
33339
33340 Low Mask: { 2, 3 } { 0, 1 }
33341 High Mask: { 0, 1 } { 2, 3 }
33342 */
33343
33344 rtx
33345 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
33346 {
33347 int nunits = GET_MODE_NUNITS (mode);
33348 rtvec v = rtvec_alloc (nunits / 2);
33349 int high_base = nunits / 2;
33350 int low_base = 0;
33351 int base;
33352 rtx t1;
33353 int i;
33354
33355 if (BYTES_BIG_ENDIAN)
33356 base = high ? low_base : high_base;
33357 else
33358 base = high ? high_base : low_base;
33359
33360 for (i = 0; i < nunits / 2; i++)
33361 RTVEC_ELT (v, i) = GEN_INT (base + i);
33362
33363 t1 = gen_rtx_PARALLEL (mode, v);
33364 return t1;
33365 }
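
/* For example, for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   mask table in the comment above. */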
33366
33367 /* Check OP for validity as a PARALLEL RTX vector with elements
33368 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
33369 from the perspective of the architecture. See the diagram above
33370 arm_simd_vect_par_cnst_half for more details. */
33371
33372 bool
33373 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
33374 bool high)
33375 {
33376 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
33377 HOST_WIDE_INT count_op = XVECLEN (op, 0);
33378 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
33379 int i = 0;
33380
33381 if (!VECTOR_MODE_P (mode))
33382 return false;
33383
33384 if (count_op != count_ideal)
33385 return false;
33386
33387 for (i = 0; i < count_ideal; i++)
33388 {
33389 rtx elt_op = XVECEXP (op, 0, i);
33390 rtx elt_ideal = XVECEXP (ideal, 0, i);
33391
33392 if (!CONST_INT_P (elt_op)
33393 || INTVAL (elt_ideal) != INTVAL (elt_op))
33394 return false;
33395 }
33396 return true;
33397 }
33398
33399 /* Can output mi_thunk for all cases except for non-zero vcall_offset
33400 in Thumb1. */
33401 static bool
33402 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
33403 const_tree)
33404 {
33405 /* For now, we punt and do not handle this for TARGET_THUMB1. */
33406 if (vcall_offset && TARGET_THUMB1)
33407 return false;
33408
33409 /* Otherwise ok. */
33410 return true;
33411 }
33412
33413 /* Generate RTL for a conditional branch with rtx comparison CODE in
33414 mode CC_MODE. The destination of the unlikely conditional branch
33415 is LABEL_REF. */
33416
33417 void
33418 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
33419 rtx label_ref)
33420 {
33421 rtx x;
33422 x = gen_rtx_fmt_ee (code, VOIDmode,
33423 gen_rtx_REG (cc_mode, CC_REGNUM),
33424 const0_rtx);
33425
33426 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
33427 gen_rtx_LABEL_REF (VOIDmode, label_ref),
33428 pc_rtx);
33429 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
33430 }
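
/* The branch emitted above has the form
   (set (pc) (if_then_else (CODE (reg CC_REGNUM) (const_int 0))
                           (label_ref LABEL_REF)
                           (pc)))
   and is annotated as unlikely to be taken. */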
33431
33432 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
33433
33434 For pure-code sections there is no letter code for this attribute, so
33435 output all the section flags numerically when this is needed. */
33436
33437 static bool
33438 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
33439 {
33440
33441 if (flags & SECTION_ARM_PURECODE)
33442 {
33443 *num = 0x20000000;
33444
33445 if (!(flags & SECTION_DEBUG))
33446 *num |= 0x2;
33447 if (flags & SECTION_EXCLUDE)
33448 *num |= 0x80000000;
33449 if (flags & SECTION_WRITE)
33450 *num |= 0x1;
33451 if (flags & SECTION_CODE)
33452 *num |= 0x4;
33453 if (flags & SECTION_MERGE)
33454 *num |= 0x10;
33455 if (flags & SECTION_STRINGS)
33456 *num |= 0x20;
33457 if (flags & SECTION_TLS)
33458 *num |= 0x400;
33459 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
33460 *num |= 0x200;
33461
33462 return true;
33463 }
33464
33465 return false;
33466 }
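
/* For example, a pure-code executable section that is neither a debug nor
   an excluded section is given the numeric flags 0x20000006, i.e.
   SHF_ALLOC | SHF_EXECINSTR | SHF_ARM_PURECODE. */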
33467
33468 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
33469
33470 If pure-code is passed as an option, make sure all functions are in
33471 sections that have the SHF_ARM_PURECODE attribute. */
33472
33473 static section *
33474 arm_function_section (tree decl, enum node_frequency freq,
33475 bool startup, bool exit)
33476 {
33477 const char * section_name;
33478 section * sec;
33479
33480 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
33481 return default_function_section (decl, freq, startup, exit);
33482
33483 if (!target_pure_code)
33484 return default_function_section (decl, freq, startup, exit);
33485
33486
33487 section_name = DECL_SECTION_NAME (decl);
33488
33489 /* If a function is not in a named section then it falls under the 'default'
33490 text section, also known as '.text'. We can preserve previous behavior as
33491 the default text section already has the SHF_ARM_PURECODE section
33492 attribute. */
33493 if (!section_name)
33494 {
33495 section *default_sec = default_function_section (decl, freq, startup,
33496 exit);
33497
33498 /* If default_sec is not null, then it must be a special section like for
33499 example .text.startup. We set the pure-code attribute and return the
33500 same section to preserve existing behavior. */
33501 if (default_sec)
33502 default_sec->common.flags |= SECTION_ARM_PURECODE;
33503 return default_sec;
33504 }
33505
33506 /* Otherwise look whether a section has already been created with
33507 'section_name'. */
33508 sec = get_named_section (decl, section_name, 0);
33509 if (!sec)
33510 /* If that is not the case passing NULL as the section's name to
33511 'get_named_section' will create a section with the declaration's
33512 section name. */
33513 sec = get_named_section (decl, NULL, 0);
33514
33515 /* Set the SHF_ARM_PURECODE attribute. */
33516 sec->common.flags |= SECTION_ARM_PURECODE;
33517
33518 return sec;
33519 }
33520
33521 /* Implements the TARGET_SECTION_FLAGS hook.
33522
33523 If DECL is a function declaration and pure-code is passed as an option
33524 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
33525 section's name and RELOC indicates whether the declaration's initializer may
33526 contain runtime relocations. */
33527
33528 static unsigned int
33529 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
33530 {
33531 unsigned int flags = default_section_type_flags (decl, name, reloc);
33532
33533 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
33534 flags |= SECTION_ARM_PURECODE;
33535
33536 return flags;
33537 }
33538
33539 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
33540
33541 static void
33542 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
33543 rtx op0, rtx op1,
33544 rtx *quot_p, rtx *rem_p)
33545 {
33546 if (mode == SImode)
33547 gcc_assert (!TARGET_IDIV);
33548
33549 scalar_int_mode libval_mode
33550 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
33551
33552 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
33553 libval_mode, op0, mode, op1, mode);
33554
33555 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
33556 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
33557 GET_MODE_SIZE (mode));
33558
33559 gcc_assert (quotient);
33560 gcc_assert (remainder);
33561
33562 *quot_p = quotient;
33563 *rem_p = remainder;
33564 }
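
/* For SImode operands the libcall result is read back in DImode and the
   quotient and remainder are extracted as the two word-sized subregs at
   byte offsets 0 and 4 respectively. */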
33565
33566 /* This function checks for the availability of the coprocessor builtin passed
33567 in BUILTIN for the current target. Returns true if it is available and
33568 false otherwise. If a BUILTIN is passed for which this function has not
33569 been implemented it will cause an exception. */
33570
33571 bool
33572 arm_coproc_builtin_available (enum unspecv builtin)
33573 {
33574 /* None of these builtins are available in Thumb mode if the target only
33575 supports Thumb-1. */
33576 if (TARGET_THUMB1)
33577 return false;
33578
33579 switch (builtin)
33580 {
33581 case VUNSPEC_CDP:
33582 case VUNSPEC_LDC:
33583 case VUNSPEC_LDCL:
33584 case VUNSPEC_STC:
33585 case VUNSPEC_STCL:
33586 case VUNSPEC_MCR:
33587 case VUNSPEC_MRC:
33588 if (arm_arch4)
33589 return true;
33590 break;
33591 case VUNSPEC_CDP2:
33592 case VUNSPEC_LDC2:
33593 case VUNSPEC_LDC2L:
33594 case VUNSPEC_STC2:
33595 case VUNSPEC_STC2L:
33596 case VUNSPEC_MCR2:
33597 case VUNSPEC_MRC2:
33598 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
33599 ARMv8-{A,M}. */
33600 if (arm_arch5t)
33601 return true;
33602 break;
33603 case VUNSPEC_MCRR:
33604 case VUNSPEC_MRRC:
33605 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
33606 ARMv8-{A,M}. */
33607 if (arm_arch6 || arm_arch5te)
33608 return true;
33609 break;
33610 case VUNSPEC_MCRR2:
33611 case VUNSPEC_MRRC2:
33612 if (arm_arch6)
33613 return true;
33614 break;
33615 default:
33616 gcc_unreachable ();
33617 }
33618 return false;
33619 }
33620
33621 /* This function returns true if OP is a valid memory operand for the ldc and
33622 stc coprocessor instructions and false otherwise. */
33623
33624 bool
33625 arm_coproc_ldc_stc_legitimate_address (rtx op)
33626 {
33627 HOST_WIDE_INT range;
33628 /* Has to be a memory operand. */
33629 if (!MEM_P (op))
33630 return false;
33631
33632 op = XEXP (op, 0);
33633
33634 /* We accept registers. */
33635 if (REG_P (op))
33636 return true;
33637
33638 switch (GET_CODE (op))
33639 {
33640 case PLUS:
33641 {
33642 /* Or registers with an offset. */
33643 if (!REG_P (XEXP (op, 0)))
33644 return false;
33645
33646 op = XEXP (op, 1);
33647
33648 /* The offset must be an immediate though. */
33649 if (!CONST_INT_P (op))
33650 return false;
33651
33652 range = INTVAL (op);
33653
33654 /* Within the range of [-1020,1020]. */
33655 if (!IN_RANGE (range, -1020, 1020))
33656 return false;
33657
33658 /* And a multiple of 4. */
33659 return (range % 4) == 0;
33660 }
33661 case PRE_INC:
33662 case POST_INC:
33663 case PRE_DEC:
33664 case POST_DEC:
33665 return REG_P (XEXP (op, 0));
33666 default:
33667 gcc_unreachable ();
33668 }
33669 return false;
33670 }
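
/* For example, (mem (reg r0)) and (mem (plus (reg r0) (const_int -1020)))
   are accepted, while an offset of 1022 is rejected because it is not a
   multiple of 4 and an offset of 1024 because it is out of range. */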
33671
33672 /* Return the diagnostic message string if conversion from FROMTYPE to
33673 TOTYPE is not allowed, NULL otherwise. */
33674
33675 static const char *
33676 arm_invalid_conversion (const_tree fromtype, const_tree totype)
33677 {
33678 if (element_mode (fromtype) != element_mode (totype))
33679 {
33680 /* Do not allow conversions to/from BFmode scalar types. */
33681 if (TYPE_MODE (fromtype) == BFmode)
33682 return N_("invalid conversion from type %<bfloat16_t%>");
33683 if (TYPE_MODE (totype) == BFmode)
33684 return N_("invalid conversion to type %<bfloat16_t%>");
33685 }
33686
33687 /* Conversion allowed. */
33688 return NULL;
33689 }
33690
33691 /* Return the diagnostic message string if the unary operation OP is
33692 not permitted on TYPE, NULL otherwise. */
33693
33694 static const char *
33695 arm_invalid_unary_op (int op, const_tree type)
33696 {
33697 /* Reject all single-operand operations on BFmode except for &. */
33698 if (element_mode (type) == BFmode && op != ADDR_EXPR)
33699 return N_("operation not permitted on type %<bfloat16_t%>");
33700
33701 /* Operation allowed. */
33702 return NULL;
33703 }
33704
33705 /* Return the diagnostic message string if the binary operation OP is
33706 not permitted on TYPE1 and TYPE2, NULL otherwise. */
33707
33708 static const char *
33709 arm_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
33710 const_tree type2)
33711 {
33712 /* Reject all 2-operand operations on BFmode. */
33713 if (element_mode (type1) == BFmode
33714 || element_mode (type2) == BFmode)
33715 return N_("operation not permitted on type %<bfloat16_t%>");
33716
33717 /* Operation allowed. */
33718 return NULL;
33719 }
33720
33721 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
33722
33723 In VFPv1, VFP registers could only be accessed in the mode they were
33724 set, so subregs would be invalid there. However, we don't support
33725 VFPv1 at the moment, and the restriction was lifted in VFPv2.
33726
33727 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
33728 VFP registers in little-endian order. We can't describe that accurately to
33729 GCC, so avoid taking subregs of such values.
33730
33731 The only exception is going from a 128-bit to a 64-bit type. In that
33732 case the data layout happens to be consistent for big-endian, so we
33733 explicitly allow that case. */
33734
33735 static bool
33736 arm_can_change_mode_class (machine_mode from, machine_mode to,
33737 reg_class_t rclass)
33738 {
33739 if (TARGET_BIG_END
33740 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
33741 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
33742 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
33743 && reg_classes_intersect_p (VFP_REGS, rclass))
33744 return false;
33745 return true;
33746 }
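
/* For example, on a big-endian target accessing a DFmode value held in a
   VFP register through an SImode subreg is rejected, while narrowing a
   128-bit vector to its 64-bit half is still permitted. */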
33747
33748 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
33749 strcpy from constants will be faster. */
33750
33751 static HOST_WIDE_INT
33752 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
33753 {
33754 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
33755 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
33756 return MAX (align, BITS_PER_WORD * factor);
33757 return align;
33758 }
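
/* For instance, when tuning for XScale in ARM state and not optimizing
   for size, string constants are aligned to at least 2 * BITS_PER_WORD,
   i.e. 64 bits. */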
33759
33760 /* Emit a speculation barrier on target architectures that do not have
33761 DSB/ISB directly. Such systems probably don't need a barrier
33762 themselves, but if the code is ever run on a later architecture, it
33763 might become a problem. */
33764 void
33765 arm_emit_speculation_barrier_function ()
33766 {
33767 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
33768 }
33769
33770 /* Have we recorded an explicit access to the Q bit of APSR? */
33771 bool
33772 arm_q_bit_access (void)
33773 {
33774 if (cfun && cfun->decl)
33775 return lookup_attribute ("acle qbit",
33776 DECL_ATTRIBUTES (cfun->decl));
33777 return true;
33778 }
33779
33780 /* Have we recorded an explicit access to the GE bits of PSTATE? */
33781 bool
33782 arm_ge_bits_access (void)
33783 {
33784 if (cfun && cfun->decl)
33785 return lookup_attribute ("acle gebits",
33786 DECL_ATTRIBUTES (cfun->decl));
33787 return true;
33788 }
33789
33790 /* Return NULL if insn INSN is valid within a low-overhead loop.
33791 Otherwise return a string explaining why doloop cannot be applied. */
33792
33793 static const char *
33794 arm_invalid_within_doloop (const rtx_insn *insn)
33795 {
33796 if (!TARGET_HAVE_LOB)
33797 return default_invalid_within_doloop (insn);
33798
33799 if (CALL_P (insn))
33800 return "Function call in the loop.";
33801
33802 if (reg_mentioned_p (gen_rtx_REG (SImode, LR_REGNUM), insn))
33803 return "LR is used inside loop.";
33804
33805 return NULL;
33806 }
33807
33808 bool
33809 arm_target_insn_ok_for_lob (rtx insn)
33810 {
33811 basic_block bb = BLOCK_FOR_INSN (insn);
33812 /* Make sure the basic block of the target insn is a simple latch
33813 whose single predecessor and successor is the body of the loop
33814 itself. Only simple loops with a single basic block as body are
33815 supported for 'low-overhead loop', making sure that the LE target is
33816 above LE itself in the generated code. */
33817
33818 return single_succ_p (bb)
33819 && single_pred_p (bb)
33820 && single_succ_edge (bb)->dest == single_pred_edge (bb)->src
33821 && contains_no_active_insn_p (bb);
33822 }
33823
33824 #if CHECKING_P
33825 namespace selftest {
33826
33827 /* Scan the static data tables generated by parsecpu.awk looking for
33828 potential issues with the data. We primarily check for
33829 inconsistencies in the option extensions at present (extensions
33830 that duplicate others but aren't marked as aliases). Furthermore,
33831 for correct canonicalization later options must never be a subset
33832 of an earlier option. Any extension should also only specify other
33833 feature bits and never an architecture bit. The architecture is inferred
33834 from the declaration of the extension. */
33835 static void
33836 arm_test_cpu_arch_data (void)
33837 {
33838 const arch_option *arch;
33839 const cpu_option *cpu;
33840 auto_sbitmap target_isa (isa_num_bits);
33841 auto_sbitmap isa1 (isa_num_bits);
33842 auto_sbitmap isa2 (isa_num_bits);
33843
33844 for (arch = all_architectures; arch->common.name != NULL; ++arch)
33845 {
33846 const cpu_arch_extension *ext1, *ext2;
33847
33848 if (arch->common.extensions == NULL)
33849 continue;
33850
33851 arm_initialize_isa (target_isa, arch->common.isa_bits);
33852
33853 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
33854 {
33855 if (ext1->alias)
33856 continue;
33857
33858 arm_initialize_isa (isa1, ext1->isa_bits);
33859 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
33860 {
33861 if (ext2->alias || ext1->remove != ext2->remove)
33862 continue;
33863
33864 arm_initialize_isa (isa2, ext2->isa_bits);
33865 /* If the option is a subset of the parent option, it doesn't
33866 add anything and so isn't useful. */
33867 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
33868
33869 /* If the extension specifies any architectural bits then
33870 disallow it. Extensions should only specify feature bits. */
33871 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
33872 }
33873 }
33874 }
33875
33876 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
33877 {
33878 const cpu_arch_extension *ext1, *ext2;
33879
33880 if (cpu->common.extensions == NULL)
33881 continue;
33882
33883 arm_initialize_isa (target_isa, cpu->common.isa_bits);
33884
33885 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
33886 {
33887 if (ext1->alias)
33888 continue;
33889
33890 arm_initialize_isa (isa1, ext1->isa_bits);
33891 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
33892 {
33893 if (ext2->alias || ext1->remove != ext2->remove)
33894 continue;
33895
33896 arm_initialize_isa (isa2, ext2->isa_bits);
33897 /* If the option is a subset of the parent option, it doesn't
33898 add anything and so isn't useful. */
33899 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
33900
33901 /* If the extension specifies any architectural bits then
33902 disallow it. Extensions should only specify feature bits. */
33903 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
33904 }
33905 }
33906 }
33907 }
33908
33909 /* Scan the static data tables generated by parsecpu.awk looking for
33910 potential issues with the data. Here we check for consistency between the
33911 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
33912 a feature bit that is not defined by any FPU flag. */
33913 static void
33914 arm_test_fpu_data (void)
33915 {
33916 auto_sbitmap isa_all_fpubits_internal (isa_num_bits);
33917 auto_sbitmap fpubits (isa_num_bits);
33918 auto_sbitmap tmpset (isa_num_bits);
33919
33920 static const enum isa_feature fpu_bitlist_internal[]
33921 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
33922 arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
33923
33924 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
33925 {
33926 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
33927 bitmap_and_compl (tmpset, isa_all_fpubits_internal, fpubits);
33928 bitmap_clear (isa_all_fpubits_internal);
33929 bitmap_copy (isa_all_fpubits_internal, tmpset);
33930 }
33931
33932 if (!bitmap_empty_p (isa_all_fpubits_internal))
33933 {
33934 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
33935 " group that are not defined by any FPU.\n"
33936 " Check your arm-cpus.in.\n");
33937 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits_internal));
33938 }
33939 }
33940
33941 static void
33942 arm_run_selftests (void)
33943 {
33944 arm_test_cpu_arch_data ();
33945 arm_test_fpu_data ();
33946 }
33947 } /* Namespace selftest. */
33948
33949 #undef TARGET_RUN_TARGET_SELFTESTS
33950 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
33951 #endif /* CHECKING_P */
33952
33953 /* Worker function for TARGET_MD_ASM_ADJUST, while in thumb1 mode.
33954 Unlike the arm version, we do NOT implement asm flag outputs. */
33955
33956 rtx_insn *
33957 thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
33958 vec<machine_mode> & /*input_modes*/,
33959 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
33960 HARD_REG_SET & /*clobbered_regs*/)
33961 {
33962 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
33963 if (strncmp (constraints[i], "=@cc", 4) == 0)
33964 {
33965 sorry ("asm flags not supported in thumb1 mode");
33966 break;
33967 }
33968 return NULL;
33969 }
33970
33971 /* Generate code to enable conditional branches in functions over 1 MiB.
33972 Parameters are:
33973 operands: is the operands list of the asm insn (see arm_cond_branch or
33974 arm_cond_branch_reversed).
33975 pos_label: is an index into the operands array where operands[pos_label] is
33976 the asm label of the final jump destination.
33977 dest: is a string which is used to generate the asm label of the intermediate
33978 destination
33979 branch_format: is a string denoting the intermediate branch format, e.g.
33980 "beq", "bne", etc. */
33981
33982 const char *
33983 arm_gen_far_branch (rtx * operands, int pos_label, const char * dest,
33984 const char * branch_format)
33985 {
33986 rtx_code_label * tmp_label = gen_label_rtx ();
33987 char label_buf[256];
33988 char buffer[128];
33989 ASM_GENERATE_INTERNAL_LABEL (label_buf, dest , \
33990 CODE_LABEL_NUMBER (tmp_label));
33991 const char *label_ptr = arm_strip_name_encoding (label_buf);
33992 rtx dest_label = operands[pos_label];
33993 operands[pos_label] = tmp_label;
33994
33995 snprintf (buffer, sizeof (buffer), "%s%s", branch_format , label_ptr);
33996 output_asm_insn (buffer, operands);
33997
33998 snprintf (buffer, sizeof (buffer), "b\t%%l0%d\n%s:", pos_label, label_ptr);
33999 operands[pos_label] = dest_label;
34000 output_asm_insn (buffer, operands);
34001 return "";
34002 }
34003
34004 /* If given mode matches, load from memory to LO_REGS.
34005 (i.e. [Rn], Rn <= LO_REGS). */
34006 enum reg_class
34007 arm_mode_base_reg_class (machine_mode mode)
34008 {
34009 if (TARGET_HAVE_MVE
34010 && (mode == E_V8QImode || mode == E_V4QImode || mode == E_V4HImode))
34011 return LO_REGS;
34012
34013 return MODE_BASE_REG_REG_CLASS (mode);
34014 }
34015
34016 struct gcc_target targetm = TARGET_INITIALIZER;
34017
34018 #include "gt-arm.h"