1 /* Subroutines used for LoongArch code generation.
2 Copyright (C) 2021-2023 Free Software Foundation, Inc.
3 Contributed by Loongson Ltd.
4 Based on MIPS and RISC-V target for GNU compiler.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #define IN_TARGET_CODE 1
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "memmodel.h"
32 #include "gimple.h"
33 #include "cfghooks.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "emit-rtl.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic.h"
44 #include "insn-attr.h"
45 #include "output.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "varasm.h"
49 #include "stor-layout.h"
50 #include "calls.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "libfuncs.h"
54 #include "reload.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "sched-int.h"
60 #include "gimplify.h"
61 #include "target-globals.h"
62 #include "tree-pass.h"
63 #include "context.h"
64 #include "builtins.h"
65 #include "rtl-iter.h"
66 #include "opts.h"
67 #include "function-abi.h"
68
69 /* This file should be included last. */
70 #include "target-def.h"
71
72 /* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF. */
73 #define UNSPEC_ADDRESS_P(X) \
74 (GET_CODE (X) == UNSPEC \
75 && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST \
76 && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)
77
78 /* Extract the symbol or label from UNSPEC wrapper X. */
79 #define UNSPEC_ADDRESS(X) XVECEXP (X, 0, 0)
80
81 /* Extract the symbol type from UNSPEC wrapper X. */
82 #define UNSPEC_ADDRESS_TYPE(X) \
83 ((enum loongarch_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))
84
85 /* True if INSN is a loongarch.md pattern or asm statement. */
86 /* ??? This test exists throughout the compiler, perhaps it should be
87 moved to rtl.h. */
88 #define USEFUL_INSN_P(INSN) \
89 (NONDEBUG_INSN_P (INSN) \
90 && GET_CODE (PATTERN (INSN)) != USE \
91 && GET_CODE (PATTERN (INSN)) != CLOBBER)
92
93 /* True if bit BIT is set in VALUE. */
94 #define BITSET_P(VALUE, BIT) (((VALUE) & (1 << (BIT))) != 0)
95
96 /* Classifies an address.
97
98 ADDRESS_REG
99 A natural register + offset address. The register satisfies
100 loongarch_valid_base_register_p and the offset is a const_arith_operand.
101
102 ADDRESS_REG_REG
103 A base register indexed by (optionally scaled) register.
104
105 ADDRESS_LO_SUM
106 A LO_SUM rtx. The first operand is a valid base register and the second
107 operand is a symbolic address.
108
109 ADDRESS_CONST_INT
110 A signed 16-bit constant address.
111
112 ADDRESS_SYMBOLIC:
113 A constant symbolic address. */
114 enum loongarch_address_type
115 {
116 ADDRESS_REG,
117 ADDRESS_REG_REG,
118 ADDRESS_LO_SUM,
119 ADDRESS_CONST_INT,
120 ADDRESS_SYMBOLIC
121 };
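
/* As a rough illustration (not an exhaustive specification), the
   classifications above correspond to address RTL of these shapes, with
   placeholder registers and symbols:

     ADDRESS_REG        (plus (reg $r12) (const_int 2040))
     ADDRESS_REG_REG    (plus (reg $r12) (reg $r13))
     ADDRESS_LO_SUM     (lo_sum (reg $r12) (symbol_ref "x"))
     ADDRESS_CONST_INT  (const_int 2040)
     ADDRESS_SYMBOLIC   (symbol_ref "x")  */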
122
123
124 /* Information about an address described by loongarch_address_type. */
125 struct loongarch_address_info
126 {
127 enum loongarch_address_type type;
128 rtx reg;
129 rtx offset;
130 enum loongarch_symbol_type symbol_type;
131 };
132
133 /* Methods of loading immediate numbers:
134
135 METHOD_NORMAL:
136 Load bits 0-31 of the immediate number.
137
138 METHOD_LU32I:
139 Load bits 32-51 of the immediate number.
140
141 METHOD_LU52I:
142 Load bits 52-63 of the immediate number.
143 */
144 enum loongarch_load_imm_method
145 {
146 METHOD_NORMAL,
147 METHOD_LU32I,
148 METHOD_LU52I
149 };
150
151 struct loongarch_integer_op
152 {
153 enum rtx_code code;
154 HOST_WIDE_INT value;
155 /* The cumulative value loaded into the register after this step of the
156 load sequence. */
157 HOST_WIDE_INT curr_value;
158 enum loongarch_load_imm_method method;
159 };
160
161 /* The largest number of operations needed to load an integer constant.
162 The worst accepted case for 64-bit constants is LU12I.W,LU32I.D,LU52I.D,ORI
163 or LU12I.W,LU32I.D,LU52I.D,ADDI.D. */
164 #define LARCH_MAX_INTEGER_OPS 4
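
/* As a worked example (assuming the usual semantics of these LoongArch
   instructions; the register and exact assembler syntax are only
   illustrative), loading 0x123456789abcdef0 needs all four operations:

     lu12i.w  $t0, 0x9abcd       # $t0 = 0xffffffff9abcd000
     ori      $t0, $t0, 0xef0    # $t0 = 0xffffffff9abcdef0
     lu32i.d  $t0, 0x34567       # $t0 = 0x000345679abcdef0
     lu52i.d  $t0, $t0, 0x123    # $t0 = 0x123456789abcdef0  */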
165
166 /* Arrays that map GCC register numbers to debugger register numbers. */
167 int loongarch_dwarf_regno[FIRST_PSEUDO_REGISTER];
168
169 /* Index [M][R] is true if register R is allowed to hold a value of mode M. */
170 static bool loongarch_hard_regno_mode_ok_p[MAX_MACHINE_MODE]
171 [FIRST_PSEUDO_REGISTER];
172
173 /* Index C is true if character C is a valid PRINT_OPERAND punctuation
174 character. */
175 static bool loongarch_print_operand_punct[256];
176
177 /* Cached value of can_issue_more. This is cached in loongarch_variable_issue
178 hook and returned from loongarch_sched_reorder2. */
179 static int cached_can_issue_more;
180
181 /* Index R is the smallest register class that contains register R. */
182 const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = {
183 GR_REGS, GR_REGS, GR_REGS, GR_REGS,
184 JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS,
185 JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS,
186 SIBCALL_REGS, JIRL_REGS, SIBCALL_REGS, SIBCALL_REGS,
187 SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS,
188 SIBCALL_REGS, GR_REGS, GR_REGS, JIRL_REGS,
189 JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS,
190 JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS,
191
192 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
193 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
194 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
195 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
196 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
197 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
198 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
199 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
200 FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS,
201 FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS,
202 FRAME_REGS, FRAME_REGS
203 };
204
205 /* Which cost information to use. */
206 static const struct loongarch_rtx_cost_data *loongarch_cost;
207
208 /* Information about a single argument. */
209 struct loongarch_arg_info
210 {
211 /* True if the argument is at least partially passed on the stack. */
212 bool stack_p;
213
214 /* The number of integer registers allocated to this argument. */
215 unsigned int num_gprs;
216
217 /* The offset of the first register used, provided num_gprs is nonzero.
218 If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS. */
219 unsigned int gpr_offset;
220
221 /* The number of floating-point registers allocated to this argument. */
222 unsigned int num_fprs;
223
224 /* The offset of the first register used, provided num_fprs is nonzero. */
225 unsigned int fpr_offset;
226 };
227
228 /* Invoke MACRO (COND) for each fcmp.cond.{s/d} condition. */
229 #define LARCH_FP_CONDITIONS(MACRO) \
230 MACRO (f), \
231 MACRO (un), \
232 MACRO (eq), \
233 MACRO (ueq), \
234 MACRO (olt), \
235 MACRO (ult), \
236 MACRO (ole), \
237 MACRO (ule), \
238 MACRO (sf), \
239 MACRO (ngle), \
240 MACRO (seq), \
241 MACRO (ngl), \
242 MACRO (lt), \
243 MACRO (nge), \
244 MACRO (le), \
245 MACRO (ngt)
246
247 /* Enumerates the codes above as LARCH_FP_COND_<X>. */
248 #define DECLARE_LARCH_COND(X) LARCH_FP_COND_##X
249 enum loongarch_fp_condition
250 {
251 LARCH_FP_CONDITIONS (DECLARE_LARCH_COND)
252 };
253 #undef DECLARE_LARCH_COND
254
255 /* Index X provides the string representation of LARCH_FP_COND_<X>. */
256 #define STRINGIFY(X) #X
257 const char *const
258 loongarch_fp_conditions[16] = {LARCH_FP_CONDITIONS (STRINGIFY)};
259 #undef STRINGIFY
260
261 /* Size of guard page. */
262 #define STACK_CLASH_PROTECTION_GUARD_SIZE \
263 (1 << param_stack_clash_protection_guard_size)
264
265 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at
266 least PARM_BOUNDARY bits of alignment, but will be given anything up
267 to PREFERRED_STACK_BOUNDARY bits if the type requires it. */
268
269 static unsigned int
270 loongarch_function_arg_boundary (machine_mode mode, const_tree type)
271 {
272 unsigned int alignment;
273
274 /* Use natural alignment if the type is not aggregate data. */
275 if (type && !AGGREGATE_TYPE_P (type))
276 alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
277 else
278 alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
279
280 return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment));
281 }
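
/* For example, assuming the LP64 values of a 64-bit PARM_BOUNDARY and a
   128-bit PREFERRED_STACK_BOUNDARY (an assumption about macros defined
   elsewhere, not something this function dictates), a plain int argument
   gets 64-bit alignment, while an aggregate declared with 32-byte
   alignment is capped at 128 bits.  */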
282
283 /* If MODE represents an argument that can be passed or returned in
284 floating-point registers, return the number of registers, else 0. */
285
286 static unsigned
287 loongarch_pass_mode_in_fpr_p (machine_mode mode)
288 {
289 if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG)
290 {
291 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
292 return 1;
293
294 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
295 return 2;
296 }
297
298 return 0;
299 }
300
301 typedef struct
302 {
303 const_tree type;
304 HOST_WIDE_INT offset;
305 } loongarch_aggregate_field;
306
307 /* Identify subfields of aggregates that are candidates for passing in
308 floating-point registers. */
309
310 static int
311 loongarch_flatten_aggregate_field (const_tree type,
312 loongarch_aggregate_field fields[2], int n,
313 HOST_WIDE_INT offset)
314 {
315 switch (TREE_CODE (type))
316 {
317 case RECORD_TYPE:
318 /* Can't handle incomplete types or sizes that are not fixed. */
319 if (!COMPLETE_TYPE_P (type)
320 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
321 || !tree_fits_uhwi_p (TYPE_SIZE (type)))
322 return -1;
323
324 for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
325 if (TREE_CODE (f) == FIELD_DECL)
326 {
327 if (!TYPE_P (TREE_TYPE (f)))
328 return -1;
329
330 if (DECL_SIZE (f) && integer_zerop (DECL_SIZE (f)))
331 continue;
332
333 HOST_WIDE_INT pos = offset + int_byte_position (f);
334 n = loongarch_flatten_aggregate_field (TREE_TYPE (f), fields, n,
335 pos);
336 if (n < 0)
337 return -1;
338 }
339 return n;
340
341 case ARRAY_TYPE:
342 {
343 HOST_WIDE_INT n_elts;
344 loongarch_aggregate_field subfields[2];
345 tree index = TYPE_DOMAIN (type);
346 tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
347 int n_subfields = loongarch_flatten_aggregate_field (TREE_TYPE (type),
348 subfields, 0,
349 offset);
350
351 /* Can't handle incomplete types or sizes that are not fixed. */
352 if (n_subfields <= 0
353 || !COMPLETE_TYPE_P (type)
354 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
355 || !index
356 || !TYPE_MAX_VALUE (index)
357 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
358 || !TYPE_MIN_VALUE (index)
359 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
360 || !tree_fits_uhwi_p (elt_size))
361 return -1;
362
363 n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
364 - tree_to_uhwi (TYPE_MIN_VALUE (index));
365 gcc_assert (n_elts >= 0);
366
367 for (HOST_WIDE_INT i = 0; i < n_elts; i++)
368 for (int j = 0; j < n_subfields; j++)
369 {
370 if (n >= 2)
371 return -1;
372
373 fields[n] = subfields[j];
374 fields[n++].offset += i * tree_to_uhwi (elt_size);
375 }
376
377 return n;
378 }
379
380 case COMPLEX_TYPE:
381 {
382 /* A complex type needs to consume 2 fields, so n must be 0. */
383 if (n != 0)
384 return -1;
385
386 HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type)));
387
388 if (elt_size <= UNITS_PER_FP_ARG)
389 {
390 fields[0].type = TREE_TYPE (type);
391 fields[0].offset = offset;
392 fields[1].type = TREE_TYPE (type);
393 fields[1].offset = offset + elt_size;
394
395 return 2;
396 }
397
398 return -1;
399 }
400
401 default:
402 if (n < 2
403 && ((SCALAR_FLOAT_TYPE_P (type)
404 && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FP_ARG)
405 || (INTEGRAL_TYPE_P (type)
406 && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD)))
407 {
408 fields[n].type = type;
409 fields[n].offset = offset;
410 return n + 1;
411 }
412 else
413 return -1;
414 }
415 }
416
417 /* Identify candidate aggregates for passing in floating-point registers.
418 Candidates have at most two fields after flattening. */
419
420 static int
421 loongarch_flatten_aggregate_argument (const_tree type,
422 loongarch_aggregate_field fields[2])
423 {
424 if (!type || TREE_CODE (type) != RECORD_TYPE)
425 return -1;
426
427 return loongarch_flatten_aggregate_field (type, fields, 0, 0);
428 }
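
/* A few sketches of how the flattening rules above behave (the struct
   names are purely illustrative):

     struct a { float x; float y; };    -> two SFmode fields
     struct b { float x[2]; };          -> two SFmode fields
     struct c { double _Complex z; };   -> two DFmode fields (when
                                           UNITS_PER_FP_ARG allows a double)
     struct d { float x, y, z; };       -> more than two fields, so -1

   A result of -1 means the aggregate is not a candidate for the FPR
   calling conventions.  */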
429
430 /* See whether TYPE is a record whose fields should be returned in one or
431 two floating-point registers. If so, populate FIELDS accordingly. */
432
433 static unsigned
434 loongarch_pass_aggregate_num_fpr (const_tree type,
435 loongarch_aggregate_field fields[2])
436 {
437 int n = loongarch_flatten_aggregate_argument (type, fields);
438
439 for (int i = 0; i < n; i++)
440 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
441 return 0;
442
443 return n > 0 ? n : 0;
444 }
445
446 /* See whether TYPE is a record whose fields should be returned in one
447 floating-point register and one integer register. If so, populate
448 FIELDS accordingly. */
449
450 static bool
451 loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
452 loongarch_aggregate_field fields[2])
453 {
454 unsigned num_int = 0, num_float = 0;
455 int n = loongarch_flatten_aggregate_argument (type, fields);
456
457 for (int i = 0; i < n; i++)
458 {
459 num_float += SCALAR_FLOAT_TYPE_P (fields[i].type);
460 num_int += INTEGRAL_TYPE_P (fields[i].type);
461 }
462
463 return num_int == 1 && num_float == 1;
464 }
465
466 /* Return the representation of an argument passed or returned in an FPR
467 when the value has mode VALUE_MODE and the type has TYPE_MODE. The
468 two modes may be different for structures like:
469
470 struct __attribute__((packed)) foo { float f; }
471
472 where the SFmode value "f" is passed in REGNO but the struct itself
473 has mode BLKmode. */
474
475 static rtx
476 loongarch_pass_fpr_single (machine_mode type_mode, unsigned regno,
477 machine_mode value_mode,
478 HOST_WIDE_INT offset)
479 {
480 rtx x = gen_rtx_REG (value_mode, regno);
481
482 if (type_mode != value_mode)
483 {
484 x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset));
485 x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
486 }
487 return x;
488 }
489
490 /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1.
491 MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and
492 byte offset for the first value, likewise MODE2 and OFFSET2 for the
493 second value. */
494
495 static rtx
496 loongarch_pass_fpr_pair (machine_mode mode, unsigned regno1,
497 machine_mode mode1, HOST_WIDE_INT offset1,
498 unsigned regno2, machine_mode mode2,
499 HOST_WIDE_INT offset2)
500 {
501 return gen_rtx_PARALLEL (
502 mode, gen_rtvec (2,
503 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode1, regno1),
504 GEN_INT (offset1)),
505 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode2, regno2),
506 GEN_INT (offset2))));
507 }
508
509 /* Fill INFO with information about a single argument, and return an
510 RTL pattern to pass or return the argument. CUM is the cumulative
511 state for earlier arguments. MODE is the mode of this argument and
512 TYPE is its type (if known). NAMED is true if this is a named
513 (fixed) argument rather than a variable one. RETURN_P is true if
514 returning the argument, or false if passing the argument. */
515
516 static rtx
517 loongarch_get_arg_info (struct loongarch_arg_info *info,
518 const CUMULATIVE_ARGS *cum, machine_mode mode,
519 const_tree type, bool named, bool return_p)
520 {
521 unsigned num_bytes, num_words;
522 unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
523 unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
524 unsigned alignment = loongarch_function_arg_boundary (mode, type);
525
526 memset (info, 0, sizeof (*info));
527 info->gpr_offset = cum->num_gprs;
528 info->fpr_offset = cum->num_fprs;
529
530 if (named)
531 {
532 loongarch_aggregate_field fields[2];
533 unsigned fregno = fpr_base + info->fpr_offset;
534 unsigned gregno = gpr_base + info->gpr_offset;
535
536 /* Pass one- or two-element floating-point aggregates in FPRs. */
537 if ((info->num_fprs
538 = loongarch_pass_aggregate_num_fpr (type, fields))
539 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
540 switch (info->num_fprs)
541 {
542 case 1:
543 return loongarch_pass_fpr_single (mode, fregno,
544 TYPE_MODE (fields[0].type),
545 fields[0].offset);
546
547 case 2:
548 return loongarch_pass_fpr_pair (mode, fregno,
549 TYPE_MODE (fields[0].type),
550 fields[0].offset,
551 fregno + 1,
552 TYPE_MODE (fields[1].type),
553 fields[1].offset);
554
555 default:
556 gcc_unreachable ();
557 }
558
559 /* Pass real and complex floating-point numbers in FPRs. */
560 if ((info->num_fprs = loongarch_pass_mode_in_fpr_p (mode))
561 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
562 switch (GET_MODE_CLASS (mode))
563 {
564 case MODE_FLOAT:
565 return gen_rtx_REG (mode, fregno);
566
567 case MODE_COMPLEX_FLOAT:
568 return loongarch_pass_fpr_pair (mode, fregno,
569 GET_MODE_INNER (mode), 0,
570 fregno + 1, GET_MODE_INNER (mode),
571 GET_MODE_UNIT_SIZE (mode));
572
573 default:
574 gcc_unreachable ();
575 }
576
577 /* Pass structs with one float and one integer in an FPR and a GPR. */
578 if (loongarch_pass_aggregate_in_fpr_and_gpr_p (type, fields)
579 && info->gpr_offset < MAX_ARGS_IN_REGISTERS
580 && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
581 {
582 info->num_gprs = 1;
583 info->num_fprs = 1;
584
585 if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
586 std::swap (fregno, gregno);
587
588 return loongarch_pass_fpr_pair (mode, fregno,
589 TYPE_MODE (fields[0].type),
590 fields[0].offset, gregno,
591 TYPE_MODE (fields[1].type),
592 fields[1].offset);
593 }
594 }
595
596 /* Work out the size of the argument. */
597 num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
598 num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
599
600 /* Doubleword-aligned varargs start on an even register boundary. */
601 if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
602 info->gpr_offset += info->gpr_offset & 1;
603
604 /* Partition the argument between registers and stack. */
605 info->num_fprs = 0;
606 info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
607 info->stack_p = (num_words - info->num_gprs) != 0;
608
609 if (info->num_gprs || return_p)
610 return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
611
612 return NULL_RTX;
613 }
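
/* A few sketches of how named arguments are passed when enough argument
   registers remain (register numbers deliberately omitted; these follow
   from the rules above rather than being a separate specification):

     double                       -> one FPR
     double _Complex              -> a pair of FPRs
     struct { double d; int i; }  -> one FPR plus one GPR
     struct { char c[32]; }       -> passed by reference instead, see
                                     loongarch_pass_by_reference below

   Once the relevant register file is exhausted, the argument falls back
   to GPRs and then to the stack, as computed above.  */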
614
615 /* Implement TARGET_FUNCTION_ARG. */
616
617 static rtx
618 loongarch_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
619 {
620 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
621 struct loongarch_arg_info info;
622
623 if (arg.end_marker_p ())
624 return NULL;
625
626 return loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named,
627 false);
628 }
629
630 /* Implement TARGET_FUNCTION_ARG_ADVANCE. */
631
632 static void
633 loongarch_function_arg_advance (cumulative_args_t cum_v,
634 const function_arg_info &arg)
635 {
636 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
637 struct loongarch_arg_info info;
638
639 loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
640
641 /* Advance the register count. This has the effect of setting
642 num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
643 argument required us to skip the final GPR and pass the whole
644 argument on the stack. */
645 cum->num_fprs = info.fpr_offset + info.num_fprs;
646 cum->num_gprs = info.gpr_offset + info.num_gprs;
647 }
648
649 /* Implement TARGET_ARG_PARTIAL_BYTES. */
650
651 static int
652 loongarch_arg_partial_bytes (cumulative_args_t cum,
653 const function_arg_info &generic_arg)
654 {
655 struct loongarch_arg_info arg;
656
657 loongarch_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
658 generic_arg.type, generic_arg.named, false);
659 return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
660 }
661
662 /* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
663 VALTYPE is the return type and MODE is VOIDmode. For libcalls,
664 VALTYPE is null and MODE is the mode of the return value. */
665
666 static rtx
667 loongarch_function_value_1 (const_tree type, const_tree func,
668 machine_mode mode)
669 {
670 struct loongarch_arg_info info;
671 CUMULATIVE_ARGS args;
672
673 if (type)
674 {
675 int unsigned_p = TYPE_UNSIGNED (type);
676
677 mode = TYPE_MODE (type);
678
679 /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
680 return values, promote the mode here too. */
681 mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
682 }
683
684 memset (&args, 0, sizeof (args));
685 return loongarch_get_arg_info (&info, &args, mode, type, true, true);
686 }
687
688
689 /* Implement TARGET_FUNCTION_VALUE. */
690
691 static rtx
692 loongarch_function_value (const_tree valtype, const_tree fn_decl_or_type,
693 bool outgoing ATTRIBUTE_UNUSED)
694 {
695 return loongarch_function_value_1 (valtype, fn_decl_or_type, VOIDmode);
696 }
697
698 /* Implement TARGET_LIBCALL_VALUE. */
699
700 static rtx
701 loongarch_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
702 {
703 return loongarch_function_value_1 (NULL_TREE, NULL_TREE, mode);
704 }
705
706
707 /* Implement TARGET_PASS_BY_REFERENCE. */
708
709 static bool
710 loongarch_pass_by_reference (cumulative_args_t cum_v,
711 const function_arg_info &arg)
712 {
713 HOST_WIDE_INT size = arg.type_size_in_bytes ();
714 struct loongarch_arg_info info;
715 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
716
717 /* ??? std_gimplify_va_arg_expr passes NULL for cum. Fortunately, we
718 never pass variadic arguments in floating-point registers, so we can
719 avoid the call to loongarch_get_arg_info in this case. */
720 if (cum != NULL)
721 {
722 /* Don't pass by reference if we can use a floating-point register. */
723 loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named,
724 false);
725 if (info.num_fprs)
726 return false;
727 }
728
729 /* Pass by reference if the data do not fit in two integer registers. */
730 return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
731 }
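
/* Concretely, with 64-bit GPRs (UNITS_PER_WORD == 8) an aggregate of up
   to 16 bytes can be passed by value in registers, while anything larger,
   or of variable size, is passed by reference.  */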
732
733 /* Implement TARGET_RETURN_IN_MEMORY. */
734
735 static bool
736 loongarch_return_in_memory (const_tree type,
737 const_tree fndecl ATTRIBUTE_UNUSED)
738 {
739 CUMULATIVE_ARGS args;
740 cumulative_args_t cum = pack_cumulative_args (&args);
741
742 /* The rules for returning in memory are the same as for passing the
743 first named argument by reference. */
744 memset (&args, 0, sizeof (args));
745 function_arg_info arg (const_cast<tree> (type), /*named=*/true);
746 return loongarch_pass_by_reference (cum, arg);
747 }
748
749 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
750
751 static void
752 loongarch_setup_incoming_varargs (cumulative_args_t cum,
753 const function_arg_info &arg,
754 int *pretend_size ATTRIBUTE_UNUSED,
755 int no_rtl)
756 {
757 CUMULATIVE_ARGS local_cum;
758 int gp_saved;
759
760 /* The caller has advanced CUM up to, but not beyond, the last named
761 argument. Advance a local copy of CUM past the last "real" named
762 argument, to find out how many registers are left over. */
763 local_cum = *get_cumulative_args (cum);
764 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)))
765 loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg);
766
767 /* Find out how many registers we need to save. */
768 gp_saved = cfun->va_list_gpr_size / UNITS_PER_WORD;
769 if (gp_saved > (int) (MAX_ARGS_IN_REGISTERS - local_cum.num_gprs))
770 gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
771
772 if (!no_rtl && gp_saved > 0)
773 {
774 rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
775 REG_PARM_STACK_SPACE (cfun->decl)
776 - gp_saved * UNITS_PER_WORD);
777 rtx mem = gen_frame_mem (BLKmode, ptr);
778 set_mem_alias_set (mem, get_varargs_alias_set ());
779
780 move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST, mem, gp_saved);
781 }
782 if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
783 cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
784 }
785
786 /* Make the last instruction frame-related and note that it performs
787 the operation described by FRAME_PATTERN. */
788
789 static void
790 loongarch_set_frame_expr (rtx frame_pattern)
791 {
792 rtx insn;
793
794 insn = get_last_insn ();
795 RTX_FRAME_RELATED_P (insn) = 1;
796 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, frame_pattern,
797 REG_NOTES (insn));
798 }
799
800 /* Return a frame-related rtx that stores REG at MEM.
801 REG must be a single register. */
802
803 static rtx
804 loongarch_frame_set (rtx mem, rtx reg)
805 {
806 rtx set = gen_rtx_SET (mem, reg);
807 RTX_FRAME_RELATED_P (set) = 1;
808 return set;
809 }
810
811 /* Return true if the current function must save register REGNO. */
812
813 static bool
814 loongarch_save_reg_p (unsigned int regno)
815 {
816 bool call_saved = !global_regs[regno] && !call_used_regs[regno];
817 bool might_clobber
818 = crtl->saves_all_registers || df_regs_ever_live_p (regno);
819
820 if (call_saved && might_clobber)
821 return true;
822
823 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
824 return true;
825
826 if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return)
827 return true;
828
829 return false;
830 }
831
832 /* Determine which GPR save/restore routine to call. */
833
834 static unsigned
835 loongarch_save_libcall_count (unsigned mask)
836 {
837 for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
838 if (BITSET_P (mask, n))
839 return CALLEE_SAVED_REG_NUMBER (n) + 1;
840 abort ();
841 }
842
843 /* Populate the current function's loongarch_frame_info structure.
844
845 LoongArch stack frames grow downward. High addresses are at the top.
846
847 +-------------------------------+
848 | |
849 | incoming stack arguments |
850 | |
851 +-------------------------------+ <-- incoming stack pointer
852 | |
853 | callee-allocated save area |
854 | for arguments that are |
855 | split between registers and |
856 | the stack |
857 | |
858 +-------------------------------+ <-- arg_pointer_rtx (virtual)
859 | |
860 | callee-allocated save area |
861 | for register varargs |
862 | |
863 +-------------------------------+ <-- hard_frame_pointer_rtx;
864 | | stack_pointer_rtx + gp_sp_offset
865 | GPR save area | + UNITS_PER_WORD
866 | |
867 +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
868 | | + UNITS_PER_HWVALUE
869 | FPR save area |
870 | |
871 +-------------------------------+ <-- frame_pointer_rtx (virtual)
872 | |
873 | local variables |
874 | |
875 P +-------------------------------+
876 | |
877 | outgoing stack arguments |
878 | |
879 +-------------------------------+ <-- stack_pointer_rtx
880
881 Dynamic stack allocations such as alloca insert data at point P.
882 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
883 hard_frame_pointer_rtx unchanged. */
884
885 static void
886 loongarch_compute_frame_info (void)
887 {
888 struct loongarch_frame_info *frame;
889 HOST_WIDE_INT offset;
890 unsigned int regno, i, num_x_saved = 0, num_f_saved = 0;
891
892 frame = &cfun->machine->frame;
893 memset (frame, 0, sizeof (*frame));
894
895 /* Find out which GPRs we need to save. */
896 for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
897 if (loongarch_save_reg_p (regno))
898 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
899
900 /* If this function calls eh_return, we must also save and restore the
901 EH data registers. */
902 if (crtl->calls_eh_return)
903 for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
904 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
905
906 /* Find out which FPRs we need to save. This loop must iterate over
907 the same space as its companion in loongarch_for_each_saved_reg. */
908 if (TARGET_HARD_FLOAT)
909 for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
910 if (loongarch_save_reg_p (regno))
911 frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;
912
913 /* At the bottom of the frame are any outgoing stack arguments. */
914 offset = LARCH_STACK_ALIGN (crtl->outgoing_args_size);
915 /* Next are local stack variables. */
916 offset += LARCH_STACK_ALIGN (get_frame_size ());
917 /* The virtual frame pointer points above the local variables. */
918 frame->frame_pointer_offset = offset;
919 /* Next are the callee-saved FPRs. */
920 if (frame->fmask)
921 {
922 offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG);
923 frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
924 }
925 else
926 frame->fp_sp_offset = offset;
927 /* Next are the callee-saved GPRs. */
928 if (frame->mask)
929 {
930 unsigned x_save_size = LARCH_STACK_ALIGN (num_x_saved * UNITS_PER_WORD);
931 unsigned num_save_restore
932 = 1 + loongarch_save_libcall_count (frame->mask);
933
934 /* Only use save/restore routines if they don't alter the stack size. */
935 if (LARCH_STACK_ALIGN (num_save_restore * UNITS_PER_WORD) == x_save_size)
936 frame->save_libcall_adjustment = x_save_size;
937
938 offset += x_save_size;
939 frame->gp_sp_offset = offset - UNITS_PER_WORD;
940 }
941 else
942 frame->gp_sp_offset = offset;
943 /* The hard frame pointer points above the callee-saved GPRs. */
944 frame->hard_frame_pointer_offset = offset;
945 /* Above the hard frame pointer is the callee-allocated varargs save area. */
946 offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size);
947 /* Next is the callee-allocated area for pretend stack arguments. */
948 offset += LARCH_STACK_ALIGN (crtl->args.pretend_args_size);
949 /* Arg pointer must be below pretend args, but must be above alignment
950 padding. */
951 frame->arg_pointer_offset = offset - crtl->args.pretend_args_size;
952 frame->total_size = offset;
953 /* Above this point are the incoming stack pointer and any incoming arguments. */
954
955 /* Only use save/restore routines when the GPRs are atop the frame. */
956 if (frame->hard_frame_pointer_offset != frame->total_size)
957 frame->save_libcall_adjustment = 0;
958 }
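
/* A worked example, assuming 16-byte stack alignment for LARCH_STACK_ALIGN
   and 8-byte registers (both assumptions about macros defined elsewhere):
   a function with no outgoing arguments, 24 bytes of locals, and only $ra
   and $fp to save would get

     frame_pointer_offset      = 32   (locals rounded up to 32)
     fp_sp_offset              = 32   (no FPRs saved)
     gp_sp_offset              = 40   (32 + 16 - UNITS_PER_WORD)
     hard_frame_pointer_offset = 48
     total_size                = 48   (no varargs or pretend args)

   The numbers only show how the offsets relate; they are not taken from a
   real compilation.  */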
959
960 /* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer
961 or argument pointer. TO is either the stack pointer or hard frame
962 pointer. */
963
964 HOST_WIDE_INT
965 loongarch_initial_elimination_offset (int from, int to)
966 {
967 HOST_WIDE_INT src, dest;
968
969 loongarch_compute_frame_info ();
970
971 if (to == HARD_FRAME_POINTER_REGNUM)
972 dest = cfun->machine->frame.hard_frame_pointer_offset;
973 else if (to == STACK_POINTER_REGNUM)
974 dest = 0; /* The stack pointer is the base of all offsets, hence 0. */
975 else
976 gcc_unreachable ();
977
978 if (from == FRAME_POINTER_REGNUM)
979 src = cfun->machine->frame.frame_pointer_offset;
980 else if (from == ARG_POINTER_REGNUM)
981 src = cfun->machine->frame.arg_pointer_offset;
982 else
983 gcc_unreachable ();
984
985 return src - dest;
986 }
987
988 /* A function to save or store a register. The first argument is the
989 register and the second is the stack slot. */
990 typedef void (*loongarch_save_restore_fn) (rtx, rtx);
991
992 /* Use FN to save or restore register REGNO. MODE is the register's
993 mode and OFFSET is the offset of its save slot from the current
994 stack pointer. */
995
996 static void
997 loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset,
998 loongarch_save_restore_fn fn)
999 {
1000 rtx mem;
1001
1002 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
1003 fn (gen_rtx_REG (mode, regno), mem);
1004 }
1005
1006 /* Call FN for each register that is saved by the current function.
1007 SP_OFFSET is the offset of the current stack pointer from the start
1008 of the frame. */
1009
1010 static void
1011 loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
1012 loongarch_save_restore_fn fn)
1013 {
1014 HOST_WIDE_INT offset;
1015
1016 /* Save the link register and s-registers. */
1017 offset = cfun->machine->frame.gp_sp_offset - sp_offset;
1018 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
1019 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
1020 {
1021 if (!cfun->machine->reg_is_wrapped_separately[regno])
1022 loongarch_save_restore_reg (word_mode, regno, offset, fn);
1023
1024 offset -= UNITS_PER_WORD;
1025 }
1026
1027 /* This loop must iterate over the same space as its companion in
1028 loongarch_compute_frame_info. */
1029 offset = cfun->machine->frame.fp_sp_offset - sp_offset;
1030 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
1031
1032 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
1033 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
1034 {
1035 if (!cfun->machine->reg_is_wrapped_separately[regno])
1036 loongarch_save_restore_reg (mode, regno, offset, fn);
1037
1038 offset -= GET_MODE_SIZE (mode);
1039 }
1040 }
1041
1042 /* Emit a move from SRC to DEST. Assume that the move expanders can
1043 handle all moves if !can_create_pseudo_p (). The distinction is
1044 important because, unlike emit_move_insn, the move expanders know
1045 how to force Pmode objects into the constant pool even when the
1046 constant pool address is not itself legitimate. */
1047
1048 rtx
1049 loongarch_emit_move (rtx dest, rtx src)
1050 {
1051 return (can_create_pseudo_p () ? emit_move_insn (dest, src)
1052 : emit_move_insn_1 (dest, src));
1053 }
1054
1055 /* Save register REG to MEM. Make the instruction frame-related. */
1056
1057 static void
1058 loongarch_save_reg (rtx reg, rtx mem)
1059 {
1060 loongarch_emit_move (mem, reg);
1061 loongarch_set_frame_expr (loongarch_frame_set (mem, reg));
1062 }
1063
1064 /* Restore register REG from MEM. */
1065
1066 static void
1067 loongarch_restore_reg (rtx reg, rtx mem)
1068 {
1069 rtx insn = loongarch_emit_move (reg, mem);
1070 rtx dwarf = NULL_RTX;
1071 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
1072 REG_NOTES (insn) = dwarf;
1073
1074 RTX_FRAME_RELATED_P (insn) = 1;
1075 }
1076
1077 /* For stack frames that can't be allocated with a single ADDI instruction,
1078 compute the best value to initially allocate. It must at a minimum
1079 allocate enough space to spill the callee-saved registers. */
1080
1081 static HOST_WIDE_INT
1082 loongarch_first_stack_step (struct loongarch_frame_info *frame)
1083 {
1084 HOST_WIDE_INT min_first_step
1085 = LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset);
1086
1087 /* When stack checking is required, or when stack clash protection is
1088 enabled and the frame is larger than the guard size, return
1089 MIN_FIRST_STEP so that the initial allocation can be probed. */
1090 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
1091 || (flag_stack_clash_protection
1092 && frame->total_size > STACK_CLASH_PROTECTION_GUARD_SIZE))
1093 return min_first_step;
1094
1095 if (IMM12_OPERAND (frame->total_size))
1096 return frame->total_size;
1097
1098 HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
1099 HOST_WIDE_INT min_second_step = frame->total_size - max_first_step;
1100 gcc_assert (min_first_step <= max_first_step);
1101
1102 /* As an optimization, use the least-significant bits of the total frame
1103 size, so that the second adjustment step is just LU12I + ADD. */
1104 if (!IMM12_OPERAND (min_second_step)
1105 && frame->total_size % IMM_REACH < IMM_REACH / 2
1106 && frame->total_size % IMM_REACH >= min_first_step)
1107 return frame->total_size % IMM_REACH;
1108
1109 return max_first_step;
1110 }
1111
1112 static void
1113 loongarch_emit_stack_tie (void)
1114 {
1115 emit_insn (gen_stack_tie (Pmode, stack_pointer_rtx,
1116 frame_pointer_needed ? hard_frame_pointer_rtx
1117 : stack_pointer_rtx));
1118 }
1119
1120 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
1121
1122 #if PROBE_INTERVAL > 16384
1123 #error Cannot use indexed addressing mode for stack probing
1124 #endif
1125
1126 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
1127 inclusive. These are offsets from the current stack pointer. */
1128
1129 static void
1130 loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
1131 {
1132 HOST_WIDE_INT rounded_size;
1133 HOST_WIDE_INT interval;
1134
1135 if (flag_stack_clash_protection)
1136 interval = STACK_CLASH_PROTECTION_GUARD_SIZE;
1137 else
1138 interval = PROBE_INTERVAL;
1139
1140 rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode);
1141 rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode);
1142
1143 size = size + first;
1144
1145 /* Sanity check for the addressing mode we're going to use. */
1146 gcc_assert (first <= 16384);
1147
1148 /* Step 1: round SIZE to the previous multiple of the interval. */
1149
1150 rounded_size = ROUND_DOWN (size, interval);
1151
1152 /* Step 2: compute initial and final value of the loop counter. */
1153
1154 emit_move_insn (r14, GEN_INT (interval));
1155
1156 /* If rounded_size is zero, the space requested by the local
1157 variables is smaller than the probe interval, so there is no
1158 need to probe the allocated space. */
1159 if (rounded_size != 0)
1160 {
1161 /* Step 3: the loop
1162
1163 do
1164 {
1165 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
1166 probe at TEST_ADDR
1167 }
1168 while (TEST_ADDR != LAST_ADDR)
1169
1170 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
1171 until it is equal to ROUNDED_SIZE. */
1172
1173 if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * interval)
1174 {
1175 for (HOST_WIDE_INT i = 0; i < rounded_size; i += interval)
1176 {
1177 emit_insn (gen_rtx_SET (stack_pointer_rtx,
1178 gen_rtx_MINUS (Pmode,
1179 stack_pointer_rtx,
1180 r14)));
1181 emit_move_insn (gen_rtx_MEM (Pmode,
1182 gen_rtx_PLUS (Pmode,
1183 stack_pointer_rtx,
1184 const0_rtx)),
1185 const0_rtx);
1186 emit_insn (gen_blockage ());
1187 }
1188 dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
1189 }
1190 else
1191 {
1192 emit_move_insn (r12, GEN_INT (rounded_size));
1193 emit_insn (gen_rtx_SET (r12,
1194 gen_rtx_MINUS (Pmode,
1195 stack_pointer_rtx,
1196 r12)));
1197
1198 emit_insn (gen_probe_stack_range (Pmode, stack_pointer_rtx,
1199 stack_pointer_rtx, r12, r14));
1200 emit_insn (gen_blockage ());
1201 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
1202 }
1203 }
1204 else
1205 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
1206
1207
1208 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
1209 that SIZE is equal to ROUNDED_SIZE. */
1210
1211 if (size != rounded_size)
1212 {
1213 if (size - rounded_size >= 2048)
1214 {
1215 emit_move_insn (r14, GEN_INT (size - rounded_size));
1216 emit_insn (gen_rtx_SET (stack_pointer_rtx,
1217 gen_rtx_MINUS (Pmode,
1218 stack_pointer_rtx,
1219 r14)));
1220 }
1221 else
1222 emit_insn (gen_rtx_SET (stack_pointer_rtx,
1223 gen_rtx_PLUS (Pmode,
1224 stack_pointer_rtx,
1225 GEN_INT (rounded_size - size))));
1226 }
1227
1228 if (first)
1229 {
1230 emit_move_insn (r12, GEN_INT (first));
1231 emit_insn (gen_rtx_SET (stack_pointer_rtx,
1232 gen_rtx_PLUS (Pmode,
1233 stack_pointer_rtx, r12)));
1234 }
1235 /* Make sure nothing is scheduled before we are done. */
1236 emit_insn (gen_blockage ());
1237 }
1238
1239 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
1240 absolute addresses. */
1241 const char *
1242 loongarch_output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
1243 {
1244 static int labelno = 0;
1245 char loop_lab[32], tmp[64];
1246 rtx xops[3];
1247
1248 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
1249
1250 /* Loop. */
1251 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
1252
1253 /* TEST_ADDR = TEST_ADDR - PROBE_INTERVAL. */
1254 xops[0] = reg1;
1255 xops[2] = reg3;
1256 if (TARGET_64BIT)
1257 output_asm_insn ("sub.d\t%0,%0,%2", xops);
1258 else
1259 output_asm_insn ("sub.w\t%0,%0,%2", xops);
1260
1261 /* Probe at TEST_ADDR, test if TEST_ADDR == LAST_ADDR and branch. */
1262 xops[1] = reg2;
1263 strcpy (tmp, "bne\t%0,%1,");
1264 if (TARGET_64BIT)
1265 output_asm_insn ("st.d\t$r0,%0,0", xops);
1266 else
1267 output_asm_insn ("st.w\t$r0,%0,0", xops);
1268 output_asm_insn (strcat (tmp, &loop_lab[1]), xops);
1269
1270 return "";
1271 }
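
/* On a 64-bit target the emitted loop looks roughly like this, with
   illustrative register operands and label:

   .LPSRL0:
	sub.d	$r12, $r12, $r14
	st.d	$r0, $r12, 0
	bne	$r12, $r13, .LPSRL0

   i.e. step the test address down by the probe interval, store zero to
   probe it, and loop until the last address is reached.  */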
1272
1273 /* Expand the "prologue" pattern. */
1274
1275 void
1276 loongarch_expand_prologue (void)
1277 {
1278 struct loongarch_frame_info *frame = &cfun->machine->frame;
1279 HOST_WIDE_INT size = frame->total_size;
1280 rtx insn;
1281
1282 if (flag_stack_usage_info)
1283 current_function_static_stack_size = size;
1284
1285 /* Save the registers. */
1286 if ((frame->mask | frame->fmask) != 0)
1287 {
1288 HOST_WIDE_INT step1 = MIN (size, loongarch_first_stack_step (frame));
1289
1290 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
1291 GEN_INT (-step1));
1292 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
1293 size -= step1;
1294 loongarch_for_each_saved_reg (size, loongarch_save_reg);
1295 }
1296
1297 /* Set up the frame pointer, if we're using one. */
1298 if (frame_pointer_needed)
1299 {
1300 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
1301 GEN_INT (frame->hard_frame_pointer_offset - size));
1302 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
1303
1304 loongarch_emit_stack_tie ();
1305 }
1306
1307 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
1308 || flag_stack_clash_protection)
1309 {
1310 HOST_WIDE_INT first = get_stack_check_protect ();
1311
1312 if (frame->total_size == 0)
1313 {
1314 /* do nothing. */
1315 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
1316 return;
1317 }
1318
1319 if (crtl->is_leaf && !cfun->calls_alloca)
1320 {
1321 HOST_WIDE_INT interval;
1322
1323 if (flag_stack_clash_protection)
1324 interval = STACK_CLASH_PROTECTION_GUARD_SIZE;
1325 else
1326 interval = PROBE_INTERVAL;
1327
1328 if (size > interval && size > first)
1329 loongarch_emit_probe_stack_range (first, size - first);
1330 else
1331 loongarch_emit_probe_stack_range (first, size);
1332 }
1333 else
1334 loongarch_emit_probe_stack_range (first, size);
1335
1336 if (size > 0)
1337 {
1338 /* Describe the effect of the previous instructions. */
1339 insn = plus_constant (Pmode, stack_pointer_rtx, -size);
1340 insn = gen_rtx_SET (stack_pointer_rtx, insn);
1341 loongarch_set_frame_expr (insn);
1342 }
1343 return;
1344 }
1345
1346 if (size > 0)
1347 {
1348 if (IMM12_OPERAND (-size))
1349 {
1350 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
1351 GEN_INT (-size));
1352 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
1353 }
1354 else
1355 {
1356 loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode),
1357 GEN_INT (-size));
1358 emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
1359 LARCH_PROLOGUE_TEMP (Pmode)));
1360
1361 /* Describe the effect of the previous instructions. */
1362 insn = plus_constant (Pmode, stack_pointer_rtx, -size);
1363 insn = gen_rtx_SET (stack_pointer_rtx, insn);
1364 loongarch_set_frame_expr (insn);
1365 }
1366 }
1367 }
1368
1369 /* Return nonzero if this function is known to have a null epilogue.
1370 This allows the optimizer to omit jumps to jumps if no stack
1371 was created. */
1372
1373 bool
1374 loongarch_can_use_return_insn (void)
1375 {
1376 return reload_completed && cfun->machine->frame.total_size == 0;
1377 }
1378
1379 /* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P
1380 says which. */
1381
1382 void
1383 loongarch_expand_epilogue (bool sibcall_p)
1384 {
1385 /* Split the frame into two. STEP1 is the amount of stack we should
1386 deallocate before restoring the registers. STEP2 is the amount we
1387 should deallocate afterwards.
1388
1389 Start off by assuming that no registers need to be restored. */
1390 struct loongarch_frame_info *frame = &cfun->machine->frame;
1391 HOST_WIDE_INT step1 = frame->total_size;
1392 HOST_WIDE_INT step2 = 0;
1393 rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
1394 rtx insn;
1395
1396 /* We need a memory barrier to prevent reads from the deallocated stack. */
1397 bool need_barrier_p
1398 = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0;
1399
1400 if (!sibcall_p && loongarch_can_use_return_insn ())
1401 {
1402 emit_jump_insn (gen_return ());
1403 return;
1404 }
1405
1406 /* Move past any dynamic stack allocations. */
1407 if (cfun->calls_alloca)
1408 {
1409 /* Emit a barrier to prevent loads from a deallocated stack. */
1410 loongarch_emit_stack_tie ();
1411 need_barrier_p = false;
1412
1413 rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset);
1414 if (!IMM12_OPERAND (INTVAL (adjust)))
1415 {
1416 loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust);
1417 adjust = LARCH_PROLOGUE_TEMP (Pmode);
1418 }
1419
1420 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
1421 hard_frame_pointer_rtx,
1422 adjust));
1423
1424 rtx dwarf = NULL_RTX;
1425 rtx minus_offset = GEN_INT (-frame->hard_frame_pointer_offset);
1426 rtx cfa_adjust_value = gen_rtx_PLUS (Pmode,
1427 hard_frame_pointer_rtx,
1428 minus_offset);
1429
1430 rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
1431 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
1432 RTX_FRAME_RELATED_P (insn) = 1;
1433
1434 REG_NOTES (insn) = dwarf;
1435 }
1436
1437 /* If we need to restore registers, deallocate as much stack as
1438 possible in the second step without going out of range. */
1439 if ((frame->mask | frame->fmask) != 0)
1440 {
1441 step2 = loongarch_first_stack_step (frame);
1442 step1 -= step2;
1443 }
1444
1445 /* Deallocate the first STEP1 bytes of the frame: SP = SP + STEP1. */
1446 if (step1 > 0)
1447 {
1448 /* Emit a barrier to prevent loads from a deallocated stack. */
1449 loongarch_emit_stack_tie ();
1450 need_barrier_p = false;
1451
1452 /* Get an rtx for STEP1 that we can add to BASE. */
1453 rtx adjust = GEN_INT (step1);
1454 if (!IMM12_OPERAND (step1))
1455 {
1456 loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust);
1457 adjust = LARCH_PROLOGUE_TEMP (Pmode);
1458 }
1459
1460 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
1461 stack_pointer_rtx,
1462 adjust));
1463
1464 rtx dwarf = NULL_RTX;
1465 rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
1466 GEN_INT (step2));
1467
1468 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
1469 RTX_FRAME_RELATED_P (insn) = 1;
1470
1471 REG_NOTES (insn) = dwarf;
1472 }
1473
1474 /* Restore the registers. */
1475 loongarch_for_each_saved_reg (frame->total_size - step2,
1476 loongarch_restore_reg);
1477
1478 if (need_barrier_p)
1479 loongarch_emit_stack_tie ();
1480
1481 /* Deallocate the final bit of the frame. */
1482 if (step2 > 0)
1483 {
1484 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
1485 stack_pointer_rtx,
1486 GEN_INT (step2)));
1487
1488 rtx dwarf = NULL_RTX;
1489 rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, const0_rtx);
1490 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
1491 RTX_FRAME_RELATED_P (insn) = 1;
1492
1493 REG_NOTES (insn) = dwarf;
1494 }
1495
1496 /* Add in the __builtin_eh_return stack adjustment. */
1497 if (crtl->calls_eh_return)
1498 emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
1499 EH_RETURN_STACKADJ_RTX));
1500
1501 if (!sibcall_p)
1502 emit_jump_insn (gen_simple_return_internal (ra));
1503 }
1504
1505 #define LU32I_B (0xfffffULL << 32)
1506 #define LU52I_B (0xfffULL << 52)
1507
1508 /* Fill CODES with a sequence of rtl operations to load VALUE.
1509 Return the number of operations needed. */
1510
1511 static unsigned int
1512 loongarch_build_integer (struct loongarch_integer_op *codes,
1513 HOST_WIDE_INT value)
1514
1515 {
1516 unsigned int cost = 0;
1517
1518 /* Get the lower 32 bits of the value. */
1519 HOST_WIDE_INT low_part = (int32_t)value;
1520
1521 if (IMM12_OPERAND (low_part) || IMM12_OPERAND_UNSIGNED (low_part))
1522 {
1523 /* The lower 32 bits of the value can be loaded with a single
1524 instruction. */
1525 codes[cost].code = UNKNOWN;
1526 codes[cost].method = METHOD_NORMAL;
1527 codes[cost].value = low_part;
1528 codes[cost].curr_value = low_part;
1529 cost++;
1530 }
1531 else
1532 {
1533 /* lu12i.w + ior. */
1534 codes[cost].code = UNKNOWN;
1535 codes[cost].method = METHOD_NORMAL;
1536 codes[cost].value = low_part & ~(IMM_REACH - 1);
1537 codes[cost].curr_value = codes[cost].value;
1538 cost++;
1539 HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1);
1540 if (iorv != 0)
1541 {
1542 codes[cost].code = IOR;
1543 codes[cost].method = METHOD_NORMAL;
1544 codes[cost].value = iorv;
1545 codes[cost].curr_value = low_part;
1546 cost++;
1547 }
1548 }
1549
1550 if (TARGET_64BIT)
1551 {
1552 bool lu32i[2] = {(value & LU32I_B) == 0, (value & LU32I_B) == LU32I_B};
1553 bool lu52i[2] = {(value & LU52I_B) == 0, (value & LU52I_B) == LU52I_B};
1554
1555 int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31;
1556 int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51;
1557 /* Determine whether the upper 32 bits are sign-extended from the lower
1558 32 bits. If so, the instructions that load the high part can be
1559 omitted. */
1560 if (lu32i[sign31] && lu52i[sign31])
1561 return cost;
1562 /* Determine whether bits 32-51 are sign-extended from the lower 32
1563 bits. If so, directly load 52-63 bits. */
1564 else if (lu32i[sign31])
1565 {
1566 codes[cost].method = METHOD_LU52I;
1567 codes[cost].value = value & LU52I_B;
1568 codes[cost].curr_value = value;
1569 return cost + 1;
1570 }
1571
1572 codes[cost].method = METHOD_LU32I;
1573 codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : 0);
1574 codes[cost].curr_value = (value & 0xfffffffffffff)
1575 | (sign51 ? LU52I_B : 0);
1576 cost++;
1577
1578 /* Determine whether bits 52-63 are sign-extended from the lower bits,
1579 and if not, load bits 52-63. */
1580 if (!lu52i[(value & (HOST_WIDE_INT_1U << 51)) >> 51])
1581 {
1582 codes[cost].method = METHOD_LU52I;
1583 codes[cost].value = value & LU52I_B;
1584 codes[cost].curr_value = value;
1585 cost++;
1586 }
1587 }
1588
1589 gcc_assert (cost <= LARCH_MAX_INTEGER_OPS);
1590
1591 return cost;
1592 }
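
/* For example, a value whose upper 32 bits are just the sign extension of
   the lower 32 bits needs only the METHOD_NORMAL steps.  Loading
   0xffffffff80001234 costs two operations (illustrative register and
   assembler syntax):

     lu12i.w  $t0, 0x80001       # $t0 = 0xffffffff80001000
     ori      $t0, $t0, 0x234    # $t0 = 0xffffffff80001234

   Bits 32-51 and 52-63 already match the sign bit, so the function
   returns early with a cost of 2.  */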
1593
1594 /* Return the number of operations needed to load VALUE into a
1595 register. The constant itself is split into instructions in
1596 loongarch_output_move. */
1597
1598 static unsigned int
1599 loongarch_integer_cost (HOST_WIDE_INT value)
1600 {
1601 struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS];
1602 return loongarch_build_integer (codes, value);
1603 }
1604
1605 /* Implement TARGET_LEGITIMATE_CONSTANT_P. */
1606
1607 static bool
1608 loongarch_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1609 {
1610 return loongarch_const_insns (x) > 0;
1611 }
1612
1613 /* Return true if X is a thread-local symbol. */
1614
1615 static bool
1616 loongarch_tls_symbol_p (rtx x)
1617 {
1618 return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0;
1619 }
1620
1621 /* Return true if SYMBOL_REF X is associated with a global symbol
1622 (in the STB_GLOBAL sense). */
1623
1624 bool
1625 loongarch_global_symbol_p (const_rtx x)
1626 {
1627 if (LABEL_REF_P (x))
1628 return false;
1629
1630 const_tree decl = SYMBOL_REF_DECL (x);
1631
1632 if (!decl)
1633 return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x);
1634
1635 /* Weakref symbols are not TREE_PUBLIC, but their targets are global
1636 or weak symbols. Relocations in the object file will be against
1637 the target symbol, so it's that symbol's binding that matters here. */
1638 return DECL_P (decl) && (TREE_PUBLIC (decl) || DECL_WEAK (decl));
1639 }
1640
1641 bool
1642 loongarch_global_symbol_noweak_p (const_rtx x)
1643 {
1644 if (LABEL_REF_P (x))
1645 return false;
1646
1647 const_tree decl = SYMBOL_REF_DECL (x);
1648
1649 if (!decl)
1650 return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x);
1651
1652 return DECL_P (decl) && TREE_PUBLIC (decl);
1653 }
1654
1655 bool
1656 loongarch_weak_symbol_p (const_rtx x)
1657 {
1658 const_tree decl;
1659 if (LABEL_REF_P (x) || !(decl = SYMBOL_REF_DECL (x)))
1660 return false;
1661 return DECL_P (decl) && DECL_WEAK (decl);
1662 }
1663
1664 /* Return true if SYMBOL_REF X binds locally. */
1665
1666 bool
1667 loongarch_symbol_binds_local_p (const_rtx x)
1668 {
1669 if (TARGET_DIRECT_EXTERN_ACCESS)
1670 return true;
1671
1672 if (SYMBOL_REF_P (x))
1673 return (SYMBOL_REF_DECL (x)
1674 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
1675 : SYMBOL_REF_LOCAL_P (x));
1676 else
1677 return false;
1678 }
1679
1680 /* Return true if OP is a constant vector of mode MODE whose elements
1681 are all equal and have exactly one bit set. */
1682
1683 bool
1684 loongarch_const_vector_bitimm_set_p (rtx op, machine_mode mode)
1685 {
1686 if (GET_CODE (op) == CONST_VECTOR && op != CONST0_RTX (mode))
1687 {
1688 unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0));
1689 int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode)));
1690
1691 if (vlog2 != -1)
1692 {
1693 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
1694 gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1);
1695 return loongarch_const_vector_same_val_p (op, mode);
1696 }
1697 }
1698
1699 return false;
1700 }
1701
1702 /* Return true if OP is a constant vector of mode MODE whose elements
1703 are all equal and have exactly one bit clear. */
1704
1705 bool
1706 loongarch_const_vector_bitimm_clr_p (rtx op, machine_mode mode)
1707 {
1708 if (GET_CODE (op) == CONST_VECTOR && op != CONSTM1_RTX (mode))
1709 {
1710 unsigned HOST_WIDE_INT val = ~UINTVAL (CONST_VECTOR_ELT (op, 0));
1711 int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode)));
1712
1713 if (vlog2 != -1)
1714 {
1715 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
1716 gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1);
1717 return loongarch_const_vector_same_val_p (op, mode);
1718 }
1719 }
1720
1721 return false;
1722 }
1723
1724 /* Return true if OP is a constant vector of mode MODE whose elements
1725 all have the same value. */
1726
1727 bool
1728 loongarch_const_vector_same_val_p (rtx op, machine_mode mode)
1729 {
1730 int i, nunits = GET_MODE_NUNITS (mode);
1731 rtx first;
1732
1733 if (GET_CODE (op) != CONST_VECTOR || GET_MODE (op) != mode)
1734 return false;
1735
1736 first = CONST_VECTOR_ELT (op, 0);
1737 for (i = 1; i < nunits; i++)
1738 if (!rtx_equal_p (first, CONST_VECTOR_ELT (op, i)))
1739 return false;
1740
1741 return true;
1742 }
1743
1744 /* Return true if OP is a constant vector of mode MODE whose elements
1745 all have the same value, and that value consists of a single
1746 replicated byte. */
1747
1748 bool
1749 loongarch_const_vector_same_bytes_p (rtx op, machine_mode mode)
1750 {
1751 int i, bytes;
1752 HOST_WIDE_INT val, first_byte;
1753 rtx first;
1754
1755 if (!loongarch_const_vector_same_val_p (op, mode))
1756 return false;
1757
1758 first = CONST_VECTOR_ELT (op, 0);
1759 bytes = GET_MODE_UNIT_SIZE (mode);
1760 val = INTVAL (first);
1761 first_byte = val & 0xff;
1762 for (i = 1; i < bytes; i++)
1763 {
1764 val >>= 8;
1765 if ((val & 0xff) != first_byte)
1766 return false;
1767 }
1768
1769 return true;
1770 }
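
/* For instance, a V8HI vector whose elements are all 0x5555 passes this
   test (each element is the byte 0x55 replicated), whereas a vector of
   0x1234 elements does not.  */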
1771
1772 /* Return true if OP is a constant vector of mode MODE whose elements
1773 all have the same integer value in the range [LOW, HIGH]. */
1774
1775 bool
1776 loongarch_const_vector_same_int_p (rtx op, machine_mode mode, HOST_WIDE_INT low,
1777 HOST_WIDE_INT high)
1778 {
1779 HOST_WIDE_INT value;
1780 rtx elem0;
1781
1782 if (!loongarch_const_vector_same_val_p (op, mode))
1783 return false;
1784
1785 elem0 = CONST_VECTOR_ELT (op, 0);
1786 if (!CONST_INT_P (elem0))
1787 return false;
1788
1789 value = INTVAL (elem0);
1790 return (value >= low && value <= high);
1791 }
1792
1793 /* Return true if OP is a constant vector with repeated 4-element sets
1794 in mode MODE. */
1795
1796 bool
1797 loongarch_const_vector_shuffle_set_p (rtx op, machine_mode mode)
1798 {
1799 int nunits = GET_MODE_NUNITS (mode);
1800 int nsets = nunits / 4;
1801 int set = 0;
1802 int i, j;
1803
1804 /* Check if we have the same 4-element sets. */
1805 for (j = 0; j < nsets; j++, set = 4 * j)
1806 for (i = 0; i < 4; i++)
1807 if ((INTVAL (XVECEXP (op, 0, i))
1808 != (INTVAL (XVECEXP (op, 0, set + i)) - set))
1809 || !IN_RANGE (INTVAL (XVECEXP (op, 0, set + i)), 0, set + 3))
1810 return false;
1811 return true;
1812 }
1813
1814 /* Return true if rtx constants of mode MODE should be put into a small
1815 data section. */
1816
1817 static bool
1818 loongarch_rtx_constant_in_small_data_p (machine_mode mode)
1819 {
1820 return (GET_MODE_SIZE (mode) <= g_switch_value);
1821 }
1822
1823 /* Return the method that should be used to access SYMBOL_REF or
1824 LABEL_REF X. */
1825
1826 static enum loongarch_symbol_type
1827 loongarch_classify_symbol (const_rtx x)
1828 {
1829 enum loongarch_symbol_type pcrel =
1830 TARGET_CMODEL_EXTREME ? SYMBOL_PCREL64 : SYMBOL_PCREL;
1831
1832 if (!SYMBOL_REF_P (x))
1833 return pcrel;
1834
1835 if (SYMBOL_REF_TLS_MODEL (x))
1836 return SYMBOL_TLS;
1837
1838 if (!loongarch_symbol_binds_local_p (x))
1839 return SYMBOL_GOT_DISP;
1840
1841 tree t = SYMBOL_REF_DECL (x);
1842 if (!t)
1843 return pcrel;
1844
1845 t = lookup_attribute ("model", DECL_ATTRIBUTES (t));
1846 if (!t)
1847 return pcrel;
1848
1849 t = TREE_VALUE (TREE_VALUE (t));
1850
1851 /* loongarch_handle_model_attribute should reject other values. */
1852 gcc_assert (TREE_CODE (t) == STRING_CST);
1853
1854 const char *model = TREE_STRING_POINTER (t);
1855 if (strcmp (model, "normal") == 0)
1856 return SYMBOL_PCREL;
1857 if (strcmp (model, "extreme") == 0)
1858 return SYMBOL_PCREL64;
1859
1860 /* loongarch_handle_model_attribute should reject unknown model
1861 name. */
1862 gcc_unreachable ();
1863 }
1864
1865 /* Classify the base of symbolic expression X, i.e. classify the
1866 underlying SYMBOL_REF or LABEL_REF. */
1867
1868 static enum loongarch_symbol_type
1869 loongarch_classify_symbolic_expression (rtx x)
1870 {
1871 rtx offset;
1872
1873 split_const (x, &x, &offset);
1874 if (UNSPEC_ADDRESS_P (x))
1875 return UNSPEC_ADDRESS_TYPE (x);
1876
1877 return loongarch_classify_symbol (x);
1878 }
1879
1880 /* Return true if X is a symbolic constant. If it is,
1881 store the type of the symbol in *SYMBOL_TYPE. */
1882
1883 bool
1884 loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type)
1885 {
1886 rtx offset;
1887
1888 split_const (x, &x, &offset);
1889 if (UNSPEC_ADDRESS_P (x))
1890 {
1891 *symbol_type = UNSPEC_ADDRESS_TYPE (x);
1892 x = UNSPEC_ADDRESS (x);
1893 }
1894 else if (SYMBOL_REF_P (x) || LABEL_REF_P (x))
1895 {
1896 *symbol_type = loongarch_classify_symbol (x);
1897 if (*symbol_type == SYMBOL_TLS)
1898 return true;
1899 }
1900 else
1901 return false;
1902
1903 if (offset == const0_rtx)
1904 return true;
1905
1906 /* Check whether a nonzero offset is valid for the underlying
1907 relocations. */
1908 switch (*symbol_type)
1909 {
1910 case SYMBOL_TLS_IE:
1911 case SYMBOL_TLS_LE:
1912 case SYMBOL_TLSGD:
1913 case SYMBOL_TLSLDM:
1914 case SYMBOL_PCREL:
1915 case SYMBOL_PCREL64:
1916 /* GAS rejects offsets outside the range [-2^31, 2^31-1]. */
1917 return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);
1918
1919 case SYMBOL_GOT_DISP:
1920 case SYMBOL_TLS:
1921 return false;
1922 }
1923 gcc_unreachable ();
1924 }
1925
1926 /* Returns the number of instructions necessary to reference a symbol. */
1927
1928 static int
1929 loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode)
1930 {
1931 /* LSX LD.* and ST.* cannot support loading symbols via an immediate
1932 operand. */
1933 if (LSX_SUPPORTED_MODE_P (mode))
1934 return 0;
1935
1936 switch (type)
1937 {
1938 case SYMBOL_GOT_DISP:
1939 /* The constant will have to be loaded from the GOT before it
1940 is used in an address. */
1941 if (!TARGET_EXPLICIT_RELOCS && mode != MAX_MACHINE_MODE)
1942 return 0;
1943
1944 return 3;
1945
1946 case SYMBOL_PCREL:
1947 case SYMBOL_TLS_IE:
1948 case SYMBOL_TLS_LE:
1949 return 2;
1950
1951 case SYMBOL_TLSGD:
1952 case SYMBOL_TLSLDM:
1953 return 3;
1954
1955 case SYMBOL_PCREL64:
1956 return 5;
1957
1958 case SYMBOL_TLS:
1959 /* We don't treat a bare TLS symbol as a constant. */
1960 return 0;
1961 }
1962 gcc_unreachable ();
1963 }
1964
1965 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
1966
1967 static bool
1968 loongarch_cannot_force_const_mem (machine_mode mode, rtx x)
1969 {
1970 enum loongarch_symbol_type type;
1971 rtx base, offset;
1972
1973 /* As an optimization, reject constants that loongarch_legitimize_move
1974 can expand inline.
1975
1976 Suppose we have a multi-instruction sequence that loads constant C
1977 into register R. If R does not get allocated a hard register, and
1978 R is used in an operand that allows both registers and memory
1979 references, reload will consider forcing C into memory and using
1980 one of the instruction's memory alternatives. Returning false
1981 here will force it to use an input reload instead. */
1982 if ((CONST_INT_P (x) || GET_CODE (x) == CONST_VECTOR)
1983 && loongarch_legitimate_constant_p (mode, x))
1984 return true;
1985
1986 split_const (x, &base, &offset);
1987 if (loongarch_symbolic_constant_p (base, &type))
1988 {
1989 /* The same optimization as for CONST_INT. */
1990 if (IMM12_INT (offset)
1991 && loongarch_symbol_insns (type, MAX_MACHINE_MODE) > 0)
1992 return true;
1993 }
1994
1995 /* TLS symbols must be computed by loongarch_legitimize_move. */
1996 if (tls_referenced_p (x))
1997 return true;
1998
1999 return false;
2000 }
2001
2002 /* Return true if register REGNO is a valid base register for mode MODE.
2003 STRICT_P is true if REG_OK_STRICT is in effect. */
2004
2005 int
2006 loongarch_regno_mode_ok_for_base_p (int regno,
2007 machine_mode mode ATTRIBUTE_UNUSED,
2008 bool strict_p)
2009 {
2010 if (!HARD_REGISTER_NUM_P (regno))
2011 {
2012 if (!strict_p)
2013 return true;
2014 regno = reg_renumber[regno];
2015 }
2016
2017 /* These fake registers will be eliminated to either the stack or
2018 hard frame pointer, both of which are usually valid base registers.
2019 Reload deals with the cases where the eliminated form isn't valid. */
2020 if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
2021 return true;
2022
2023 return GP_REG_P (regno);
2024 }
2025
2026 /* Return true if X is a valid base register for mode MODE.
2027 STRICT_P is true if REG_OK_STRICT is in effect. */
2028
2029 static bool
2030 loongarch_valid_base_register_p (rtx x, machine_mode mode, bool strict_p)
2031 {
2032 if (!strict_p && SUBREG_P (x))
2033 x = SUBREG_REG (x);
2034
2035 return (REG_P (x)
2036 && loongarch_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p));
2037 }
2038
2039 /* Return true if, for every base register BASE_REG, (plus BASE_REG X)
2040 can address a value of mode MODE. */
2041
2042 static bool
2043 loongarch_valid_offset_p (rtx x, machine_mode mode)
2044 {
2045 /* Check that X is a signed 12-bit number, or, for SImode or DImode
2046 accesses, a signed 16-bit number that is
2047 4-byte aligned. */
2048 if (!(const_arith_operand (x, Pmode)
2049 || ((mode == E_SImode || mode == E_DImode)
2050 && const_imm16_operand (x, Pmode)
2051 && (loongarch_signed_immediate_p (INTVAL (x), 14, 2)))))
2052 return false;
2053
2054 /* We may need to split multiword moves, so make sure that every word
2055 is accessible. */
2056 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
2057 && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD))
2058 return false;
2059
2060 /* LSX LD.* and ST.* support 10-bit signed offsets. */
2061 if (LSX_SUPPORTED_MODE_P (mode)
2062 && !loongarch_signed_immediate_p (INTVAL (x), 10,
2063 loongarch_ldst_scaled_shift (mode)))
2064 return false;
2065
2066 return true;
2067 }
2068
2069 /* Should a symbol of type SYMBOL_TYPE be split into two or more pieces? */
2070
2071 bool
2072 loongarch_split_symbol_type (enum loongarch_symbol_type symbol_type)
2073 {
2074 switch (symbol_type)
2075 {
2076 case SYMBOL_PCREL:
2077 case SYMBOL_PCREL64:
2078 case SYMBOL_GOT_DISP:
2079 case SYMBOL_TLS_IE:
2080 case SYMBOL_TLS_LE:
2081 case SYMBOL_TLSGD:
2082 case SYMBOL_TLSLDM:
2083 return true;
2084
2085 case SYMBOL_TLS:
2086 return false;
2087
2088 default:
2089 gcc_unreachable ();
2090 }
2091 }
2092
2093 /* Return true if a LO_SUM can address a value of mode MODE when the
2094 LO_SUM symbol has type SYMBOL_TYPE. */
2095
2096 static bool
2097 loongarch_valid_lo_sum_p (enum loongarch_symbol_type symbol_type,
2098 machine_mode mode, rtx x)
2099 {
2100 int align, size;
2101
2102 /* Check that symbols of type SYMBOL_TYPE can be used to access values
2103 of mode MODE. */
2104 if (loongarch_symbol_insns (symbol_type, mode) == 0)
2105 return false;
2106
2107 /* Check that there is a known low-part relocation. */
2108 if (!loongarch_split_symbol_type (symbol_type))
2109 return false;
2110
2111 /* We can't tell size or alignment when we have BLKmode, so try extracting a
2112 decl from the symbol if possible. */
2113 if (mode == BLKmode)
2114 {
2115 rtx offset;
2116
2117 /* Extract the symbol from the LO_SUM operand, if any. */
2118 split_const (x, &x, &offset);
2119
2120 /* Might be a CODE_LABEL. We can compute align but not size for that,
2121 so don't bother trying to handle it. */
2122 if (!SYMBOL_REF_P (x))
2123 return false;
2124
2125 /* Use worst case assumptions if we don't have a SYMBOL_REF_DECL. */
2126 align = (SYMBOL_REF_DECL (x)
2127 ? DECL_ALIGN (SYMBOL_REF_DECL (x))
2128 : 1);
2129 size = (SYMBOL_REF_DECL (x) && DECL_SIZE (SYMBOL_REF_DECL (x))
2130 ? tree_to_uhwi (DECL_SIZE (SYMBOL_REF_DECL (x)))
2131 : 2*BITS_PER_WORD);
2132 }
2133 else
2134 {
2135 align = GET_MODE_ALIGNMENT (mode);
2136 size = GET_MODE_BITSIZE (mode);
2137 }
2138
2139 /* We may need to split multiword moves, so make sure that each word
2140 can be accessed without inducing a carry. */
2141 if (size > BITS_PER_WORD
2142 && (!TARGET_STRICT_ALIGN || size > align))
2143 return false;
2144
2145 return true;
2146 }
2147
2148 static bool
2149 loongarch_valid_index_p (struct loongarch_address_info *info, rtx x,
2150 machine_mode mode, bool strict_p)
2151 {
2152 rtx index;
2153
2154 if ((REG_P (x) || SUBREG_P (x))
2155 && GET_MODE (x) == Pmode)
2156 {
2157 index = x;
2158 }
2159 else
2160 return false;
2161
2162 if (!strict_p
2163 && SUBREG_P (index)
2164 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
2165 index = SUBREG_REG (index);
2166
2167 if (loongarch_valid_base_register_p (index, mode, strict_p))
2168 {
2169 info->type = ADDRESS_REG_REG;
2170 info->offset = index;
2171 return true;
2172 }
2173
2174 return false;
2175 }
2176
2177 /* Return true if X is a valid address for machine mode MODE. If it is,
2178 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2179 effect. */
2180
2181 static bool
2182 loongarch_classify_address (struct loongarch_address_info *info, rtx x,
2183 machine_mode mode, bool strict_p)
2184 {
2185 switch (GET_CODE (x))
2186 {
2187 case REG:
2188 case SUBREG:
2189 info->type = ADDRESS_REG;
2190 info->reg = x;
2191 info->offset = const0_rtx;
2192 return loongarch_valid_base_register_p (info->reg, mode, strict_p);
2193
2194 case PLUS:
2195 if (loongarch_valid_base_register_p (XEXP (x, 0), mode, strict_p)
2196 && loongarch_valid_index_p (info, XEXP (x, 1), mode, strict_p))
2197 {
2198 info->reg = XEXP (x, 0);
2199 return true;
2200 }
2201
2202 if (loongarch_valid_base_register_p (XEXP (x, 1), mode, strict_p)
2203 && loongarch_valid_index_p (info, XEXP (x, 0), mode, strict_p))
2204 {
2205 info->reg = XEXP (x, 1);
2206 return true;
2207 }
2208
2209 info->type = ADDRESS_REG;
2210 info->reg = XEXP (x, 0);
2211 info->offset = XEXP (x, 1);
2212 return (loongarch_valid_base_register_p (info->reg, mode, strict_p)
2213 && loongarch_valid_offset_p (info->offset, mode));
2214
2215 case LO_SUM:
2216 info->type = ADDRESS_LO_SUM;
2217 info->reg = XEXP (x, 0);
2218 info->offset = XEXP (x, 1);
2219 /* We have to trust the creator of the LO_SUM to do something vaguely
2220 sane. Target-independent code that creates a LO_SUM should also
2221 create and verify the matching HIGH. Target-independent code that
2222 adds an offset to a LO_SUM must prove that the offset will not
2223 induce a carry. Failure to do either of these things would be
2224 a bug, and we are not required to check for it here. The MIPS
2225 backend itself should only create LO_SUMs for valid symbolic
2226 constants, with the high part being either a HIGH or a copy
2227 of _gp. */
2228 info->symbol_type
2229 = loongarch_classify_symbolic_expression (info->offset);
2230 return (loongarch_valid_base_register_p (info->reg, mode, strict_p)
2231 && loongarch_valid_lo_sum_p (info->symbol_type, mode,
2232 info->offset));
2233 case CONST_INT:
2234 /* Small-integer addresses don't occur very often, but they
2235 are legitimate if $r0 is a valid base register. */
2236 info->type = ADDRESS_CONST_INT;
2237 return IMM12_OPERAND (INTVAL (x));
2238
2239 default:
2240 return false;
2241 }
2242 }
2243
2244 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
2245
2246 static bool
2247 loongarch_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
2248 code_helper = ERROR_MARK)
2249 {
2250 struct loongarch_address_info addr;
2251
2252 return loongarch_classify_address (&addr, x, mode, strict_p);
2253 }
2254
2255 /* Return true if ADDR matches the pattern for the indexed address
2256 instruction. */
2257
2258 static bool
2259 loongarch_index_address_p (rtx addr, machine_mode mode ATTRIBUTE_UNUSED)
2260 {
2261 if (GET_CODE (addr) != PLUS
2262 || !REG_P (XEXP (addr, 0))
2263 || !REG_P (XEXP (addr, 1)))
2264 return false;
2265 return true;
2266 }
2267
2268 /* Return the number of instructions needed to load or store a value
2269 of mode MODE at address X. Return 0 if X isn't valid for MODE.
2270 Assume that multiword moves may need to be split into word moves
2271 if MIGHT_SPLIT_P, otherwise assume that a single load or store is
2272 enough. */
2273
2274 int
2275 loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p)
2276 {
2277 struct loongarch_address_info addr;
2278 int factor;
2279 bool lsx_p = !might_split_p && LSX_SUPPORTED_MODE_P (mode);
2280
2281 if (!loongarch_classify_address (&addr, x, mode, false))
2282 return 0;
2283
2284 /* BLKmode is used for single unaligned loads and stores and should
2285 not count as a multiword mode. (GET_MODE_SIZE (BLKmode) is pretty
2286 meaningless, so we have to single it out as a special case one way
2287 or the other.) */
2288 if (mode != BLKmode && might_split_p)
2289 factor = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2290 else
2291 factor = 1;
2292
2293 if (loongarch_classify_address (&addr, x, mode, false))
2294 switch (addr.type)
2295 {
2296 case ADDRESS_REG:
2297 if (lsx_p)
2298 {
2299 /* LSX LD.* and ST.* support 10-bit signed offsets. */
2300 if (loongarch_signed_immediate_p (INTVAL (addr.offset), 10,
2301 loongarch_ldst_scaled_shift (mode)))
2302 return 1;
2303 else
2304 return 0;
2305 }
2306 return factor;
2307
2308 case ADDRESS_REG_REG:
2309 return factor;
2310
2311 case ADDRESS_CONST_INT:
2312 return lsx_p ? 0 : factor;
2313
2314 case ADDRESS_LO_SUM:
2315 return factor + 1;
2316
2317 case ADDRESS_SYMBOLIC:
2318 return lsx_p ? 0
2319 : factor * loongarch_symbol_insns (addr.symbol_type, mode);
2320 }
2321 return 0;
2322 }
2323
2324 /* Return true if X fits within an unsigned field of BITS bits that is
2325 shifted left SHIFT bits before being used. */
2326
2327 bool
2328 loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits,
2329 int shift = 0)
2330 {
2331 return (x & ((1 << shift) - 1)) == 0 && x < ((unsigned) 1 << (shift + bits));
2332 }
2333
2334 /* Return true if X fits within a signed field of BITS bits that is
2335 shifted left SHIFT bits before being used. */
2336
2337 bool
2338 loongarch_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits,
2339 int shift = 0)
2340 {
2341 x += 1 << (bits + shift - 1);
2342 return loongarch_unsigned_immediate_p (x, bits, shift);
2343 }
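
/* Illustrative note (not part of the upstream sources): with BITS = 14
   and SHIFT = 2, this predicate accepts exactly the multiples of 4 in
   [-32768, 32764]; adding 1 << 15 above maps that range onto [0, 65532],
   which the unsigned check then accepts as a shifted 14-bit field. */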
2344
2345 /* Return the scale shift applied to an LSX LD/ST address offset. */
2346
2347 int
2348 loongarch_ldst_scaled_shift (machine_mode mode)
2349 {
2350 int shift = exact_log2 (GET_MODE_UNIT_SIZE (mode));
2351
2352 if (shift < 0 || shift > 8)
2353 gcc_unreachable ();
2354
2355 return shift;
2356 }
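
/* Illustrative note (not part of the upstream sources): the shift is the
   log2 of the vector element size, e.g. 0 for V16QImode and 3 for
   V2DImode. Combined with the 10-bit signed-offset check used for LSX
   LD.* and ST.*, a V2DImode offset must therefore be a multiple of 8 in
   the range [-4096, 4088]. */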
2357
2358 /* Return true if X is a legitimate address with a 12-bit offset,
2359 or a LO_SUM address.
2360 MODE is the mode of the value being accessed. */
2361
2362 bool
2363 loongarch_12bit_offset_address_p (rtx x, machine_mode mode)
2364 {
2365 struct loongarch_address_info addr;
2366
2367 return (loongarch_classify_address (&addr, x, mode, false)
2368 && ((addr.type == ADDRESS_REG
2369 && CONST_INT_P (addr.offset)
2370 && LARCH_12BIT_OFFSET_P (INTVAL (addr.offset)))
2371 || addr.type == ADDRESS_LO_SUM));
2372 }
2373
2374 /* Return true if X is a legitimate address with a 14-bit offset shifted 2.
2375 MODE is the mode of the value being accessed. */
2376
2377 bool
2378 loongarch_14bit_shifted_offset_address_p (rtx x, machine_mode mode)
2379 {
2380 struct loongarch_address_info addr;
2381
2382 return (loongarch_classify_address (&addr, x, mode, false)
2383 && addr.type == ADDRESS_REG
2384 && CONST_INT_P (addr.offset)
2385 && LARCH_16BIT_OFFSET_P (INTVAL (addr.offset))
2386 && LARCH_SHIFT_2_OFFSET_P (INTVAL (addr.offset)));
2387 }
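
/* Illustrative note (not part of the upstream sources): this form
   corresponds to the 16-bit, 4-byte-aligned offsets that instructions
   such as LDPTR.W/LDPTR.D and STPTR.W/STPTR.D encode as a 14-bit
   immediate shifted left by 2. */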
2388
2389 /* Return true if X is a legitimate address with base and index.
2390 MODE is the mode of the value being accessed. */
2391
2392 bool
2393 loongarch_base_index_address_p (rtx x, machine_mode mode)
2394 {
2395 struct loongarch_address_info addr;
2396
2397 return (loongarch_classify_address (&addr, x, mode, false)
2398 && addr.type == ADDRESS_REG_REG
2399 && REG_P (addr.offset));
2400 }
2401
2402 /* Return the number of instructions needed to load constant X.
2403 Return 0 if X isn't a valid constant. */
2404
2405 int
2406 loongarch_const_insns (rtx x)
2407 {
2408 enum loongarch_symbol_type symbol_type;
2409 rtx offset;
2410
2411 switch (GET_CODE (x))
2412 {
2413 case HIGH:
2414 if (!loongarch_symbolic_constant_p (XEXP (x, 0), &symbol_type)
2415 || !loongarch_split_symbol_type (symbol_type))
2416 return 0;
2417
2418 /* This is simply a PCALAU12I. */
2419 return 1;
2420
2421 case CONST_INT:
2422 return loongarch_integer_cost (INTVAL (x));
2423
2424 case CONST_VECTOR:
2425 if (LSX_SUPPORTED_MODE_P (GET_MODE (x))
2426 && loongarch_const_vector_same_int_p (x, GET_MODE (x), -512, 511))
2427 return 1;
2428 /* Fall through. */
2429 case CONST_DOUBLE:
2430 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
2431
2432 case CONST:
2433 /* See if we can refer to X directly. */
2434 if (loongarch_symbolic_constant_p (x, &symbol_type))
2435 return loongarch_symbol_insns (symbol_type, MAX_MACHINE_MODE);
2436
2437 /* Otherwise try splitting the constant into a base and offset.
2438 If the offset is a 12-bit value, we can load the base address
2439 into a register and then use ADDI.{W/D} to add in the offset.
2440 If the offset is larger, we can load the base and offset
2441 into separate registers and add them together with ADD.{W/D}.
2442 However, the latter is only possible before reload; during
2443 and after reload, we must have the option of forcing the
2444 constant into the pool instead. */
2445 split_const (x, &x, &offset);
2446 if (offset != 0)
2447 {
2448 int n = loongarch_const_insns (x);
2449 if (n != 0)
2450 {
2451 if (IMM12_INT (offset))
2452 return n + 1;
2453 else if (!targetm.cannot_force_const_mem (GET_MODE (x), x))
2454 return n + 1 + loongarch_integer_cost (INTVAL (offset));
2455 }
2456 }
2457 return 0;
2458
2459 case SYMBOL_REF:
2460 case LABEL_REF:
2461 return loongarch_symbol_insns (
2462 loongarch_classify_symbol (x), MAX_MACHINE_MODE);
2463
2464 default:
2465 return 0;
2466 }
2467 }
2468
2469 /* X is a doubleword constant that can be handled by splitting it into
2470 two words and loading each word separately. Return the number of
2471 instructions required to do this. */
2472
2473 int
2474 loongarch_split_const_insns (rtx x)
2475 {
2476 unsigned int low, high;
2477
2478 low = loongarch_const_insns (loongarch_subword (x, false));
2479 high = loongarch_const_insns (loongarch_subword (x, true));
2480 gcc_assert (low > 0 && high > 0);
2481 return low + high;
2482 }
2483
2484 bool loongarch_split_move_insn_p (rtx dest, rtx src);
2485 /* Return one word of 128-bit value OP, taking into account the fixed
2486 endianness of certain registers. BYTE is the byte offset of the word. */
2487
2488 rtx
2489 loongarch_subword_at_byte (rtx op, unsigned int byte)
2490 {
2491 machine_mode mode;
2492
2493 mode = GET_MODE (op);
2494 if (mode == VOIDmode)
2495 mode = TImode;
2496
2497 gcc_assert (!FP_REG_RTX_P (op));
2498
2499 if (MEM_P (op))
2500 return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte));
2501
2502 return simplify_gen_subreg (word_mode, op, mode, byte);
2503 }
2504
2505 /* Return the number of instructions needed to implement INSN,
2506 given that it loads from or stores to MEM. */
2507
2508 int
2509 loongarch_load_store_insns (rtx mem, rtx_insn *insn)
2510 {
2511 machine_mode mode;
2512 bool might_split_p;
2513 rtx set;
2514
2515 gcc_assert (MEM_P (mem));
2516 mode = GET_MODE (mem);
2517
2518 /* Try to prove that INSN does not need to be split. */
2519 might_split_p = GET_MODE_SIZE (mode) > UNITS_PER_WORD;
2520 if (might_split_p)
2521 {
2522 set = single_set (insn);
2523 if (set
2524 && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set)))
2525 might_split_p = false;
2526 }
2527
2528 return loongarch_address_insns (XEXP (mem, 0), mode, might_split_p);
2529 }
2530
2531 /* Return true if we need to trap on division by zero. */
2532
2533 bool
2534 loongarch_check_zero_div_p (void)
2535 {
2536 /* If -m[no-]check-zero-division is given explicitly. */
2537 if (target_flags_explicit & MASK_CHECK_ZERO_DIV)
2538 return TARGET_CHECK_ZERO_DIV;
2539
2540 /* If not, don't trap for optimized code, except at -Og. */
2541 return !optimize || optimize_debug;
2542 }
2543
2544 /* Return the number of instructions needed for an integer division. */
2545
2546 int
2547 loongarch_idiv_insns (machine_mode mode ATTRIBUTE_UNUSED)
2548 {
2549 int count;
2550
2551 count = 1;
2552 if (loongarch_check_zero_div_p ())
2553 count += 2;
2554
2555 return count;
2556 }
2557
2558 /* Emit an instruction of the form (set TARGET (CODE OP0 OP1)). */
2559
2560 void
2561 loongarch_emit_binary (enum rtx_code code, rtx target, rtx op0, rtx op1)
2562 {
2563 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (code, GET_MODE (target),
2564 op0, op1)));
2565 }
2566
2567 /* Compute (CODE OP0 OP1) and store the result in a new register
2568 of mode MODE. Return that new register. */
2569
2570 static rtx
2571 loongarch_force_binary (machine_mode mode, enum rtx_code code, rtx op0,
2572 rtx op1)
2573 {
2574 rtx reg;
2575
2576 reg = gen_reg_rtx (mode);
2577 loongarch_emit_binary (code, reg, op0, op1);
2578 return reg;
2579 }
2580
2581 /* Copy VALUE to a register and return that register. If new pseudos
2582 are allowed, copy it into a new register, otherwise use DEST. */
2583
2584 static rtx
2585 loongarch_force_temporary (rtx dest, rtx value)
2586 {
2587 if (can_create_pseudo_p ())
2588 return force_reg (Pmode, value);
2589 else
2590 {
2591 loongarch_emit_move (dest, value);
2592 return dest;
2593 }
2594 }
2595
2596 /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE,
2597 then add CONST_INT OFFSET to the result. */
2598
2599 static rtx
2600 loongarch_unspec_address_offset (rtx base, rtx offset,
2601 enum loongarch_symbol_type symbol_type)
2602 {
2603 base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
2604 UNSPEC_ADDRESS_FIRST + symbol_type);
2605 if (offset != const0_rtx)
2606 base = gen_rtx_PLUS (Pmode, base, offset);
2607 return gen_rtx_CONST (Pmode, base);
2608 }
2609
2610 /* Return an UNSPEC address with underlying address ADDRESS and symbol
2611 type SYMBOL_TYPE. */
2612
2613 rtx
2614 loongarch_unspec_address (rtx address, enum loongarch_symbol_type symbol_type)
2615 {
2616 rtx base, offset;
2617
2618 split_const (address, &base, &offset);
2619 return loongarch_unspec_address_offset (base, offset, symbol_type);
2620 }
2621
2622 /* Emit an instruction of the form (set TARGET SRC). */
2623
2624 static rtx
2625 loongarch_emit_set (rtx target, rtx src)
2626 {
2627 emit_insn (gen_rtx_SET (target, src));
2628 return target;
2629 }
2630
2631 /* If OP is an UNSPEC address, return the address to which it refers,
2632 otherwise return OP itself. */
2633
2634 rtx
2635 loongarch_strip_unspec_address (rtx op)
2636 {
2637 rtx base, offset;
2638
2639 split_const (op, &base, &offset);
2640 if (UNSPEC_ADDRESS_P (base))
2641 op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset));
2642 return op;
2643 }
2644
2645 /* Return a legitimate address for REG + OFFSET. TEMP is as for
2646 loongarch_force_temporary; it is only needed when OFFSET is not an
2647 IMM12_OPERAND. */
2648
2649 static rtx
2650 loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
2651 {
2652 if (!IMM12_OPERAND (offset))
2653 {
2654 rtx high;
2655
2656 /* Leave OFFSET as a 12-bit offset and put the excess in HIGH.
2657 The addition inside the macro CONST_HIGH_PART may cause an
2658 overflow, so we need to force a sign-extension check. */
2659 high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
2660 offset = CONST_LOW_PART (offset);
2661 high = loongarch_force_temporary (temp, high);
2662 reg = loongarch_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
2663 }
2664 return plus_constant (Pmode, reg, offset);
2665 }
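
/* Illustrative note (not part of the upstream sources), assuming the
   usual 12-bit high/low split performed by CONST_HIGH_PART and
   CONST_LOW_PART: an offset of 0x1234 splits into a high part of 0x1000
   and a low part of 0x234, while 0x1fff splits into 0x2000 and -1,
   because the low part is interpreted as a signed 12-bit value. */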
2666
2667 /* The __tls_get_addr symbol. */
2668 static GTY (()) rtx loongarch_tls_symbol;
2669
2670 /* Load an entry from the GOT for a TLS GD access. */
2671
2672 static rtx
2673 loongarch_got_load_tls_gd (rtx dest, rtx sym)
2674 {
2675 return gen_got_load_tls_gd (Pmode, dest, sym);
2676 }
2677
2678 /* Load an entry from the GOT for a TLS LD access. */
2679
2680 static rtx
2681 loongarch_got_load_tls_ld (rtx dest, rtx sym)
2682 {
2683 return gen_got_load_tls_ld (Pmode, dest, sym);
2684 }
2685
2686 /* Load an entry from the GOT for a TLS IE access. */
2687
2688 static rtx
2689 loongarch_got_load_tls_ie (rtx dest, rtx sym)
2690 {
2691 return gen_got_load_tls_ie (Pmode, dest, sym);
2692 }
2693
2694 /* Add in the thread pointer for a TLS LE access. */
2695
2696 static rtx
2697 loongarch_got_load_tls_le (rtx dest, rtx sym)
2698 {
2699 return gen_got_load_tls_le (Pmode, dest, sym);
2700 }
2701
2702 /* Return an instruction sequence that calls __tls_get_addr. SYM is
2703 the TLS symbol we are referencing and TYPE is the symbol type to use
2704 (either global dynamic or local dynamic). V0 is an RTX for the
2705 return value location. */
2706
2707 static rtx_insn *
2708 loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
2709 {
2710 rtx loc, a0;
2711 rtx_insn *insn;
2712 rtx tmp = gen_reg_rtx (Pmode);
2713
2714 a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST);
2715
2716 if (!loongarch_tls_symbol)
2717 loongarch_tls_symbol = init_one_libfunc ("__tls_get_addr");
2718
2719 loc = loongarch_unspec_address (sym, type);
2720
2721 start_sequence ();
2722
2723 if (TARGET_EXPLICIT_RELOCS)
2724 {
2725 /* Split the TLS symbol into high and low parts. */
2726 rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
2727 high = loongarch_force_temporary (tmp, high);
2728
2729 if (TARGET_CMODEL_EXTREME)
2730 {
2731 gcc_assert (TARGET_EXPLICIT_RELOCS);
2732
2733 rtx tmp1 = gen_reg_rtx (Pmode);
2734 emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc));
2735 emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc));
2736 emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc));
2737 emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1));
2738 }
2739 else
2740 emit_insn (gen_tls_low (Pmode, a0, high, loc));
2741 }
2742 else
2743 {
2744 if (type == SYMBOL_TLSLDM)
2745 emit_insn (loongarch_got_load_tls_ld (a0, loc));
2746 else if (type == SYMBOL_TLSGD)
2747 emit_insn (loongarch_got_load_tls_gd (a0, loc));
2748 else
2749 gcc_unreachable ();
2750 }
2751
2752 if (flag_plt)
2753 {
2754 switch (la_target.cmodel)
2755 {
2756 case CMODEL_NORMAL:
2757 insn = emit_call_insn (gen_call_value_internal (v0,
2758 loongarch_tls_symbol,
2759 const0_rtx));
2760 break;
2761
2762 case CMODEL_MEDIUM:
2763 {
2764 rtx reg = gen_reg_rtx (Pmode);
2765 if (TARGET_EXPLICIT_RELOCS)
2766 {
2767 emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol));
2768 rtx call = gen_call_value_internal_1 (Pmode, v0, reg,
2769 loongarch_tls_symbol,
2770 const0_rtx);
2771 insn = emit_call_insn (call);
2772 }
2773 else
2774 {
2775 emit_move_insn (reg, loongarch_tls_symbol);
2776 insn = emit_call_insn (gen_call_value_internal (v0,
2777 reg,
2778 const0_rtx));
2779 }
2780 break;
2781 }
2782
2783 /* The extreme code model does not support PLT. */
2784 case CMODEL_EXTREME:
2785 case CMODEL_LARGE:
2786 case CMODEL_TINY:
2787 case CMODEL_TINY_STATIC:
2788 default:
2789 gcc_unreachable ();
2790 }
2791 }
2792 else
2793 {
2794 rtx dest = gen_reg_rtx (Pmode);
2795
2796 switch (la_target.cmodel)
2797 {
2798 case CMODEL_NORMAL:
2799 case CMODEL_MEDIUM:
2800 {
2801 if (TARGET_EXPLICIT_RELOCS)
2802 {
2803 rtx high = gen_reg_rtx (Pmode);
2804 loongarch_emit_move (high,
2805 gen_rtx_HIGH (Pmode,
2806 loongarch_tls_symbol));
2807 emit_insn (gen_ld_from_got (Pmode, dest, high,
2808 loongarch_tls_symbol));
2809 }
2810 else
2811 loongarch_emit_move (dest, loongarch_tls_symbol);
2812 break;
2813 }
2814
2815 case CMODEL_EXTREME:
2816 {
2817 gcc_assert (TARGET_EXPLICIT_RELOCS);
2818
2819 rtx tmp1 = gen_reg_rtx (Pmode);
2820 rtx high = gen_reg_rtx (Pmode);
2821
2822 loongarch_emit_move (high,
2823 gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
2824 loongarch_emit_move (tmp1, gen_rtx_LO_SUM (Pmode,
2825 gen_rtx_REG (Pmode, 0),
2826 loongarch_tls_symbol));
2827 emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol));
2828 emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol));
2829 loongarch_emit_move (dest,
2830 gen_rtx_MEM (Pmode,
2831 gen_rtx_PLUS (Pmode,
2832 high, tmp1)));
2833 }
2834 break;
2835
2836 case CMODEL_LARGE:
2837 case CMODEL_TINY:
2838 case CMODEL_TINY_STATIC:
2839 default:
2840 gcc_unreachable ();
2841 }
2842
2843 insn = emit_call_insn (gen_call_value_internal (v0, dest, const0_rtx));
2844 }
2845
2846 RTL_CONST_CALL_P (insn) = 1;
2847 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
2848 insn = get_insns ();
2849
2850 end_sequence ();
2851
2852 return insn;
2853 }
2854
2855 /* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
2856 its address. The return value will be both a valid address and a valid
2857 SET_SRC (either a REG or a LO_SUM). */
2858
2859 static rtx
2860 loongarch_legitimize_tls_address (rtx loc)
2861 {
2862 rtx dest, tp, tmp, tmp1, tmp2, tmp3;
2863 enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);
2864 rtx_insn *insn;
2865
2866 switch (model)
2867 {
2868 case TLS_MODEL_LOCAL_DYNAMIC:
2869 tmp = gen_rtx_REG (Pmode, GP_RETURN);
2870 dest = gen_reg_rtx (Pmode);
2871 insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSLDM, tmp);
2872 emit_libcall_block (insn, dest, tmp, loc);
2873 break;
2874
2875 case TLS_MODEL_GLOBAL_DYNAMIC:
2876 tmp = gen_rtx_REG (Pmode, GP_RETURN);
2877 dest = gen_reg_rtx (Pmode);
2878 insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSGD, tmp);
2879 emit_libcall_block (insn, dest, tmp, loc);
2880 break;
2881
2882 case TLS_MODEL_INITIAL_EXEC:
2883 {
2884 /* la.tls.ie; tp-relative add. */
2885 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2886 tmp1 = gen_reg_rtx (Pmode);
2887 dest = gen_reg_rtx (Pmode);
2888 if (TARGET_EXPLICIT_RELOCS)
2889 {
2890 tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
2891 tmp3 = gen_reg_rtx (Pmode);
2892 rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
2893 high = loongarch_force_temporary (tmp3, high);
2894
2895 if (TARGET_CMODEL_EXTREME)
2896 {
2897 gcc_assert (TARGET_EXPLICIT_RELOCS);
2898
2899 rtx tmp3 = gen_reg_rtx (Pmode);
2900 emit_insn (gen_tls_low (Pmode, tmp3,
2901 gen_rtx_REG (Pmode, 0), tmp2));
2902 emit_insn (gen_lui_h_lo20 (tmp3, tmp3, tmp2));
2903 emit_insn (gen_lui_h_hi12 (tmp3, tmp3, tmp2));
2904 emit_move_insn (tmp1,
2905 gen_rtx_MEM (Pmode,
2906 gen_rtx_PLUS (Pmode,
2907 high, tmp3)));
2908 }
2909 else
2910 emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
2911 }
2912 else
2913 emit_insn (loongarch_got_load_tls_ie (tmp1, loc));
2914 emit_insn (gen_add3_insn (dest, tmp1, tp));
2915 }
2916 break;
2917
2918 case TLS_MODEL_LOCAL_EXEC:
2919 {
2920 /* la.tls.le; tp-relative add. */
2921 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2922 tmp1 = gen_reg_rtx (Pmode);
2923 dest = gen_reg_rtx (Pmode);
2924
2925 if (TARGET_EXPLICIT_RELOCS)
2926 {
2927 tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
2928 tmp3 = gen_reg_rtx (Pmode);
2929 rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
2930 high = loongarch_force_temporary (tmp3, high);
2931 emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2));
2932
2933 if (TARGET_CMODEL_EXTREME)
2934 {
2935 gcc_assert (TARGET_EXPLICIT_RELOCS);
2936
2937 emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2));
2938 emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2));
2939 }
2940 }
2941 else
2942 emit_insn (loongarch_got_load_tls_le (tmp1, loc));
2943 emit_insn (gen_add3_insn (dest, tmp1, tp));
2944 }
2945 break;
2946
2947 default:
2948 gcc_unreachable ();
2949 }
2950 return dest;
2951 }
2952
2953 rtx
2954 loongarch_legitimize_call_address (rtx addr)
2955 {
2956 if (!call_insn_operand (addr, VOIDmode))
2957 {
2958 rtx reg = gen_reg_rtx (Pmode);
2959 loongarch_emit_move (reg, addr);
2960 return reg;
2961 }
2962
2963 enum loongarch_symbol_type symbol_type = loongarch_classify_symbol (addr);
2964
2965 /* Split the function call insn 'bl sym' or 'bl %plt(sym)' into:
2966 pcalau12i $rd, %pc_hi20(sym)
2967 jr $rd, %pc_lo12(sym). */
2968
2969 if (TARGET_CMODEL_MEDIUM
2970 && TARGET_EXPLICIT_RELOCS
2971 && (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
2972 && (symbol_type == SYMBOL_PCREL
2973 || (symbol_type == SYMBOL_GOT_DISP && flag_plt)))
2974 {
2975 rtx reg = gen_reg_rtx (Pmode);
2976 emit_insn (gen_pcalau12i (Pmode, reg, addr));
2977 return gen_rtx_LO_SUM (Pmode, reg, addr);
2978 }
2979
2980 return addr;
2981 }
2982
2983 /* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR
2984 and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. */
2985
2986 static void
2987 loongarch_split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr)
2988 {
2989 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
2990 {
2991 *base_ptr = XEXP (x, 0);
2992 *offset_ptr = INTVAL (XEXP (x, 1));
2993 }
2994 else
2995 {
2996 *base_ptr = x;
2997 *offset_ptr = 0;
2998 }
2999 }
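
/* Illustrative note (not part of the upstream sources): given
   (plus (reg R) (const_int 100)), the helper above stores (reg R) in
   *BASE_PTR and 100 in *OFFSET_PTR; any other rtx is stored unchanged
   with an offset of 0. */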
3000
3001 /* If X is not a valid address for mode MODE, force it into a register. */
3002
3003 static rtx
3004 loongarch_force_address (rtx x, machine_mode mode)
3005 {
3006 if (!loongarch_legitimate_address_p (mode, x, false))
3007 x = force_reg (Pmode, x);
3008 return x;
3009 }
3010
3011 static bool
3012 loongarch_symbol_extreme_p (enum loongarch_symbol_type type)
3013 {
3014 switch (type)
3015 {
3016 case SYMBOL_PCREL:
3017 return false;
3018 case SYMBOL_PCREL64:
3019 return true;
3020 default:
3021 return TARGET_CMODEL_EXTREME;
3022 }
3023 }
3024
3025 /* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise
3026 it appears in a MEM of that mode. Return true if ADDR is a legitimate
3027 constant in that context and can be split into high and low parts.
3028 If so, and if LOW_OUT is nonnull, emit the high part and store the
3029 low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise.
3030
3031 Return false if built with '-mno-explicit-relocs'.
3032
3033 TEMP is as for loongarch_force_temporary and is used to load the high
3034 part into a register.
3035
3036 When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be
3037 a legitimate SET_SRC for an .md pattern, otherwise the low part
3038 is guaranteed to be a legitimate address for mode MODE. */
3039
3040 bool
3041 loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
3042 {
3043 enum loongarch_symbol_type symbol_type;
3044
3045 /* If built with '-mno-explicit-relocs', don't split the symbol. */
3046 if (!TARGET_EXPLICIT_RELOCS)
3047 return false;
3048
3049 if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
3050 || !loongarch_symbolic_constant_p (addr, &symbol_type)
3051 || loongarch_symbol_insns (symbol_type, mode) == 0
3052 || !loongarch_split_symbol_type (symbol_type))
3053 return false;
3054
3055 rtx high, temp1 = NULL;
3056
3057 if (temp == NULL)
3058 temp = gen_reg_rtx (Pmode);
3059
3060 /* Get bits 12-31 of the address. */
3061 high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
3062 high = loongarch_force_temporary (temp, high);
3063
3064 if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ())
3065 {
3066 gcc_assert (TARGET_EXPLICIT_RELOCS);
3067
3068 temp1 = gen_reg_rtx (Pmode);
3069 emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0),
3070 addr));
3071 emit_insn (gen_lui_h_lo20 (temp1, temp1, addr));
3072 emit_insn (gen_lui_h_hi12 (temp1, temp1, addr));
3073 }
3074
3075 if (low_out)
3076 switch (symbol_type)
3077 {
3078 case SYMBOL_PCREL64:
3079 if (can_create_pseudo_p ())
3080 {
3081 *low_out = gen_rtx_PLUS (Pmode, high, temp1);
3082 break;
3083 }
3084 /* fall through */
3085 case SYMBOL_PCREL:
3086 *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
3087 break;
3088
3089 case SYMBOL_GOT_DISP:
3090 /* SYMBOL_GOT_DISP symbols are loaded from the GOT. */
3091 {
3092 if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
3093 *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, temp1));
3094 else
3095 {
3096 rtx low = gen_rtx_LO_SUM (Pmode, high, addr);
3097 rtx mem = gen_rtx_MEM (Pmode, low);
3098 *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
3099 UNSPEC_LOAD_FROM_GOT);
3100 }
3101
3102 break;
3103 }
3104
3105 default:
3106 gcc_unreachable ();
3107 }
3108
3109 return true;
3110 }
3111
3112 /* This function is used to implement LEGITIMIZE_ADDRESS. If X can
3113 be legitimized in a way that the generic machinery might not expect,
3114 return a new address, otherwise return NULL. MODE is the mode of
3115 the memory being accessed. */
3116
3117 static rtx
3118 loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3119 machine_mode mode)
3120 {
3121 rtx base, addr;
3122 HOST_WIDE_INT offset;
3123
3124 if (loongarch_tls_symbol_p (x))
3125 return loongarch_legitimize_tls_address (x);
3126
3127 /* See if the address can be split into a high part and a LO_SUM. */
3128 if (loongarch_split_symbol (NULL, x, mode, &addr))
3129 return loongarch_force_address (addr, mode);
3130
3131 /* Handle BASE + OFFSET using loongarch_add_offset. */
3132 loongarch_split_plus (x, &base, &offset);
3133 if (offset != 0)
3134 {
3135 if (!loongarch_valid_base_register_p (base, mode, false))
3136 base = copy_to_mode_reg (Pmode, base);
3137 addr = loongarch_add_offset (NULL, base, offset);
3138 return loongarch_force_address (addr, mode);
3139 }
3140
3141 return x;
3142 }
3143
3144 /* Load VALUE into DEST. TEMP is as for loongarch_force_temporary. */
3145
3146 void
3147 loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
3148 {
3149 struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS];
3150 machine_mode mode;
3151 unsigned int i, num_ops;
3152 rtx x;
3153
3154 mode = GET_MODE (dest);
3155 num_ops = loongarch_build_integer (codes, value);
3156
3157 /* Apply each binary operation to X. Invariant: X is a legitimate
3158 source operand for a SET pattern. */
3159 x = GEN_INT (codes[0].value);
3160 for (i = 1; i < num_ops; i++)
3161 {
3162 if (!can_create_pseudo_p ())
3163 {
3164 emit_insn (gen_rtx_SET (temp, x));
3165 x = temp;
3166 }
3167 else
3168 x = force_reg (mode, x);
3169
3170 set_unique_reg_note (get_last_insn (), REG_EQUAL,
3171 GEN_INT (codes[i-1].curr_value));
3172
3173 switch (codes[i].method)
3174 {
3175 case METHOD_NORMAL:
3176 x = gen_rtx_fmt_ee (codes[i].code, mode, x,
3177 GEN_INT (codes[i].value));
3178 break;
3179 case METHOD_LU32I:
3180 gcc_assert (mode == DImode);
3181 x = gen_rtx_IOR (DImode,
3182 gen_rtx_ZERO_EXTEND (DImode,
3183 gen_rtx_SUBREG (SImode, x, 0)),
3184 GEN_INT (codes[i].value));
3185 break;
3186 case METHOD_LU52I:
3187 gcc_assert (mode == DImode);
3188 x = gen_rtx_IOR (DImode,
3189 gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)),
3190 GEN_INT (codes[i].value));
3191 break;
3192 default:
3193 gcc_unreachable ();
3194 }
3195 }
3196
3197 emit_insn (gen_rtx_SET (dest, x));
3198 }
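
/* Illustrative note (not part of the upstream sources), assuming the
   usual LoongArch constant synthesis: a full 64-bit constant is
   typically built with LU12I.W (bits 12-31), ORI (bits 0-11), LU32I.D
   (bits 32-51) and LU52I.D (bits 52-63), which corresponds to the
   METHOD_NORMAL, METHOD_LU32I and METHOD_LU52I cases handled above. */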
3199
3200 /* Subroutine of loongarch_legitimize_move. Move constant SRC into register
3201 DEST given that SRC satisfies immediate_operand but doesn't satisfy
3202 move_operand. */
3203
3204 static void
3205 loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
3206 {
3207 rtx base, offset;
3208
3209 /* Split moves of big integers into smaller pieces. */
3210 if (splittable_const_int_operand (src, mode))
3211 {
3212 loongarch_move_integer (dest, dest, INTVAL (src));
3213 return;
3214 }
3215
3216 /* Split moves of symbolic constants into high and low. */
3217 if (loongarch_split_symbol (dest, src, MAX_MACHINE_MODE, &src))
3218 {
3219 loongarch_emit_set (dest, src);
3220 return;
3221 }
3222
3223 /* Generate the appropriate access sequences for TLS symbols. */
3224 if (loongarch_tls_symbol_p (src))
3225 {
3226 loongarch_emit_move (dest, loongarch_legitimize_tls_address (src));
3227 return;
3228 }
3229
3230 /* If we have (const (plus symbol offset)), and that expression cannot
3231 be forced into memory, load the symbol first and add in the offset.
3232 Prefer to do this even if the constant _can_ be forced into memory,
3233 as it usually produces better code. */
3234 split_const (src, &base, &offset);
3235 if (offset != const0_rtx
3236 && (targetm.cannot_force_const_mem (mode, src)
3237 || (can_create_pseudo_p ())))
3238 {
3239 base = loongarch_force_temporary (dest, base);
3240 loongarch_emit_move (dest,
3241 loongarch_add_offset (NULL, base, INTVAL (offset)));
3242 return;
3243 }
3244
3245 src = force_const_mem (mode, src);
3246
3247 loongarch_emit_move (dest, src);
3248 }
3249
3250 /* If (set DEST SRC) is not a valid move instruction, emit an equivalent
3251 sequence that is valid. */
3252
3253 bool
3254 loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src)
3255 {
3256 if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
3257 {
3258 loongarch_emit_move (dest, force_reg (mode, src));
3259 return true;
3260 }
3261
3262 /* Both src and dest are non-registers; one special case is supported where
3263 the source is (const_int 0) and the store can source the zero register.
3264 LSX is never able to source the zero register directly in
3265 memory operations. */
3266 if (!register_operand (dest, mode) && !register_operand (src, mode)
3267 && (!const_0_operand (src, mode) || LSX_SUPPORTED_MODE_P (mode)))
3268 {
3269 loongarch_emit_move (dest, force_reg (mode, src));
3270 return true;
3271 }
3272
3273 /* We need to deal with constants that would be legitimate
3274 immediate_operands but aren't legitimate move_operands. */
3275 if (CONSTANT_P (src) && !move_operand (src, mode))
3276 {
3277 loongarch_legitimize_const_move (mode, dest, src);
3278 set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
3279 return true;
3280 }
3281
3282 return false;
3283 }
3284
3285 /* Return true if OP refers to small data symbols directly. */
3286
3287 static int
3288 loongarch_small_data_pattern_1 (rtx x)
3289 {
3290 subrtx_var_iterator::array_type array;
3291 FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
3292 {
3293 rtx x = *iter;
3294
3295 /* We make no particular guarantee about which symbolic constants are
3296 acceptable as asm operands versus which must be forced into a GPR. */
3297 if (GET_CODE (x) == ASM_OPERANDS)
3298 iter.skip_subrtxes ();
3299 else if (MEM_P (x))
3300 {
3301 if (loongarch_small_data_pattern_1 (XEXP (x, 0)))
3302 return true;
3303 iter.skip_subrtxes ();
3304 }
3305 }
3306 return false;
3307 }
3308
3309 /* Return true if OP refers to small data symbols directly. */
3310
3311 bool
3312 loongarch_small_data_pattern_p (rtx op)
3313 {
3314 return loongarch_small_data_pattern_1 (op);
3315 }
3316
3317 /* Rewrite *LOC so that it refers to small data using explicit
3318 relocations. */
3319
3320 static void
3321 loongarch_rewrite_small_data_1 (rtx *loc)
3322 {
3323 subrtx_ptr_iterator::array_type array;
3324 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
3325 {
3326 rtx *loc = *iter;
3327 if (MEM_P (*loc))
3328 {
3329 loongarch_rewrite_small_data_1 (&XEXP (*loc, 0));
3330 iter.skip_subrtxes ();
3331 }
3332 }
3333 }
3334
3335 /* Rewrite instruction pattern PATTERN so that it refers to small data
3336 using explicit relocations. */
3337
3338 rtx
3339 loongarch_rewrite_small_data (rtx pattern)
3340 {
3341 pattern = copy_insn (pattern);
3342 loongarch_rewrite_small_data_1 (&pattern);
3343 return pattern;
3344 }
3345
3346 /* The cost of loading values from the constant pool. It should be
3347 larger than the cost of any constant we want to synthesize inline. */
3348 #define CONSTANT_POOL_COST COSTS_N_INSNS (8)
3349
3350 /* Return true if there is an instruction that implements CODE
3351 and if that instruction accepts X as an immediate operand. */
3352
3353 static int
3354 loongarch_immediate_operand_p (int code, HOST_WIDE_INT x)
3355 {
3356 switch (code)
3357 {
3358 case ASHIFT:
3359 case ASHIFTRT:
3360 case LSHIFTRT:
3361 /* All shift counts are truncated to a valid constant. */
3362 return true;
3363
3364 case ROTATE:
3365 case ROTATERT:
3366 return true;
3367
3368 case AND:
3369 case IOR:
3370 case XOR:
3371 /* These instructions take 12-bit unsigned immediates. */
3372 return IMM12_OPERAND_UNSIGNED (x);
3373
3374 case PLUS:
3375 case LT:
3376 case LTU:
3377 /* These instructions take 12-bit signed immediates. */
3378 return IMM12_OPERAND (x);
3379
3380 case EQ:
3381 case NE:
3382 case GT:
3383 case GTU:
3384 /* The "immediate" forms of these instructions are really
3385 implemented as comparisons with register 0. */
3386 return x == 0;
3387
3388 case GE:
3389 case GEU:
3390 /* Likewise, meaning that the only valid immediate operand is 1. */
3391 return x == 1;
3392
3393 case LE:
3394 /* We add 1 to the immediate and use SLT. */
3395 return IMM12_OPERAND (x + 1);
3396
3397 case LEU:
3398 /* Likewise SLTU, but reject the always-true case. */
3399 return IMM12_OPERAND (x + 1) && x + 1 != 0;
3400
3401 case SIGN_EXTRACT:
3402 case ZERO_EXTRACT:
3403 /* The bit position and size are immediate operands. */
3404 return 1;
3405
3406 default:
3407 /* By default assume that $0 can be used for 0. */
3408 return x == 0;
3409 }
3410 }
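
/* Illustrative note (not part of the upstream sources): for PLUS the
   test above accepts 2047 and rejects 2048, since ADDI takes a signed
   12-bit immediate, while for AND, IOR and XOR it accepts 0xfff and
   rejects -1, since ANDI, ORI and XORI take an unsigned 12-bit
   immediate. */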
3411
3412 /* Return the cost of binary operation X, given that the instruction
3413 sequence for a word-sized or smaller operation has cost SINGLE_COST
3414 and that the sequence of a double-word operation has cost DOUBLE_COST.
3415 If SPEED is true, optimize for speed otherwise optimize for size. */
3416
3417 static int
3418 loongarch_binary_cost (rtx x, int single_cost, int double_cost, bool speed)
3419 {
3420 int cost;
3421
3422 if (GET_MODE_SIZE (GET_MODE (x)) == UNITS_PER_WORD * 2)
3423 cost = double_cost;
3424 else
3425 cost = single_cost;
3426 return (cost
3427 + set_src_cost (XEXP (x, 0), GET_MODE (x), speed)
3428 + rtx_cost (XEXP (x, 1), GET_MODE (x), GET_CODE (x), 1, speed));
3429 }
3430
3431 /* Return the cost of floating-point multiplications of mode MODE. */
3432
3433 static int
3434 loongarch_fp_mult_cost (machine_mode mode)
3435 {
3436 return mode == DFmode ? loongarch_cost->fp_mult_df
3437 : loongarch_cost->fp_mult_sf;
3438 }
3439
3440 /* Return the cost of floating-point divisions of mode MODE. */
3441
3442 static int
3443 loongarch_fp_div_cost (machine_mode mode)
3444 {
3445 return mode == DFmode ? loongarch_cost->fp_div_df
3446 : loongarch_cost->fp_div_sf;
3447 }
3448
3449 /* Return the cost of sign-extending OP, not including the cost of OP
3450 itself. */
3451
3452 static int
3453 loongarch_sign_extend_cost (rtx op)
3454 {
3455 if (MEM_P (op))
3456 /* Extended loads are as cheap as unextended ones. */
3457 return 0;
3458
3459 return COSTS_N_INSNS (1);
3460 }
3461
3462 /* Return the cost of zero-extending OP, not including the cost of OP
3463 itself. */
3464
3465 static int
3466 loongarch_zero_extend_cost (rtx op)
3467 {
3468 if (MEM_P (op))
3469 /* Extended loads are as cheap as unextended ones. */
3470 return 0;
3471
3472 /* We can use ANDI. */
3473 return COSTS_N_INSNS (1);
3474 }
3475
3476 /* Return the cost of moving between two registers of mode MODE,
3477 assuming that the move will be in pieces of at most UNITS bytes. */
3478
3479 static int
3480 loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units)
3481 {
3482 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
3483 }
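
/* Illustrative note (not part of the upstream sources): by the formula
   above, moving a TImode value in at most word-sized (8-byte) pieces on
   a 64-bit target costs COSTS_N_INSNS (2). */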
3484
3485 /* Return the cost of moving between two registers of mode MODE. */
3486
3487 static int
3488 loongarch_set_reg_reg_cost (machine_mode mode)
3489 {
3490 switch (GET_MODE_CLASS (mode))
3491 {
3492 case MODE_CC:
3493 return loongarch_set_reg_reg_piece_cost (mode, GET_MODE_SIZE (CCmode));
3494
3495 case MODE_FLOAT:
3496 case MODE_COMPLEX_FLOAT:
3497 case MODE_VECTOR_FLOAT:
3498 if (TARGET_HARD_FLOAT)
3499 return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_HWFPVALUE);
3500 /* Fall through. */
3501
3502 default:
3503 return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_WORD);
3504 }
3505 }
3506
3507 /* Implement TARGET_RTX_COSTS. */
3508
3509 static bool
3510 loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
3511 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
3512 {
3513 int code = GET_CODE (x);
3514 bool float_mode_p = FLOAT_MODE_P (mode);
3515 int cost;
3516 rtx addr;
3517
3518 if (outer_code == COMPARE)
3519 {
3520 gcc_assert (CONSTANT_P (x));
3521 *total = 0;
3522 return true;
3523 }
3524
3525 switch (code)
3526 {
3527 case CONST_INT:
3528 if (TARGET_64BIT && outer_code == AND && UINTVAL (x) == 0xffffffff)
3529 {
3530 *total = 0;
3531 return true;
3532 }
3533
3534 /* When not optimizing for size, we care more about the cost
3535 of hot code, and hot code is often in a loop. If a constant
3536 operand needs to be forced into a register, we will often be
3537 able to hoist the constant load out of the loop, so the load
3538 should not contribute to the cost. */
3539 if (speed || loongarch_immediate_operand_p (outer_code, INTVAL (x)))
3540 {
3541 *total = 0;
3542 return true;
3543 }
3544 /* Fall through. */
3545
3546 case CONST:
3547 case SYMBOL_REF:
3548 case LABEL_REF:
3549 case CONST_DOUBLE:
3550 cost = loongarch_const_insns (x);
3551 if (cost > 0)
3552 {
3553 if (cost == 1 && outer_code == SET
3554 && !(float_mode_p && TARGET_HARD_FLOAT))
3555 cost = 0;
3556 else if ((outer_code == SET || GET_MODE (x) == VOIDmode))
3557 cost = 1;
3558 *total = COSTS_N_INSNS (cost);
3559 return true;
3560 }
3561 /* The value will need to be fetched from the constant pool. */
3562 *total = CONSTANT_POOL_COST;
3563 return true;
3564
3565 case MEM:
3566 /* If the address is legitimate, return the number of
3567 instructions it needs. */
3568 addr = XEXP (x, 0);
3569 /* Check for a scaled indexed address. */
3570 if (loongarch_index_address_p (addr, mode))
3571 {
3572 *total = COSTS_N_INSNS (2);
3573 return true;
3574 }
3575 cost = loongarch_address_insns (addr, mode, true);
3576 if (cost > 0)
3577 {
3578 *total = COSTS_N_INSNS (cost + 1);
3579 return true;
3580 }
3581 /* Otherwise use the default handling. */
3582 return false;
3583
3584 case FFS:
3585 *total = COSTS_N_INSNS (6);
3586 return false;
3587
3588 case NOT:
3589 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1);
3590 return false;
3591
3592 case AND:
3593 /* Check for a *clear_upper32 pattern and treat it like a zero
3594 extension. See the pattern's comment for details. */
3595 if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1))
3596 && UINTVAL (XEXP (x, 1)) == 0xffffffff)
3597 {
3598 *total = (loongarch_zero_extend_cost (XEXP (x, 0))
3599 + set_src_cost (XEXP (x, 0), mode, speed));
3600 return true;
3601 }
3602 /* (AND (NOT op0) (NOT op1)) is a NOR operation that can be done in
3603 a single instruction. */
3604 if (GET_CODE (XEXP (x, 0)) == NOT && GET_CODE (XEXP (x, 1)) == NOT)
3605 {
3606 cost = GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1;
3607 *total = (COSTS_N_INSNS (cost)
3608 + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed)
3609 + set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed));
3610 return true;
3611 }
3612
3613 /* Fall through. */
3614
3615 case IOR:
3616 case XOR:
3617 /* Double-word operations use two single-word operations. */
3618 *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2),
3619 speed);
3620 return true;
3621
3622 case ASHIFT:
3623 case ASHIFTRT:
3624 case LSHIFTRT:
3625 case ROTATE:
3626 case ROTATERT:
3627 if (CONSTANT_P (XEXP (x, 1)))
3628 *total = loongarch_binary_cost (x, COSTS_N_INSNS (1),
3629 COSTS_N_INSNS (4), speed);
3630 else
3631 *total = loongarch_binary_cost (x, COSTS_N_INSNS (1),
3632 COSTS_N_INSNS (12), speed);
3633 return true;
3634
3635 case ABS:
3636 if (float_mode_p)
3637 *total = loongarch_cost->fp_add;
3638 else
3639 *total = COSTS_N_INSNS (4);
3640 return false;
3641
3642 case LT:
3643 case LTU:
3644 case LE:
3645 case LEU:
3646 case GT:
3647 case GTU:
3648 case GE:
3649 case GEU:
3650 case EQ:
3651 case NE:
3652 case UNORDERED:
3653 case LTGT:
3654 case UNGE:
3655 case UNGT:
3656 case UNLE:
3657 case UNLT:
3658 /* Branch comparisons have VOIDmode, so use the first operand's
3659 mode instead. */
3660 mode = GET_MODE (XEXP (x, 0));
3661 if (FLOAT_MODE_P (mode))
3662 {
3663 *total = loongarch_cost->fp_add;
3664 return false;
3665 }
3666 *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4),
3667 speed);
3668 return true;
3669
3670 case MINUS:
3671 case PLUS:
3672 if (float_mode_p)
3673 {
3674 *total = loongarch_cost->fp_add;
3675 return false;
3676 }
3677
3678 /* If it's an add + mult (which is equivalent to a shift left) and
3679 its immediate operand satisfies the const_immalsl_operand predicate. */
3680 if ((mode == SImode || (TARGET_64BIT && mode == DImode))
3681 && GET_CODE (XEXP (x, 0)) == MULT)
3682 {
3683 rtx op2 = XEXP (XEXP (x, 0), 1);
3684 if (const_immalsl_operand (op2, mode))
3685 {
3686 *total = (COSTS_N_INSNS (1)
3687 + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed)
3688 + set_src_cost (XEXP (x, 1), mode, speed));
3689 return true;
3690 }
3691 }
3692
3693 /* Double-word operations require three single-word operations and
3694 an SLTU. */
3695 *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4),
3696 speed);
3697 return true;
3698
3699 case NEG:
3700 if (float_mode_p)
3701 *total = loongarch_cost->fp_add;
3702 else
3703 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1);
3704 return false;
3705
3706 case FMA:
3707 *total = loongarch_fp_mult_cost (mode);
3708 return false;
3709
3710 case MULT:
3711 if (float_mode_p)
3712 *total = loongarch_fp_mult_cost (mode);
3713 else if (mode == DImode && !TARGET_64BIT)
3714 *total = (speed
3715 ? loongarch_cost->int_mult_si * 3 + 6
3716 : COSTS_N_INSNS (7));
3717 else if (!speed)
3718 *total = COSTS_N_INSNS (1) + 1;
3719 else if (mode == DImode)
3720 *total = loongarch_cost->int_mult_di;
3721 else
3722 *total = loongarch_cost->int_mult_si;
3723 return false;
3724
3725 case DIV:
3726 /* Check for a reciprocal. */
3727 if (float_mode_p
3728 && flag_unsafe_math_optimizations
3729 && XEXP (x, 0) == CONST1_RTX (mode))
3730 {
3731 if (outer_code == SQRT || GET_CODE (XEXP (x, 1)) == SQRT)
3732 /* An rsqrt<mode>a or rsqrt<mode>b pattern. Count the
3733 division as being free. */
3734 *total = set_src_cost (XEXP (x, 1), mode, speed);
3735 else
3736 *total = (loongarch_fp_div_cost (mode)
3737 + set_src_cost (XEXP (x, 1), mode, speed));
3738 return true;
3739 }
3740 /* Fall through. */
3741
3742 case SQRT:
3743 case MOD:
3744 if (float_mode_p)
3745 {
3746 *total = loongarch_fp_div_cost (mode);
3747 return false;
3748 }
3749 /* Fall through. */
3750
3751 case UDIV:
3752 case UMOD:
3753 if (!speed)
3754 {
3755 *total = COSTS_N_INSNS (loongarch_idiv_insns (mode));
3756 }
3757 else if (mode == DImode)
3758 *total = loongarch_cost->int_div_di;
3759 else
3760 *total = loongarch_cost->int_div_si;
3761 return false;
3762
3763 case SIGN_EXTEND:
3764 *total = loongarch_sign_extend_cost (XEXP (x, 0));
3765 return false;
3766
3767 case ZERO_EXTEND:
3768 *total = loongarch_zero_extend_cost (XEXP (x, 0));
3769 return false;
3770 case TRUNCATE:
3771 /* Costings for highpart multiplies. Matching patterns of the form:
3772
3773 (lshiftrt:DI (mult:DI (sign_extend:DI (...))
3774 (sign_extend:DI (...)))
3775 (const_int 32))
3776 */
3777 if ((GET_CODE (XEXP (x, 0)) == ASHIFTRT
3778 || GET_CODE (XEXP (x, 0)) == LSHIFTRT)
3779 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3780 && ((INTVAL (XEXP (XEXP (x, 0), 1)) == 32
3781 && GET_MODE (XEXP (x, 0)) == DImode)
3782 || (TARGET_64BIT
3783 && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
3784 && GET_MODE (XEXP (x, 0)) == TImode))
3785 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3786 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
3787 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
3788 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
3789 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
3790 == ZERO_EXTEND))))
3791 {
3792 if (!speed)
3793 *total = COSTS_N_INSNS (1) + 1;
3794 else if (mode == DImode)
3795 *total = loongarch_cost->int_mult_di;
3796 else
3797 *total = loongarch_cost->int_mult_si;
3798
3799 /* Sign extension is free; zero extension has a cost for DImode
3800 when on a 64-bit core / when DMUL is present. */
3801 for (int i = 0; i < 2; ++i)
3802 {
3803 rtx op = XEXP (XEXP (XEXP (x, 0), 0), i);
3804 if (TARGET_64BIT
3805 && GET_CODE (op) == ZERO_EXTEND
3806 && GET_MODE (op) == DImode)
3807 *total += rtx_cost (op, DImode, MULT, i, speed);
3808 else
3809 *total += rtx_cost (XEXP (op, 0), VOIDmode, GET_CODE (op), 0,
3810 speed);
3811 }
3812
3813 return true;
3814 }
3815 return false;
3816
3817 case FLOAT:
3818 case UNSIGNED_FLOAT:
3819 case FIX:
3820 case FLOAT_EXTEND:
3821 case FLOAT_TRUNCATE:
3822 *total = loongarch_cost->fp_add;
3823 return false;
3824
3825 case SET:
3826 if (register_operand (SET_DEST (x), VOIDmode)
3827 && reg_or_0_operand (SET_SRC (x), VOIDmode))
3828 {
3829 *total = loongarch_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
3830 return true;
3831 }
3832 return false;
3833
3834 default:
3835 return false;
3836 }
3837 }
3838
3839 /* Vectorizer cost model implementation. */
3840
3841 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3842
3843 static int
3844 loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3845 tree vectype,
3846 int misalign ATTRIBUTE_UNUSED)
3847 {
3848 unsigned elements;
3849
3850 switch (type_of_cost)
3851 {
3852 case scalar_stmt:
3853 case scalar_load:
3854 case vector_stmt:
3855 case vector_load:
3856 case vec_to_scalar:
3857 case scalar_to_vec:
3858 case cond_branch_not_taken:
3859 case vec_promote_demote:
3860 case scalar_store:
3861 case vector_store:
3862 return 1;
3863
3864 case vec_perm:
3865 return 1;
3866
3867 case unaligned_load:
3868 case vector_gather_load:
3869 return 2;
3870
3871 case unaligned_store:
3872 case vector_scatter_store:
3873 return 10;
3874
3875 case cond_branch_taken:
3876 return 3;
3877
3878 case vec_construct:
3879 elements = TYPE_VECTOR_SUBPARTS (vectype);
3880 return elements / 2 + 1;
3881
3882 default:
3883 gcc_unreachable ();
3884 }
3885 }
3886
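/* For example (illustrative only): building a V4SI vector from scalars
   (vec_construct) is costed above as TYPE_VECTOR_SUBPARTS / 2 + 1
   = 4 / 2 + 1 = 3 units, while an aligned vector load or store is
   costed as 1 and an unaligned load as 2.  */
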
3887 /* Implement TARGET_ADDRESS_COST. */
3888
3889 static int
3890 loongarch_address_cost (rtx addr, machine_mode mode,
3891 addr_space_t as ATTRIBUTE_UNUSED,
3892 bool speed ATTRIBUTE_UNUSED)
3893 {
3894 return loongarch_address_insns (addr, mode, false);
3895 }
3896
3897 /* Return one word of double-word value OP, taking into account the fixed
3898 endianness of certain registers. HIGH_P is true to select the high part,
3899 false to select the low part. */
3900
3901 rtx
3902 loongarch_subword (rtx op, bool high_p)
3903 {
3904 unsigned int byte;
3905 machine_mode mode;
3906
3907 byte = high_p ? UNITS_PER_WORD : 0;
3908 mode = GET_MODE (op);
3909 if (mode == VOIDmode)
3910 mode = TARGET_64BIT ? TImode : DImode;
3911
3912 if (FP_REG_RTX_P (op))
3913 return gen_rtx_REG (word_mode, REGNO (op) + high_p);
3914
3915 if (MEM_P (op))
3916 return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte));
3917
3918 return simplify_gen_subreg (word_mode, op, mode, byte);
3919 }
3920
3921 /* Return true if a move from SRC to DEST should be split into two
3922 separate moves. */
3923
3924 bool
3925 loongarch_split_move_p (rtx dest, rtx src)
3926 {
3927 /* FPR-to-FPR moves can be done in a single instruction, if they're
3928 allowed at all. */
3929 unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
3930 if (size == 8 && FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
3931 return false;
3932
3933 /* Check for floating-point loads and stores. */
3934 if (size == 8)
3935 {
3936 if (FP_REG_RTX_P (dest) && MEM_P (src))
3937 return false;
3938 if (FP_REG_RTX_P (src) && MEM_P (dest))
3939 return false;
3940 }
3941
3942 /* Check if LSX moves need splitting. */
3943 if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
3944 return loongarch_split_128bit_move_p (dest, src);
3945
3946 /* Otherwise split all multiword moves. */
3947 return size > UNITS_PER_WORD;
3948 }
3949
3950 /* Split a move from SRC to DEST, given that loongarch_split_move_p holds.
3951 INSN_, if nonnull, is the move insn itself; it is used to try forwarding
SRC into the following insn. */
3952
3953 void
3954 loongarch_split_move (rtx dest, rtx src, rtx insn_)
3955 {
3956 rtx low_dest;
3957
3958 gcc_checking_assert (loongarch_split_move_p (dest, src));
3959 if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
3960 loongarch_split_128bit_move (dest, src);
3961 else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src))
3962 {
3963 if (!TARGET_64BIT && GET_MODE (dest) == DImode)
3964 emit_insn (gen_move_doubleword_fprdi (dest, src));
3965 else if (!TARGET_64BIT && GET_MODE (dest) == DFmode)
3966 emit_insn (gen_move_doubleword_fprdf (dest, src));
3967 else if (TARGET_64BIT && GET_MODE (dest) == TFmode)
3968 emit_insn (gen_move_doubleword_fprtf (dest, src));
3969 else
3970 gcc_unreachable ();
3971 }
3972 else
3973 {
3974 /* The operation can be split into two normal moves. Decide in
3975 which order to do them. */
3976 low_dest = loongarch_subword (dest, false);
3977 if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
3978 {
3979 loongarch_emit_move (loongarch_subword (dest, true),
3980 loongarch_subword (src, true));
3981 loongarch_emit_move (low_dest, loongarch_subword (src, false));
3982 }
3983 else
3984 {
3985 loongarch_emit_move (low_dest, loongarch_subword (src, false));
3986 loongarch_emit_move (loongarch_subword (dest, true),
3987 loongarch_subword (src, true));
3988 }
3989 }
3990
3991 /* This is a hack. See if the next insn uses DEST and if so, see if we
3992 can forward SRC for DEST. This is most useful if the next insn is a
3993 simple store. */
3994 rtx_insn *insn = (rtx_insn *) insn_;
3995 struct loongarch_address_info addr = {};
3996 if (insn)
3997 {
3998 rtx_insn *next = next_nonnote_nondebug_insn_bb (insn);
3999 if (next)
4000 {
4001 rtx set = single_set (next);
4002 if (set && SET_SRC (set) == dest)
4003 {
4004 if (MEM_P (src))
4005 {
4006 rtx tmp = XEXP (src, 0);
4007 loongarch_classify_address (&addr, tmp, GET_MODE (tmp),
4008 true);
4009 if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg))
4010 validate_change (next, &SET_SRC (set), src, false);
4011 }
4012 else
4013 validate_change (next, &SET_SRC (set), src, false);
4014 }
4015 }
4016 }
4017 }
4018
4019 /* Check if adding an integer constant value for a specific mode can be
4020 performed with an addu16i.d instruction and an addi.{w/d}
4021 instruction. */
4022
4023 bool
4024 loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT value, machine_mode mode)
4025 {
4026 /* Not necessary, but avoid unnecessary calculation if !TARGET_64BIT. */
4027 if (!TARGET_64BIT)
4028 return false;
4029
4030 if ((value & 0xffff) == 0)
4031 return false;
4032
4033 if (IMM12_OPERAND (value))
4034 return false;
4035
4036 value = (value & ~HWIT_UC_0xFFF) + ((value & 0x800) << 1);
4037 return ADDU16I_OPERAND (trunc_int_for_mode (value, mode));
4038 }
4039
4040 /* Split one integer constant op[0] into two (op[1] and op[2]) for a constant
4041 plus operation in a specific mode. Each of the split constants can be added
4042 onto a register with a single instruction (addi.{d/w} or addu16i.d). */
4043
4044 void
4045 loongarch_split_plus_constant (rtx *op, machine_mode mode)
4046 {
4047 HOST_WIDE_INT v = INTVAL (op[0]), a;
4048
4049 if (DUAL_IMM12_OPERAND (v))
4050 a = (v > 0 ? 2047 : -2048);
4051 else if (loongarch_addu16i_imm12_operand_p (v, mode))
4052 a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1);
4053 else if (mode == DImode && DUAL_ADDU16I_OPERAND (v))
4054 a = (v > 0 ? 0x7fff : -0x8000) << 16;
4055 else
4056 gcc_unreachable ();
4057
4058 op[1] = gen_int_mode (a, mode);
4059 v = v - (unsigned HOST_WIDE_INT) a;
4060 op[2] = gen_int_mode (v, mode);
4061 }
4062
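/* A worked example (illustrative, not from the original sources): for
   v = 0x2F805 on a 64-bit target, the low 12 bits are 0x805 and bit 11 is
   set, so op[1] = (v & ~0xfff) + 0x1000 = 0x30000 (an addu16i.d immediate
   of 3) and op[2] = v - 0x30000 = -0x7fb (a valid 12-bit addi.d immediate);
   adding op[1] and then op[2] reconstructs the original constant.  */
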
4063 /* Return true if a move from SRC to DEST should be split. */
4064
4065 bool
4066 loongarch_split_move_insn_p (rtx dest, rtx src)
4067 {
4068 return loongarch_split_move_p (dest, src);
4069 }
4070
4071 /* Split a move from SRC to DEST in INSN, given that
4072 loongarch_split_move_insn_p holds. */
4073
4074 void
4075 loongarch_split_move_insn (rtx dest, rtx src, rtx insn)
4076 {
4077 loongarch_split_move (dest, src, insn);
4078 }
4079
4080 /* Implement TARGET_CONSTANT_ALIGNMENT. */
4081
4082 static HOST_WIDE_INT
4083 loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align)
4084 {
4085 if (TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR)
4086 return MAX (align, BITS_PER_WORD);
4087 return align;
4088 }
4089
4090 const char *
4091 loongarch_output_move_index (rtx x, machine_mode mode, bool ldr)
4092 {
4093 int index = exact_log2 (GET_MODE_SIZE (mode));
4094 if (!IN_RANGE (index, 0, 3))
4095 return NULL;
4096
4097 struct loongarch_address_info info;
4098 if ((loongarch_classify_address (&info, x, mode, false)
4099 && !(info.type == ADDRESS_REG_REG))
4100 || !loongarch_legitimate_address_p (mode, x, false))
4101 return NULL;
4102
4103 const char *const insn[][4] =
4104 {
4105 {
4106 "stx.b\t%z1,%0",
4107 "stx.h\t%z1,%0",
4108 "stx.w\t%z1,%0",
4109 "stx.d\t%z1,%0",
4110 },
4111 {
4112 "ldx.bu\t%0,%1",
4113 "ldx.hu\t%0,%1",
4114 "ldx.w\t%0,%1",
4115 "ldx.d\t%0,%1",
4116 }
4117 };
4118
4119 return insn[ldr][index];
4120 }
4121
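/* For example (illustrative): a register-indexed DImode access such as
   (mem:DI (plus:DI (reg base) (reg idx))) is classified as ADDRESS_REG_REG,
   so the table above yields "ldx.d\t%0,%1" for a load and "stx.d\t%z1,%0"
   for a store.  */
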
4122 const char *
4123 loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr)
4124 {
4125 int index = exact_log2 (GET_MODE_SIZE (mode));
4126 if (!IN_RANGE (index, 2, 4))
4127 return NULL;
4128
4129 struct loongarch_address_info info;
4130 if ((loongarch_classify_address (&info, x, mode, false)
4131 && !(info.type == ADDRESS_REG_REG))
4132 || !loongarch_legitimate_address_p (mode, x, false))
4133 return NULL;
4134
4135 const char *const insn[][3] =
4136 {
4137 {
4138 "fstx.s\t%1,%0",
4139 "fstx.d\t%1,%0",
4140 "vstx\t%w1,%0"
4141 },
4142 {
4143 "fldx.s\t%0,%1",
4144 "fldx.d\t%0,%1",
4145 "vldx\t%w0,%1"
4146 }
4147 };
4148
4149 return insn[ldr][index-2];
4150 }
4151 /* Return true if a 128-bit move from SRC to DEST should be split. */
4152
4153 bool
4154 loongarch_split_128bit_move_p (rtx dest, rtx src)
4155 {
4156 /* LSX-to-LSX moves can be done in a single instruction. */
4157 if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
4158 return false;
4159
4160 /* Check for LSX loads and stores. */
4161 if (FP_REG_RTX_P (dest) && MEM_P (src))
4162 return false;
4163 if (FP_REG_RTX_P (src) && MEM_P (dest))
4164 return false;
4165
4166 /* Check for LSX set to an immediate const vector with valid replicated
4167 element. */
4168 if (FP_REG_RTX_P (dest)
4169 && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
4170 return false;
4171
4172 /* Check for LSX load zero immediate. */
4173 if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
4174 return false;
4175
4176 return true;
4177 }
4178
4179 /* Split a 128-bit move from SRC to DEST. */
4180
4181 void
4182 loongarch_split_128bit_move (rtx dest, rtx src)
4183 {
4184 int byte, index;
4185 rtx low_dest, low_src, d, s;
4186
4187 if (FP_REG_RTX_P (dest))
4188 {
4189 gcc_assert (!MEM_P (src));
4190
4191 rtx new_dest = dest;
4192 if (!TARGET_64BIT)
4193 {
4194 if (GET_MODE (dest) != V4SImode)
4195 new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
4196 }
4197 else
4198 {
4199 if (GET_MODE (dest) != V2DImode)
4200 new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0);
4201 }
4202
4203 for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
4204 byte += UNITS_PER_WORD, index++)
4205 {
4206 s = loongarch_subword_at_byte (src, byte);
4207 if (!TARGET_64BIT)
4208 emit_insn (gen_lsx_vinsgr2vr_w (new_dest, s, new_dest,
4209 GEN_INT (1 << index)));
4210 else
4211 emit_insn (gen_lsx_vinsgr2vr_d (new_dest, s, new_dest,
4212 GEN_INT (1 << index)));
4213 }
4214 }
4215 else if (FP_REG_RTX_P (src))
4216 {
4217 gcc_assert (!MEM_P (dest));
4218
4219 rtx new_src = src;
4220 if (!TARGET_64BIT)
4221 {
4222 if (GET_MODE (src) != V4SImode)
4223 new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
4224 }
4225 else
4226 {
4227 if (GET_MODE (src) != V2DImode)
4228 new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0);
4229 }
4230
4231 for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
4232 byte += UNITS_PER_WORD, index++)
4233 {
4234 d = loongarch_subword_at_byte (dest, byte);
4235 if (!TARGET_64BIT)
4236 emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
4237 else
4238 emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
4239 }
4240 }
4241 else
4242 {
4243 low_dest = loongarch_subword_at_byte (dest, 0);
4244 low_src = loongarch_subword_at_byte (src, 0);
4245 gcc_assert (REG_P (low_dest) && REG_P (low_src));
4246 /* Make sure the source register is not written before reading. */
4247 if (REGNO (low_dest) <= REGNO (low_src))
4248 {
4249 for (byte = 0; byte < GET_MODE_SIZE (TImode);
4250 byte += UNITS_PER_WORD)
4251 {
4252 d = loongarch_subword_at_byte (dest, byte);
4253 s = loongarch_subword_at_byte (src, byte);
4254 loongarch_emit_move (d, s);
4255 }
4256 }
4257 else
4258 {
4259 for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
4260 byte -= UNITS_PER_WORD)
4261 {
4262 d = loongarch_subword_at_byte (dest, byte);
4263 s = loongarch_subword_at_byte (src, byte);
4264 loongarch_emit_move (d, s);
4265 }
4266 }
4267 }
4268 }
4269
4270
4271 /* Split a COPY_S.D with operands DEST, SRC and INDEX. GEN_FN is a function
4272 used to extract the two 32-bit halves of the selected element. */
4273
4274 void
4275 loongarch_split_lsx_copy_d (rtx dest, rtx src, rtx index,
4276 rtx (*gen_fn)(rtx, rtx, rtx))
4277 {
4278 gcc_assert ((GET_MODE (src) == V2DImode && GET_MODE (dest) == DImode)
4279 || (GET_MODE (src) == V2DFmode && GET_MODE (dest) == DFmode));
4280
4281 /* Note that low is always from the lower index, and high is always
4282 from the higher index. */
4283 rtx low = loongarch_subword (dest, false);
4284 rtx high = loongarch_subword (dest, true);
4285 rtx new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
4286
4287 emit_insn (gen_fn (low, new_src, GEN_INT (INTVAL (index) * 2)));
4288 emit_insn (gen_fn (high, new_src, GEN_INT (INTVAL (index) * 2 + 1)));
4289 }
4290
4291 /* Split an INSERT.D with operands DEST, SRC1, INDEX and SRC2. */
4292
4293 void
4294 loongarch_split_lsx_insert_d (rtx dest, rtx src1, rtx index, rtx src2)
4295 {
4296 int i;
4297 gcc_assert (GET_MODE (dest) == GET_MODE (src1));
4298 gcc_assert ((GET_MODE (dest) == V2DImode
4299 && (GET_MODE (src2) == DImode || src2 == const0_rtx))
4300 || (GET_MODE (dest) == V2DFmode && GET_MODE (src2) == DFmode));
4301
4302 /* Note that low is always from the lower index, and high is always
4303 from the higher index. */
4304 rtx low = loongarch_subword (src2, false);
4305 rtx high = loongarch_subword (src2, true);
4306 rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
4307 rtx new_src1 = simplify_gen_subreg (V4SImode, src1, GET_MODE (src1), 0);
4308 i = exact_log2 (INTVAL (index));
4309 gcc_assert (i != -1);
4310
4311 emit_insn (gen_lsx_vinsgr2vr_w (new_dest, low, new_src1,
4312 GEN_INT (1 << (i * 2))));
4313 emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest,
4314 GEN_INT (1 << (i * 2 + 1))));
4315 }
4316
4317 /* Split FILL.D. */
4318
4319 void
4320 loongarch_split_lsx_fill_d (rtx dest, rtx src)
4321 {
4322 gcc_assert ((GET_MODE (dest) == V2DImode
4323 && (GET_MODE (src) == DImode || src == const0_rtx))
4324 || (GET_MODE (dest) == V2DFmode && GET_MODE (src) == DFmode));
4325
4326 /* Note that low is always from the lower index, and high is always
4327 from the higher index. */
4328 rtx low, high;
4329 if (src == const0_rtx)
4330 {
4331 low = src;
4332 high = src;
4333 }
4334 else
4335 {
4336 low = loongarch_subword (src, false);
4337 high = loongarch_subword (src, true);
4338 }
4339 rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
4340 emit_insn (gen_lsx_vreplgr2vr_w (new_dest, low));
4341 emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 1)));
4342 emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 3)));
4343 }
4344
4345
4346 /* Return the appropriate instructions to move SRC into DEST. Assume
4347 that SRC is operand 1 and DEST is operand 0. */
4348
4349 const char *
4350 loongarch_output_move (rtx dest, rtx src)
4351 {
4352 enum rtx_code dest_code = GET_CODE (dest);
4353 enum rtx_code src_code = GET_CODE (src);
4354 machine_mode mode = GET_MODE (dest);
4355 bool dbl_p = (GET_MODE_SIZE (mode) == 8);
4356 bool lsx_p = LSX_SUPPORTED_MODE_P (mode);
4357
4358 if (loongarch_split_move_p (dest, src))
4359 return "#";
4360
4361 if ((lsx_p)
4362 && dest_code == REG && FP_REG_P (REGNO (dest))
4363 && src_code == CONST_VECTOR
4364 && CONST_INT_P (CONST_VECTOR_ELT (src, 0)))
4365 {
4366 gcc_assert (loongarch_const_vector_same_int_p (src, mode, -512, 511));
4367 switch (GET_MODE_SIZE (mode))
4368 {
4369 case 16:
4370 return "vrepli.%v0\t%w0,%E1";
4371 default: gcc_unreachable ();
4372 }
4373 }
4374
4375 if ((src_code == REG && GP_REG_P (REGNO (src)))
4376 || (src == CONST0_RTX (mode)))
4377 {
4378 if (dest_code == REG)
4379 {
4380 if (GP_REG_P (REGNO (dest)))
4381 return "or\t%0,%z1,$r0";
4382
4383 if (FP_REG_P (REGNO (dest)))
4384 {
4385 if (lsx_p)
4386 {
4387 gcc_assert (src == CONST0_RTX (GET_MODE (src)));
4388 switch (GET_MODE_SIZE (mode))
4389 {
4390 case 16:
4391 return "vrepli.b\t%w0,0";
4392 default:
4393 gcc_unreachable ();
4394 }
4395 }
4396
4397 return dbl_p ? "movgr2fr.d\t%0,%z1" : "movgr2fr.w\t%0,%z1";
4398 }
4399 }
4400 if (dest_code == MEM)
4401 {
4402 const char *insn = NULL;
4403 insn = loongarch_output_move_index (XEXP (dest, 0), GET_MODE (dest),
4404 false);
4405 if (insn)
4406 return insn;
4407
4408 rtx offset = XEXP (dest, 0);
4409 if (GET_CODE (offset) == PLUS)
4410 offset = XEXP (offset, 1);
4411 switch (GET_MODE_SIZE (mode))
4412 {
4413 case 1:
4414 return "st.b\t%z1,%0";
4415 case 2:
4416 return "st.h\t%z1,%0";
4417 case 4:
4418 /* Use st.w for an address with a 12-bit constant offset or an
4419 ADDRESS_LO_SUM; otherwise use stptr.w. */
4420 if (const_arith_operand (offset, Pmode)
4421 || GET_CODE (offset) == LO_SUM)
4422 return "st.w\t%z1,%0";
4423 else
4424 return "stptr.w\t%z1,%0";
4425 case 8:
4426 if (const_arith_operand (offset, Pmode)
4427 || GET_CODE (offset) == LO_SUM)
4428 return "st.d\t%z1,%0";
4429 else
4430 return "stptr.d\t%z1,%0";
4431 default:
4432 gcc_unreachable ();
4433 }
4434 }
4435 }
4436 if (dest_code == REG && GP_REG_P (REGNO (dest)))
4437 {
4438 if (src_code == REG)
4439 if (FP_REG_P (REGNO (src)))
4440 {
4441 gcc_assert (!lsx_p);
4442 return dbl_p ? "movfr2gr.d\t%0,%1" : "movfr2gr.s\t%0,%1";
4443 }
4444
4445 if (src_code == MEM)
4446 {
4447 const char *insn = NULL;
4448 insn = loongarch_output_move_index (XEXP (src, 0), GET_MODE (src),
4449 true);
4450 if (insn)
4451 return insn;
4452
4453 rtx offset = XEXP (src, 0);
4454 if (GET_CODE (offset) == PLUS)
4455 offset = XEXP (offset, 1);
4456 switch (GET_MODE_SIZE (mode))
4457 {
4458 case 1:
4459 return "ld.bu\t%0,%1";
4460 case 2:
4461 return "ld.hu\t%0,%1";
4462 case 4:
4463 /* Use ld.w for an address with a 12-bit constant offset or an
4464 ADDRESS_LO_SUM; otherwise use ldptr.w. */
4465 if (const_arith_operand (offset, Pmode)
4466 || GET_CODE (offset) == LO_SUM)
4467 return "ld.w\t%0,%1";
4468 else
4469 return "ldptr.w\t%0,%1";
4470 case 8:
4471 if (const_arith_operand (offset, Pmode)
4472 || GET_CODE (offset) == LO_SUM)
4473 return "ld.d\t%0,%1";
4474 else
4475 return "ldptr.d\t%0,%1";
4476 default:
4477 gcc_unreachable ();
4478 }
4479 }
4480
4481 if (src_code == HIGH)
4482 {
4483 rtx offset, x;
4484 split_const (XEXP (src, 0), &x, &offset);
4485 enum loongarch_symbol_type type = SYMBOL_PCREL;
4486
4487 if (UNSPEC_ADDRESS_P (x))
4488 type = UNSPEC_ADDRESS_TYPE (x);
4489
4490 if (type == SYMBOL_TLS_LE)
4491 return "lu12i.w\t%0,%h1";
4492 else
4493 return "pcalau12i\t%0,%h1";
4494 }
4495
4496 if (src_code == CONST_INT)
4497 {
4498 if (LU12I_INT (src))
4499 return "lu12i.w\t%0,%1>>12\t\t\t# %X1";
4500 else if (IMM12_INT (src))
4501 return "addi.w\t%0,$r0,%1\t\t\t# %X1";
4502 else if (IMM12_INT_UNSIGNED (src))
4503 return "ori\t%0,$r0,%1\t\t\t# %X1";
4504 else if (LU52I_INT (src))
4505 return "lu52i.d\t%0,$r0,%X1>>52\t\t\t# %1";
4506 else
4507 gcc_unreachable ();
4508 }
4509 }
4510
4511 if (!TARGET_EXPLICIT_RELOCS
4512 && dest_code == REG && symbolic_operand (src, VOIDmode))
4513 {
4514 if (loongarch_classify_symbol (src) == SYMBOL_PCREL)
4515 return "la.local\t%0,%1";
4516 else
4517 return "la.global\t%0,%1";
4518 }
4519
4520 if (src_code == REG && FP_REG_P (REGNO (src)))
4521 {
4522 if (dest_code == REG && FP_REG_P (REGNO (dest)))
4523 {
4524 if (lsx_p)
4525 {
4526 switch (GET_MODE_SIZE (mode))
4527 {
4528 case 16:
4529 return "vori.b\t%w0,%w1,0";
4530 default:
4531 gcc_unreachable ();
4532 }
4533 }
4534
4535 return dbl_p ? "fmov.d\t%0,%1" : "fmov.s\t%0,%1";
4536 }
4537
4538 if (dest_code == MEM)
4539 {
4540 const char *insn = NULL;
4541 insn = loongarch_output_move_index_float (XEXP (dest, 0),
4542 GET_MODE (dest),
4543 false);
4544 if (insn)
4545 return insn;
4546
4547 if (lsx_p)
4548 {
4549 switch (GET_MODE_SIZE (mode))
4550 {
4551 case 16:
4552 return "vst\t%w1,%0";
4553 default:
4554 gcc_unreachable ();
4555 }
4556 }
4557
4558 return dbl_p ? "fst.d\t%1,%0" : "fst.s\t%1,%0";
4559 }
4560 }
4561
4562 if (dest_code == REG && FP_REG_P (REGNO (dest)))
4563 {
4564 if (src_code == MEM)
4565 {
4566 const char *insn = NULL;
4567 insn = loongarch_output_move_index_float (XEXP (src, 0),
4568 GET_MODE (src),
4569 true);
4570 if (insn)
4571 return insn;
4572
4573 if (lsx_p)
4574 {
4575 switch (GET_MODE_SIZE (mode))
4576 {
4577 case 16:
4578 return "vld\t%w0,%1";
4579 default:
4580 gcc_unreachable ();
4581 }
4582 }
4583 return dbl_p ? "fld.d\t%0,%1" : "fld.s\t%0,%1";
4584 }
4585 }
4586
4587 gcc_unreachable ();
4588 }
4589
4590 /* Return true if CMP1 is a suitable second operand for integer ordering
4591 test CODE. */
4592
4593 static bool
4594 loongarch_int_order_operand_ok_p (enum rtx_code code, rtx cmp1)
4595 {
4596 switch (code)
4597 {
4598 case GT:
4599 case GTU:
4600 return reg_or_0_operand (cmp1, VOIDmode);
4601
4602 case GE:
4603 case GEU:
4604 return cmp1 == const1_rtx;
4605
4606 case LT:
4607 case LTU:
4608 return arith_operand (cmp1, VOIDmode);
4609
4610 case LE:
4611 return sle_operand (cmp1, VOIDmode);
4612
4613 case LEU:
4614 return sleu_operand (cmp1, VOIDmode);
4615
4616 default:
4617 gcc_unreachable ();
4618 }
4619 }
4620
4621 /* Return true if *CMP1 (of mode MODE) is a valid second operand for
4622 integer ordering test *CODE, or if an equivalent combination can
4623 be formed by adjusting *CODE and *CMP1. When returning true, update
4624 *CODE and *CMP1 with the chosen code and operand, otherwise leave
4625 them alone. */
4626
4627 static bool
4628 loongarch_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
4629 machine_mode mode)
4630 {
4631 HOST_WIDE_INT plus_one;
4632
4633 if (loongarch_int_order_operand_ok_p (*code, *cmp1))
4634 return true;
4635
4636 if (CONST_INT_P (*cmp1))
4637 switch (*code)
4638 {
4639 case LE:
4640 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4641 if (INTVAL (*cmp1) < plus_one)
4642 {
4643 *code = LT;
4644 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4645 return true;
4646 }
4647 break;
4648
4649 case LEU:
4650 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4651 if (plus_one != 0)
4652 {
4653 *code = LTU;
4654 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4655 return true;
4656 }
4657 break;
4658
4659 default:
4660 break;
4661 }
4662 return false;
4663 }
4664
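/* For example (illustrative): a comparison "x <= C" whose constant is not
   accepted by sle_operand is rewritten above as "x < C + 1" with C + 1
   forced into a register, since LT only needs an arith_operand or a
   register on the right-hand side.  */
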
4665 /* Compare CMP0 and CMP1 using ordering test CODE and store the result
4666 in TARGET. CMP0 and TARGET are register_operands. If INVERT_PTR
4667 is nonnull, it's OK to set TARGET to the inverse of the result and
4668 flip *INVERT_PTR instead. */
4669
4670 static void
4671 loongarch_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
4672 rtx target, rtx cmp0, rtx cmp1)
4673 {
4674 machine_mode mode;
4675
4676 /* First see if there is a LoongArch instruction that can do this operation.
4677 If not, try doing the same for the inverse operation. If that also
4678 fails, force CMP1 into a register and try again. */
4679 mode = GET_MODE (cmp0);
4680 if (loongarch_canonicalize_int_order_test (&code, &cmp1, mode))
4681 loongarch_emit_binary (code, target, cmp0, cmp1);
4682 else
4683 {
4684 enum rtx_code inv_code = reverse_condition (code);
4685 if (!loongarch_canonicalize_int_order_test (&inv_code, &cmp1, mode))
4686 {
4687 cmp1 = force_reg (mode, cmp1);
4688 loongarch_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
4689 }
4690 else if (invert_ptr == 0)
4691 {
4692 rtx inv_target;
4693
4694 inv_target = loongarch_force_binary (GET_MODE (target),
4695 inv_code, cmp0, cmp1);
4696 loongarch_emit_binary (XOR, target, inv_target, const1_rtx);
4697 }
4698 else
4699 {
4700 *invert_ptr = !*invert_ptr;
4701 loongarch_emit_binary (inv_code, target, cmp0, cmp1);
4702 }
4703 }
4704 }
4705
4706 /* Return a register that is zero if CMP0 and CMP1 are equal.
4707 The register will have the same mode as CMP0. */
4708
4709 static rtx
4710 loongarch_zero_if_equal (rtx cmp0, rtx cmp1)
4711 {
4712 if (cmp1 == const0_rtx)
4713 return cmp0;
4714
4715 if (uns_arith_operand (cmp1, VOIDmode))
4716 return expand_binop (GET_MODE (cmp0), xor_optab, cmp0, cmp1, 0, 0,
4717 OPTAB_DIRECT);
4718
4719 return expand_binop (GET_MODE (cmp0), sub_optab, cmp0, cmp1, 0, 0,
4720 OPTAB_DIRECT);
4721 }
4722
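/* For example (illustrative): loongarch_expand_scc uses the helper above for
   EQ/NE, so "r = (a == b)" becomes "t = a ^ b" when B is a register or a
   small unsigned constant (otherwise "t = a - b"), followed by
   "r = (t == 0)", which maps onto sltui r,t,1.  */
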
4723 /* Allocate a floating-point condition-code register of mode MODE. */
4724
4725 static rtx
4726 loongarch_allocate_fcc (machine_mode mode)
4727 {
4728 unsigned int regno, count;
4729
4730 gcc_assert (TARGET_HARD_FLOAT);
4731
4732 if (mode == FCCmode)
4733 count = 1;
4734 else
4735 gcc_unreachable ();
4736
4737 cfun->machine->next_fcc += -cfun->machine->next_fcc & (count - 1);
4738 if (cfun->machine->next_fcc > FCC_REG_LAST - FCC_REG_FIRST)
4739 cfun->machine->next_fcc = 0;
4740
4741 regno = FCC_REG_FIRST + cfun->machine->next_fcc;
4742 cfun->machine->next_fcc += count;
4743 return gen_rtx_REG (mode, regno);
4744 }
4745
4746 /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */
4747
4748 static void
4749 loongarch_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
4750 {
4751 /* Comparisons consider all GRLEN bits, so extend sub-GRLEN values. */
4752 if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)))
4753 {
4754 /* It is more profitable to zero-extend QImode values. But not if the
4755 first operand has already been sign-extended, and the second one is
4756 a constant or has also been sign-extended. */
4757 if (unsigned_condition (code) == code
4758 && (GET_MODE (*op0) == QImode
4759 && ! (GET_CODE (*op0) == SUBREG
4760 && SUBREG_PROMOTED_VAR_P (*op0)
4761 && SUBREG_PROMOTED_SIGNED_P (*op0)
4762 && (CONST_INT_P (*op1)
4763 || (GET_CODE (*op1) == SUBREG
4764 && SUBREG_PROMOTED_VAR_P (*op1)
4765 && SUBREG_PROMOTED_SIGNED_P (*op1))))))
4766 {
4767 *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
4768 if (CONST_INT_P (*op1))
4769 *op1 = GEN_INT ((uint8_t) INTVAL (*op1));
4770 else
4771 *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1);
4772 }
4773 else
4774 {
4775 *op0 = gen_rtx_SIGN_EXTEND (word_mode, *op0);
4776 if (*op1 != const0_rtx)
4777 *op1 = gen_rtx_SIGN_EXTEND (word_mode, *op1);
4778 }
4779 }
4780 }
4781
4782
4783 /* Convert a comparison into something that can be used in a branch. On
4784 entry, *OP0 and *OP1 are the values being compared and *CODE is the code
4785 used to compare them. Update them to describe the final comparison. */
4786
4787 static void
4788 loongarch_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1)
4789 {
4790 static const enum rtx_code
4791 mag_comparisons[][2] = {{LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}};
4792
4793 if (splittable_const_int_operand (*op1, VOIDmode))
4794 {
4795 HOST_WIDE_INT rhs = INTVAL (*op1);
4796
4797 if (*code == EQ || *code == NE)
4798 {
4799 /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0. */
4800 if (IMM12_OPERAND (-rhs))
4801 {
4802 *op0 = loongarch_force_binary (GET_MODE (*op0), PLUS, *op0,
4803 GEN_INT (-rhs));
4804 *op1 = const0_rtx;
4805 }
4806 }
4807 else
4808 {
4809 /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000). */
4810 for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++)
4811 {
4812 HOST_WIDE_INT new_rhs;
4813 bool increment = *code == mag_comparisons[i][0];
4814 bool decrement = *code == mag_comparisons[i][1];
4815 if (!increment && !decrement)
4816 continue;
4817
4818 if ((increment && rhs == HOST_WIDE_INT_MAX)
4819 || (decrement && rhs == HOST_WIDE_INT_MIN))
4820 break;
4821
4822 new_rhs = rhs + (increment ? 1 : -1);
4823 if (loongarch_integer_cost (new_rhs)
4824 < loongarch_integer_cost (rhs))
4825 {
4826 *op1 = GEN_INT (new_rhs);
4827 *code = mag_comparisons[i][increment];
4828 }
4829 break;
4830 }
4831 }
4832 }
4833
4834 loongarch_extend_comparands (*code, op0, op1);
4835
4836 *op0 = force_reg (word_mode, *op0);
4837 if (*op1 != const0_rtx)
4838 *op1 = force_reg (word_mode, *op1);
4839 }
4840
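/* For example (illustrative): "if (x == 2048)" cannot use a 12-bit immediate
   directly, so the code above rewrites it as "t = x + (-2048); if (t == 0)",
   i.e. one addi.{w/d} followed by a branch-on-zero, instead of loading 2048
   into a register first.  */
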
4841 /* Like loongarch_emit_int_compare, but for floating-point comparisons. */
4842
4843 static void
4844 loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1)
4845 {
4846 rtx cmp_op0 = *op0;
4847 rtx cmp_op1 = *op1;
4848
4849 /* Floating-point tests use a separate FCMP.cond.fmt
4850 comparison to set a register. The branch or conditional move will
4851 then compare that register against zero.
4852
4853 Set CMP_CODE to the code of the comparison instruction and
4854 *CODE to the code that the branch or move should use. */
4855 enum rtx_code cmp_code = *code;
4856 /* Three FP conditions cannot be implemented by reversing the
4857 operands of FCMP.cond.fmt; instead, a reversed condition code is
4858 required together with a test for false. */
4859 *code = NE;
4860 *op0 = loongarch_allocate_fcc (FCCmode);
4861
4862 *op1 = const0_rtx;
4863 loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1);
4864 }
4865
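/* For example (illustrative): "if (a < b)" on doubles becomes
   "fcmp.slt.d $fccN,a,b" using the FCCmode register allocated above, and
   the branch then tests that register with bcnez, since *CODE is rewritten
   to NE against zero.  */
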
4866 /* Try performing the comparison in OPERANDS[1], whose arms are OPERANDS[2]
4867 and OPERANDS[3]. Store the result in OPERANDS[0].
4868
4869 On 64-bit targets, the mode of the comparison and target will always be
4870 SImode, thus possibly narrower than that of the comparison's operands. */
4871
4872 void
4873 loongarch_expand_scc (rtx operands[])
4874 {
4875 rtx target = operands[0];
4876 enum rtx_code code = GET_CODE (operands[1]);
4877 rtx op0 = operands[2];
4878 rtx op1 = operands[3];
4879
4880 loongarch_extend_comparands (code, &op0, &op1);
4881 op0 = force_reg (word_mode, op0);
4882
4883 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT);
4884
4885 if (code == EQ || code == NE)
4886 {
4887 rtx zie = loongarch_zero_if_equal (op0, op1);
4888 loongarch_emit_binary (code, target, zie, const0_rtx);
4889 }
4890 else
4891 loongarch_emit_int_order_test (code, 0, target, op0, op1);
4892 }
4893
4894 /* Compare OPERANDS[1] with OPERANDS[2] using comparison code
4895 CODE and jump to OPERANDS[3] if the condition holds. */
4896
4897 void
4898 loongarch_expand_conditional_branch (rtx *operands)
4899 {
4900 enum rtx_code code = GET_CODE (operands[0]);
4901 rtx op0 = operands[1];
4902 rtx op1 = operands[2];
4903 rtx condition;
4904
4905 if (FLOAT_MODE_P (GET_MODE (op1)))
4906 loongarch_emit_float_compare (&code, &op0, &op1);
4907 else
4908 loongarch_emit_int_compare (&code, &op0, &op1);
4909
4910 condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
4911 emit_jump_insn (gen_condjump (condition, operands[3]));
4912 }
4913
4914 /* Perform the comparison in OPERANDS[1]. Move OPERANDS[2] into OPERANDS[0]
4915 if the condition holds, otherwise move OPERANDS[3] into OPERANDS[0]. */
4916
4917 void
4918 loongarch_expand_conditional_move (rtx *operands)
4919 {
4920 enum rtx_code code = GET_CODE (operands[1]);
4921 rtx op0 = XEXP (operands[1], 0);
4922 rtx op1 = XEXP (operands[1], 1);
4923 rtx op0_extend = op0;
4924 rtx op1_extend = op1;
4925
4926 /* Record whether operands[2] and operands[3] modes are promoted to word_mode. */
4927 bool promote_p = false;
4928 machine_mode mode = GET_MODE (operands[0]);
4929
4930 if (FLOAT_MODE_P (GET_MODE (op1)))
4931 loongarch_emit_float_compare (&code, &op0, &op1);
4932 else
4933 {
4934 if ((REGNO (op0) == REGNO (operands[2])
4935 || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
4936 && (GET_MODE_SIZE (GET_MODE (op0)) < GET_MODE_SIZE (word_mode)))
4937 {
4938 mode = word_mode;
4939 promote_p = true;
4940 }
4941
4942 loongarch_extend_comparands (code, &op0, &op1);
4943
4944 op0 = force_reg (word_mode, op0);
4945 op0_extend = op0;
4946 op1_extend = force_reg (word_mode, op1);
4947
4948 if (code == EQ || code == NE)
4949 {
4950 op0 = loongarch_zero_if_equal (op0, op1);
4951 op1 = const0_rtx;
4952 }
4953 else
4954 {
4955 /* The comparison needs a separate scc instruction. Store the
4956 result of the scc in *OP0 and compare it against zero. */
4957 bool invert = false;
4958 rtx target = gen_reg_rtx (GET_MODE (op0));
4959 loongarch_emit_int_order_test (code, &invert, target, op0, op1);
4960 code = invert ? EQ : NE;
4961 op0 = target;
4962 op1 = const0_rtx;
4963 }
4964 }
4965
4966 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
4967 /* There is no direct support for general conditional GP move involving
4968 two registers using SEL. */
4969 if (INTEGRAL_MODE_P (GET_MODE (operands[2]))
4970 && register_operand (operands[2], VOIDmode)
4971 && register_operand (operands[3], VOIDmode))
4972 {
4973 rtx op2 = operands[2];
4974 rtx op3 = operands[3];
4975
4976 if (promote_p)
4977 {
4978 if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
4979 op2 = op0_extend;
4980 else
4981 {
4982 loongarch_extend_comparands (code, &op2, &const0_rtx);
4983 op2 = force_reg (mode, op2);
4984 }
4985
4986 if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
4987 op3 = op1_extend;
4988 else
4989 {
4990 loongarch_extend_comparands (code, &op3, &const0_rtx);
4991 op3 = force_reg (mode, op3);
4992 }
4993 }
4994
4995 rtx temp = gen_reg_rtx (mode);
4996 rtx temp2 = gen_reg_rtx (mode);
4997
4998 emit_insn (gen_rtx_SET (temp,
4999 gen_rtx_IF_THEN_ELSE (mode, cond,
5000 op2, const0_rtx)));
5001
5002 /* Flip the test for the second operand. */
5003 cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
5004
5005 emit_insn (gen_rtx_SET (temp2,
5006 gen_rtx_IF_THEN_ELSE (mode, cond,
5007 op3, const0_rtx)));
5008
5009 /* Merge the two results, at least one is guaranteed to be zero. */
5010 if (promote_p)
5011 {
5012 rtx temp3 = gen_reg_rtx (mode);
5013 emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
5014 temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
5015 loongarch_emit_move (operands[0], temp3);
5016 }
5017 else
5018 emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
5019 }
5020 else
5021 emit_insn (gen_rtx_SET (operands[0],
5022 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), cond,
5023 operands[2], operands[3])));
5024 }
5025
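/* For example (illustrative): for "r = (a != b) ? x : y" with all-register
   GPR operands, the expansion above becomes roughly

     t  = a ^ b              (zero iff a == b)
     t1 = (t != 0) ? x : 0   -> maskeqz
     t2 = (t == 0) ? y : 0   -> masknez
     r  = t1 | t2

   because there is no three-register conditional move for the
   general-purpose registers.  */
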
5026 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5027
5028 static void
5029 loongarch_va_start (tree valist, rtx nextarg)
5030 {
5031 nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size);
5032 std_expand_builtin_va_start (valist, nextarg);
5033 }
5034
5035 /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */
5036
5037 static bool
5038 loongarch_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
5039 tree exp ATTRIBUTE_UNUSED)
5040 {
5041 /* Always OK. */
5042 return true;
5043 }
5044
5045 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
5046 Assume that the areas do not overlap. */
5047
5048 static void
5049 loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
5050 HOST_WIDE_INT delta)
5051 {
5052 HOST_WIDE_INT offs, delta_cur;
5053 int i;
5054 machine_mode mode;
5055 rtx *regs;
5056
5057 /* Calculate how many registers we'll need for the block move.
5058 We'll emit length / delta move operations with delta as the size
5059 first. Then we may still have length % delta bytes not copied.
5060 We handle these remaining bytes by move operations with smaller
5061 (halved) sizes. For example, if length = 21 and delta = 8, we'll
5062 emit two ld.d/st.d pairs, one ld.w/st.w pair, and one ld.b/st.b
5063 pair. For each load/store pair we use a dedicated register to keep
5064 the pipeline as populated as possible. */
5065 HOST_WIDE_INT num_reg = length / delta;
5066 for (delta_cur = delta / 2; delta_cur != 0; delta_cur /= 2)
5067 num_reg += !!(length & delta_cur);
5068
5069 /* Allocate a buffer for the temporary registers. */
5070 regs = XALLOCAVEC (rtx, num_reg);
5071
5072 for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
5073 {
5074 mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require ();
5075
5076 for (; offs + delta_cur <= length; offs += delta_cur, i++)
5077 {
5078 regs[i] = gen_reg_rtx (mode);
5079 loongarch_emit_move (regs[i], adjust_address (src, mode, offs));
5080 }
5081 }
5082
5083 for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
5084 {
5085 mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require ();
5086
5087 for (; offs + delta_cur <= length; offs += delta_cur, i++)
5088 loongarch_emit_move (adjust_address (dest, mode, offs), regs[i]);
5089 }
5090 }
5091
5092 /* Helper function for doing a loop-based block operation on memory
5093 reference MEM. Each iteration of the loop will operate on LENGTH
5094 bytes of MEM.
5095
5096 Create a new base register for use within the loop and point it to
5097 the start of MEM. Create a new memory reference that uses this
5098 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
5099
5100 static void
5101 loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
5102 rtx *loop_mem)
5103 {
5104 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
5105
5106 /* Although the new mem does not refer to a known location,
5107 it does keep up to LENGTH bytes of alignment. */
5108 *loop_mem = change_address (mem, BLKmode, *loop_reg);
5109 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
5110 }
5111
5112 /* Move LENGTH bytes from SRC to DEST using a loop that moves
5113 ALIGN * LARCH_MAX_MOVE_OPS_PER_LOOP_ITER bytes at a time. LENGTH must be
5114 at least that many bytes. Assume that the memory regions do not overlap. */
5115
5116 static void
5117 loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
5118 HOST_WIDE_INT align)
5119 {
5120 rtx_code_label *label;
5121 rtx src_reg, dest_reg, final_src, test;
5122 HOST_WIDE_INT bytes_per_iter = align * LARCH_MAX_MOVE_OPS_PER_LOOP_ITER;
5123 HOST_WIDE_INT leftover;
5124
5125 leftover = length % bytes_per_iter;
5126 length -= leftover;
5127
5128 /* Create registers and memory references for use within the loop. */
5129 loongarch_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
5130 loongarch_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
5131
5132 /* Calculate the value that SRC_REG should have after the last iteration
5133 of the loop. */
5134 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), 0,
5135 0, OPTAB_WIDEN);
5136
5137 /* Emit the start of the loop. */
5138 label = gen_label_rtx ();
5139 emit_label (label);
5140
5141 /* Emit the loop body. */
5142 loongarch_block_move_straight (dest, src, bytes_per_iter, align);
5143
5144 /* Move on to the next block. */
5145 loongarch_emit_move (src_reg,
5146 plus_constant (Pmode, src_reg, bytes_per_iter));
5147 loongarch_emit_move (dest_reg,
5148 plus_constant (Pmode, dest_reg, bytes_per_iter));
5149
5150 /* Emit the loop condition. */
5151 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
5152 if (Pmode == DImode)
5153 emit_jump_insn (gen_cbranchdi4 (test, src_reg, final_src, label));
5154 else
5155 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
5156
5157 /* Mop up any left-over bytes. */
5158 if (leftover)
5159 loongarch_block_move_straight (dest, src, leftover, align);
5160 else
5161 /* Temporary fix for PR79150. */
5162 emit_insn (gen_nop ());
5163 }
5164
5165 /* Expand a cpymemsi instruction, which copies LENGTH bytes from
5166 memory reference SRC to memory reference DEST. */
5167
5168 bool
5169 loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx r_align)
5170 {
5171 if (!CONST_INT_P (r_length))
5172 return false;
5173
5174 HOST_WIDE_INT length = INTVAL (r_length);
5175 if (length > loongarch_max_inline_memcpy_size)
5176 return false;
5177
5178 HOST_WIDE_INT align = INTVAL (r_align);
5179
5180 if (!TARGET_STRICT_ALIGN || align > UNITS_PER_WORD)
5181 align = UNITS_PER_WORD;
5182
5183 if (length <= align * LARCH_MAX_MOVE_OPS_STRAIGHT)
5184 {
5185 loongarch_block_move_straight (dest, src, length, align);
5186 return true;
5187 }
5188
5189 if (optimize)
5190 {
5191 loongarch_block_move_loop (dest, src, length, align);
5192 return true;
5193 }
5194
5195 return false;
5196 }
5197
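/* For example (illustrative): an 8-byte-aligned 32-byte copy whose size fits
   within align * LARCH_MAX_MOVE_OPS_STRAIGHT is expanded as four straight
   ld.d/st.d pairs; larger constant sizes up to
   loongarch_max_inline_memcpy_size fall back to the loop expansion when
   optimizing, and anything else is left to the memcpy library call.  */
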
5198 /* Return true if loongarch_expand_block_move is the preferred
5199 implementation of the 'cpymemsi' template. */
5200
5201 bool
5202 loongarch_do_optimize_block_move_p (void)
5203 {
5204 /* If -m[no-]memcpy was given explicitly, respect it. */
5205 if (target_flags_explicit & MASK_MEMCPY)
5206 return !TARGET_MEMCPY;
5207
5208 /* Otherwise, don't optimize under -Os. */
5209 return !optimize_size;
5210 }
5211
5212 /* Expand a QI or HI mode atomic memory operation.
5213
5214 GENERATOR contains a pointer to the gen_* function that generates
5215 the SI mode underlying atomic operation using masks that we
5216 calculate.
5217
5218 RESULT is the return register for the operation. Its value is NULL
5219 if unused.
5220
5221 MEM is the location of the atomic access.
5222
5223 OLDVAL is the first operand for the operation.
5224
5225 NEWVAL is the optional second operand for the operation. Its value
5226 is NULL if unused. */
5227
5228 void
5229 loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator,
5230 rtx result, rtx mem, rtx oldval, rtx newval,
5231 rtx model)
5232 {
5233 rtx orig_addr, memsi_addr, memsi, shift, shiftsi, unshifted_mask;
5234 rtx unshifted_mask_reg, mask, inverted_mask, si_op;
5235 rtx res = NULL;
5236 machine_mode mode;
5237
5238 mode = GET_MODE (mem);
5239
5240 /* Compute the address of the containing SImode value. */
5241 orig_addr = force_reg (Pmode, XEXP (mem, 0));
5242 memsi_addr = loongarch_force_binary (Pmode, AND, orig_addr,
5243 force_reg (Pmode, GEN_INT (-4)));
5244
5245 /* Create a memory reference for it. */
5246 memsi = gen_rtx_MEM (SImode, memsi_addr);
5247 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
5248 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
5249
5250 /* Work out the byte offset of the QImode or HImode value,
5251 counting from the least significant byte. */
5252 shift = loongarch_force_binary (Pmode, AND, orig_addr, GEN_INT (3));
5253 /* Multiply by eight to convert the shift value from bytes to bits. */
5254 loongarch_emit_binary (ASHIFT, shift, shift, GEN_INT (3));
5255
5256 /* Make the final shift an SImode value, so that it can be used in
5257 SImode operations. */
5258 shiftsi = force_reg (SImode, gen_lowpart (SImode, shift));
5259
5260 /* Set MASK to an inclusive mask of the QImode or HImode value. */
5261 unshifted_mask = GEN_INT (GET_MODE_MASK (mode));
5262 unshifted_mask_reg = force_reg (SImode, unshifted_mask);
5263 mask = loongarch_force_binary (SImode, ASHIFT, unshifted_mask_reg, shiftsi);
5264
5265 /* Compute the equivalent exclusive mask. */
5266 inverted_mask = gen_reg_rtx (SImode);
5267 emit_insn (gen_rtx_SET (inverted_mask, gen_rtx_NOT (SImode, mask)));
5268
5269 /* Shift the old value into place. */
5270 if (oldval != const0_rtx)
5271 {
5272 oldval = convert_modes (SImode, mode, oldval, true);
5273 oldval = force_reg (SImode, oldval);
5274 oldval = loongarch_force_binary (SImode, ASHIFT, oldval, shiftsi);
5275 }
5276
5277 /* Do the same for the new value. */
5278 if (newval && newval != const0_rtx)
5279 {
5280 newval = convert_modes (SImode, mode, newval, true);
5281 newval = force_reg (SImode, newval);
5282 newval = loongarch_force_binary (SImode, ASHIFT, newval, shiftsi);
5283 }
5284
5285 /* Do the SImode atomic access. */
5286 if (result)
5287 res = gen_reg_rtx (SImode);
5288
5289 if (newval)
5290 si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval,
5291 model);
5292 else if (result)
5293 si_op = generator.fn_6 (res, memsi, mask, inverted_mask, oldval, model);
5294 else
5295 si_op = generator.fn_5 (memsi, mask, inverted_mask, oldval, model);
5296
5297 emit_insn (si_op);
5298
5299 if (result)
5300 {
5301 /* Shift and convert the result. */
5302 loongarch_emit_binary (AND, res, res, mask);
5303 loongarch_emit_binary (LSHIFTRT, res, res, shiftsi);
5304 loongarch_emit_move (result, gen_lowpart (GET_MODE (result), res));
5305 }
5306 }
5307
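/* For example (illustrative): an atomic operation on the QImode byte at
   address A is performed on the containing word at (A & -4); the byte's
   position within that word is (A & 3) * 8 bits, MASK selects just that
   byte, and INVERTED_MASK preserves the other three bytes across the
   SImode read-modify-write sequence generated above.  */
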
5308 /* Return true if (zero_extract OP WIDTH BITPOS) can be used as the
5309 source of an "ext" instruction or the destination of an "ins"
5310 instruction. OP must be a register operand and the following
5311 conditions must hold:
5312
5313 0 <= BITPOS < GET_MODE_BITSIZE (GET_MODE (op))
5314 0 < WIDTH <= GET_MODE_BITSIZE (GET_MODE (op))
5315 0 < BITPOS + WIDTH <= GET_MODE_BITSIZE (GET_MODE (op))
5316
5317 Also reject lengths equal to a word as they are better handled
5318 by the move patterns. */
5319
5320 bool
5321 loongarch_use_ins_ext_p (rtx op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos)
5322 {
5323 if (!register_operand (op, VOIDmode)
5324 || GET_MODE_BITSIZE (GET_MODE (op)) > BITS_PER_WORD)
5325 return false;
5326
5327 if (!IN_RANGE (width, 1, GET_MODE_BITSIZE (GET_MODE (op)) - 1))
5328 return false;
5329
5330 if (bitpos < 0 || bitpos + width > GET_MODE_BITSIZE (GET_MODE (op)))
5331 return false;
5332
5333 return true;
5334 }
5335
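/* For example (illustrative): with a 64-bit operand, WIDTH = 8 and
   BITPOS = 16 satisfy the checks above, so the bit-field extraction
   (zero_extract:DI (reg) (const_int 8) (const_int 16)) can become a single
   bstrpick.d rd,rj,23,16; a WIDTH equal to the word size is rejected
   because a plain move handles that case.  */
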
5336 /* Print the text for PRINT_OPERAND punctuation character CH to FILE.
5337 The punctuation characters are:
5338
5339 '.' Print the name of the register with a hard-wired zero (zero or $r0).
5340 '$' Print the name of the stack pointer register (sp or $r3).
5341
5342 See also loongarch_init_print_operand_punct. */
5343
5344 static void
5345 loongarch_print_operand_punctuation (FILE *file, int ch)
5346 {
5347 switch (ch)
5348 {
5349 case '.':
5350 fputs (reg_names[GP_REG_FIRST + 0], file);
5351 break;
5352
5353 case '$':
5354 fputs (reg_names[STACK_POINTER_REGNUM], file);
5355 break;
5356
5357 default:
5358 gcc_unreachable ();
5359 break;
5360 }
5361 }
5362
5363 /* Initialize loongarch_print_operand_punct. */
5364
5365 static void
5366 loongarch_init_print_operand_punct (void)
5367 {
5368 const char *p;
5369
5370 for (p = ".$"; *p; p++)
5371 loongarch_print_operand_punct[(unsigned char) *p] = true;
5372 }
5373
5374 /* PRINT_OPERAND prefix LETTER refers to the integer branch instruction
5375 associated with condition CODE. Print the condition part of the
5376 opcode to FILE. */
5377
5378 static void
5379 loongarch_print_int_branch_condition (FILE *file, enum rtx_code code,
5380 int letter)
5381 {
5382 switch (code)
5383 {
5384 case EQ:
5385 case NE:
5386 case GT:
5387 case GE:
5388 case LT:
5389 case LE:
5390 case GTU:
5391 case GEU:
5392 case LTU:
5393 case LEU:
5394 /* Conveniently, the LoongArch names for these conditions are the same
5395 as their RTL equivalents. */
5396 fputs (GET_RTX_NAME (code), file);
5397 break;
5398
5399 default:
5400 output_operand_lossage ("'%%%c' is not a valid operand prefix", letter);
5401 break;
5402 }
5403 }
5404
5405 /* Likewise floating-point branches. */
5406
5407 static void
5408 loongarch_print_float_branch_condition (FILE *file, enum rtx_code code,
5409 int letter)
5410 {
5411 switch (code)
5412 {
5413 case EQ:
5414 fputs ("ceqz", file);
5415 break;
5416
5417 case NE:
5418 fputs ("cnez", file);
5419 break;
5420
5421 default:
5422 output_operand_lossage ("'%%%c' is not a valid operand prefix", letter);
5423 break;
5424 }
5425 }
5426
5427 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5428
5429 static bool
5430 loongarch_print_operand_punct_valid_p (unsigned char code)
5431 {
5432 return loongarch_print_operand_punct[code];
5433 }
5434
5435 /* Return true if a FENCE should be emitted before a memory access to
5436 implement the acquire or release portion of memory model MODEL. */
5437
5438 static bool
5439 loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
5440 {
5441 switch (model)
5442 {
5443 case MEMMODEL_ACQ_REL:
5444 case MEMMODEL_SEQ_CST:
5445 case MEMMODEL_SYNC_SEQ_CST:
5446 case MEMMODEL_RELEASE:
5447 case MEMMODEL_SYNC_RELEASE:
5448 case MEMMODEL_ACQUIRE:
5449 case MEMMODEL_CONSUME:
5450 case MEMMODEL_SYNC_ACQUIRE:
5451 return true;
5452
5453 case MEMMODEL_RELAXED:
5454 return false;
5455
5456 default:
5457 gcc_unreachable ();
5458 }
5459 }
5460
5461 /* Return true if a FENCE should be emitted before a memory access to
5462 implement the release portion of memory model MODEL. */
5463
5464 static bool
5465 loongarch_memmodel_needs_release_fence (enum memmodel model)
5466 {
5467 switch (model)
5468 {
5469 case MEMMODEL_ACQ_REL:
5470 case MEMMODEL_SEQ_CST:
5471 case MEMMODEL_SYNC_SEQ_CST:
5472 case MEMMODEL_RELEASE:
5473 case MEMMODEL_SYNC_RELEASE:
5474 return true;
5475
5476 case MEMMODEL_ACQUIRE:
5477 case MEMMODEL_CONSUME:
5478 case MEMMODEL_SYNC_ACQUIRE:
5479 case MEMMODEL_RELAXED:
5480 return false;
5481
5482 default:
5483 gcc_unreachable ();
5484 }
5485 }
5486
5487 /* Print symbolic operand OP, which is part of a HIGH or LO_SUM
5488 in context CONTEXT. HI_RELOC indicates a high-part reloc. */
5489
5490 static void
5491 loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
5492 bool hi_reloc)
5493 {
5494 const char *reloc;
5495 enum loongarch_symbol_type symbol_type =
5496 loongarch_classify_symbolic_expression (op);
5497
5498 if (loongarch_symbol_extreme_p (symbol_type))
5499 gcc_assert (TARGET_EXPLICIT_RELOCS);
5500
5501 switch (symbol_type)
5502 {
5503 case SYMBOL_PCREL64:
5504 if (hi64_part)
5505 {
5506 reloc = hi_reloc ? "%pc64_hi12" : "%pc64_lo20";
5507 break;
5508 }
5509 /* fall through */
5510 case SYMBOL_PCREL:
5511 reloc = hi_reloc ? "%pc_hi20" : "%pc_lo12";
5512 break;
5513
5514 case SYMBOL_GOT_DISP:
5515 if (hi64_part)
5516 {
5517 if (TARGET_CMODEL_EXTREME)
5518 reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
5519 else
5520 gcc_unreachable ();
5521 }
5522 else
5523 reloc = hi_reloc ? "%got_pc_hi20" : "%got_pc_lo12";
5524 break;
5525
5526 case SYMBOL_TLS_IE:
5527 if (hi64_part)
5528 {
5529 if (TARGET_CMODEL_EXTREME)
5530 reloc = hi_reloc ? "%ie64_pc_hi12" : "%ie64_pc_lo20";
5531 else
5532 gcc_unreachable ();
5533 }
5534 else
5535 reloc = hi_reloc ? "%ie_pc_hi20" : "%ie_pc_lo12";
5536 break;
5537
5538 case SYMBOL_TLS_LE:
5539 if (hi64_part)
5540 {
5541 if (TARGET_CMODEL_EXTREME)
5542 reloc = hi_reloc ? "%le64_hi12" : "%le64_lo20";
5543 else
5544 gcc_unreachable ();
5545 }
5546 else
5547 reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
5548 break;
5549
5550 case SYMBOL_TLSGD:
5551 if (hi64_part)
5552 {
5553 if (TARGET_CMODEL_EXTREME)
5554 reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
5555 else
5556 gcc_unreachable ();
5557 }
5558 else
5559 reloc = hi_reloc ? "%gd_pc_hi20" : "%got_pc_lo12";
5560 break;
5561
5562 case SYMBOL_TLSLDM:
5563 if (hi64_part)
5564 {
5565 if (TARGET_CMODEL_EXTREME)
5566 reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
5567 else
5568 gcc_unreachable ();
5569 }
5570 else
5571 reloc = hi_reloc ? "%ld_pc_hi20" : "%got_pc_lo12";
5572 break;
5573
5574 default:
5575 gcc_unreachable ();
5576 }
5577
5578 fprintf (file, "%s(", reloc);
5579 output_addr_const (file, loongarch_strip_unspec_address (op));
5580 fputc (')', file);
5581 }
5582
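/* For example (illustrative): for a SYMBOL_PCREL reference to "sym", the %h
   and %L operand codes below wrap the symbol as "%pc_hi20(sym)" and
   "%pc_lo12(sym)", which is what a pcalau12i/addi.d pair expects when
   forming a PC-relative address.  */
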
5583 /* Implement TARGET_PRINT_OPERAND. The LoongArch-specific operand codes are:
5584
5585 'A' Print a _DB suffix if the memory model requires a release.
5586 'b' Print the address of a memory operand, without offset.
5587 'B' Print CONST_INT OP element 0 of a replicated CONST_VECTOR
5588 as an unsigned byte [0..255].
5589 'c' Print an integer.
5590 'C' Print the integer branch condition for comparison OP.
5591 'd' Print CONST_INT OP in decimal.
5592 'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal.
5593 'F' Print the FPU branch condition for comparison OP.
5594 'G' Print a DBAR insn if the memory model requires a release.
5595 'H' Print address 52-61bit relocation associated with OP.
5596 'h' Print the high-part relocation associated with OP.
5597 'i' Print i if the operand is not a register.
5598 'L' Print the low-part relocation associated with OP.
5599 'm' Print one less than CONST_INT OP in decimal.
5600 'N' Print the inverse of the integer branch condition for comparison OP.
5601 'r' Print address 12-31bit relocation associated with OP.
5602 'R' Print address 32-51bit relocation associated with OP.
5603 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
5604 'z' for (eq:?I ...), 'n' for (ne:?I ...).
5605 't' Like 'T', but with the EQ/NE cases reversed
5606 'V' Print exact log2 of CONST_INT OP element 0 of a replicated
5607 CONST_VECTOR in decimal.
5608 'v' Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI,
5609 V4SI, V2DI, and w, d for vector modes V4SF, V2DF respectively.
5610 'W' Print the inverse of the FPU branch condition for comparison OP.
5611 'w' Print a LSX register.
5612 'X' Print CONST_INT OP in hexadecimal format.
5613 'x' Print the low 16 bits of CONST_INT OP in hexadecimal format.
5614 'Y' Print loongarch_fp_conditions[INTVAL (OP)]
5615 'y' Print exact log2 of CONST_INT OP in decimal.
5616 'Z' Print OP and a comma for 8CC, otherwise print nothing.
5617 'z' Print $r0 if OP is zero, otherwise print OP normally. */
5618
5619 static void
5620 loongarch_print_operand (FILE *file, rtx op, int letter)
5621 {
5622 enum rtx_code code;
5623
5624 if (loongarch_print_operand_punct_valid_p (letter))
5625 {
5626 loongarch_print_operand_punctuation (file, letter);
5627 return;
5628 }
5629
5630 gcc_assert (op);
5631 code = GET_CODE (op);
5632
5633 switch (letter)
5634 {
5635 case 'A':
5636 if (loongarch_memmodel_needs_rel_acq_fence ((enum memmodel) INTVAL (op)))
5637 fputs ("_db", file);
5638 break;
5639 case 'E':
5640 if (GET_CODE (op) == CONST_VECTOR)
5641 {
5642 gcc_assert (loongarch_const_vector_same_val_p (op, GET_MODE (op)));
5643 op = CONST_VECTOR_ELT (op, 0);
5644 gcc_assert (CONST_INT_P (op));
5645 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
5646 }
5647 else
5648 output_operand_lossage ("invalid use of '%%%c'", letter);
5649 break;
5650
5651
5652 case 'c':
5653 if (CONST_INT_P (op))
5654 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
5655 else
5656 output_operand_lossage ("unsupported operand for code '%c'", letter);
5657
5658 break;
5659
5660 case 'C':
5661 loongarch_print_int_branch_condition (file, code, letter);
5662 break;
5663
5664 case 'd':
5665 if (CONST_INT_P (op))
5666 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
5667 else
5668 output_operand_lossage ("invalid use of '%%%c'", letter);
5669 break;
5670
5671 case 'F':
5672 loongarch_print_float_branch_condition (file, code, letter);
5673 break;
5674
5675 case 'G':
5676 if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
5677 fputs ("dbar\t0", file);
5678 break;
5679
5680 case 'h':
5681 if (code == HIGH)
5682 op = XEXP (op, 0);
5683 loongarch_print_operand_reloc (file, op, false /* hi64_part */,
5684 true /* hi_reloc */);
5685 break;
5686
5687 case 'H':
5688 loongarch_print_operand_reloc (file, op, true /* hi64_part */,
5689 true /* hi_reloc */);
5690 break;
5691
5692 case 'i':
5693 if (code != REG)
5694 fputs ("i", file);
5695 break;
5696
5697 case 'L':
5698 loongarch_print_operand_reloc (file, op, false /* hi64_part */,
5699 false /* hi_reloc */);
5700 break;
5701 case 'B':
5702 if (GET_CODE (op) == CONST_VECTOR)
5703 {
5704 gcc_assert (loongarch_const_vector_same_val_p (op, GET_MODE (op)));
5705 op = CONST_VECTOR_ELT (op, 0);
5706 gcc_assert (CONST_INT_P (op));
5707 unsigned HOST_WIDE_INT val8 = UINTVAL (op) & GET_MODE_MASK (QImode);
5708 fprintf (file, HOST_WIDE_INT_PRINT_UNSIGNED, val8);
5709 }
5710 else
5711 output_operand_lossage ("invalid use of '%%%c'", letter);
5712 break;
5713
5714 case 'm':
5715 if (CONST_INT_P (op))
5716 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) - 1);
5717 else
5718 output_operand_lossage ("invalid use of '%%%c'", letter);
5719 break;
5720
5721 case 'N':
5722 loongarch_print_int_branch_condition (file, reverse_condition (code),
5723 letter);
5724 break;
5725
5726 case 'r':
5727 loongarch_print_operand_reloc (file, op, false /* hi64_part */,
5728 true /* hi_reloc */);
5729 break;
5730
5731 case 'R':
5732 loongarch_print_operand_reloc (file, op, true /* hi64_part */,
5733 false /* hi_reloc */);
5734 break;
5735
5736 case 't':
5737 case 'T':
5738 {
5739 int truth = (code == NE) == (letter == 'T');
5740 fputc ("zfnt"[truth * 2 + FCC_REG_P (REGNO (XEXP (op, 0)))], file);
5741 }
5742 break;
5743
5744 case 'V':
5745 if (CONST_VECTOR_P (op))
5746 {
5747 machine_mode mode = GET_MODE_INNER (GET_MODE (op));
5748 unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0));
5749 int vlog2 = exact_log2 (val & GET_MODE_MASK (mode));
5750 if (vlog2 != -1)
5751 fprintf (file, "%d", vlog2);
5752 else
5753 output_operand_lossage ("invalid use of '%%%c'", letter);
5754 }
5755 else
5756 output_operand_lossage ("invalid use of '%%%c'", letter);
5757 break;
5758
5759 case 'v':
5760 switch (GET_MODE (op))
5761 {
5762 case E_V16QImode:
5763 case E_V32QImode:
5764 fprintf (file, "b");
5765 break;
5766 case E_V8HImode:
5767 case E_V16HImode:
5768 fprintf (file, "h");
5769 break;
5770 case E_V4SImode:
5771 case E_V4SFmode:
5772 case E_V8SImode:
5773 case E_V8SFmode:
5774 fprintf (file, "w");
5775 break;
5776 case E_V2DImode:
5777 case E_V2DFmode:
5778 case E_V4DImode:
5779 case E_V4DFmode:
5780 fprintf (file, "d");
5781 break;
5782 default:
5783 output_operand_lossage ("invalid use of '%%%c'", letter);
5784 }
5785 break;
5786
5787 case 'W':
5788 loongarch_print_float_branch_condition (file, reverse_condition (code),
5789 letter);
5790 break;
5791
5792 case 'w':
5793 if (code == REG && LSX_REG_P (REGNO (op)))
5794 fprintf (file, "$vr%s", &reg_names[REGNO (op)][2]);
5795 else
5796 output_operand_lossage ("invalid use of '%%%c'", letter);
5797 break;
5798
5799 case 'x':
5800 if (CONST_INT_P (op))
5801 fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op) & 0xffff);
5802 else
5803 output_operand_lossage ("invalid use of '%%%c'", letter);
5804 break;
5805
5806 case 'X':
5807 if (CONST_INT_P (op))
5808 fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op));
5809 else
5810 output_operand_lossage ("invalid use of '%%%c'", letter);
5811 break;
5812
5813 case 'y':
5814 if (CONST_INT_P (op))
5815 {
5816 int val = exact_log2 (INTVAL (op));
5817 if (val != -1)
5818 fprintf (file, "%d", val);
5819 else
5820 output_operand_lossage ("invalid use of '%%%c'", letter);
5821 }
5822 else
5823 output_operand_lossage ("invalid use of '%%%c'", letter);
5824 break;
5825
5826 case 'Y':
5827 if (code == CONST_INT
5828 && UINTVAL (op) < ARRAY_SIZE (loongarch_fp_conditions))
5829 fputs (loongarch_fp_conditions[UINTVAL (op)], file);
5830 else
5831 output_operand_lossage ("'%%%c' is not a valid operand prefix",
5832 letter);
5833 break;
5834
5835 case 'Z':
5836 loongarch_print_operand (file, op, 0);
5837 fputc (',', file);
5838 break;
5839
5840 default:
5841 switch (code)
5842 {
5843 case REG:
5844 {
5845 unsigned int regno = REGNO (op);
5846 if (letter && letter != 'z')
5847 output_operand_lossage ("invalid use of '%%%c'", letter);
5848 fprintf (file, "%s", reg_names[regno]);
5849 }
5850 break;
5851
5852 case MEM:
5853 if (letter == 'D')
5854 output_address (GET_MODE (op),
5855 plus_constant (Pmode, XEXP (op, 0), 4));
5856 else if (letter == 'b')
5857 {
5858 gcc_assert (REG_P (XEXP (op, 0)));
5859 loongarch_print_operand (file, XEXP (op, 0), 0);
5860 }
5861 else if (letter && letter != 'z')
5862 output_operand_lossage ("invalid use of '%%%c'", letter);
5863 else
5864 output_address (GET_MODE (op), XEXP (op, 0));
5865 break;
5866
5867 default:
5868 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
5869 fputs (reg_names[GP_REG_FIRST], file);
5870 else if (letter && letter != 'z')
5871 output_operand_lossage ("invalid use of '%%%c'", letter);
5872 else
5873 output_addr_const (file, loongarch_strip_unspec_address (op));
5874 break;
5875 }
5876 }
5877 }
5878
5879 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
5880
5881 static void
5882 loongarch_print_operand_address (FILE *file, machine_mode /* mode */, rtx x)
5883 {
5884 struct loongarch_address_info addr;
5885
5886 if (loongarch_classify_address (&addr, x, word_mode, true))
5887 switch (addr.type)
5888 {
5889 case ADDRESS_REG:
5890 fprintf (file, "%s,", reg_names[REGNO (addr.reg)]);
5891 loongarch_print_operand (file, addr.offset, 0);
5892 return;
5893
5894 case ADDRESS_REG_REG:
5895 fprintf (file, "%s,%s", reg_names[REGNO (addr.reg)],
5896 reg_names[REGNO (addr.offset)]);
5897 return;
5898
5899 case ADDRESS_LO_SUM:
5900 fprintf (file, "%s,", reg_names[REGNO (addr.reg)]);
5901 loongarch_print_operand_reloc (file, addr.offset, false /* hi64_part */,
5902 false /* hi_reloc */);
5903 return;
5904
5905 case ADDRESS_CONST_INT:
5906 fprintf (file, "%s,", reg_names[GP_REG_FIRST]);
5907 output_addr_const (file, x);
5908 return;
5909
5910 case ADDRESS_SYMBOLIC:
5911 output_addr_const (file, loongarch_strip_unspec_address (x));
5912 return;
5913 }
5914 if (CONST_INT_P (x))
5915 output_addr_const (file, x);
5916 else
5917 gcc_unreachable ();
5918 }
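/* For example, an ADDRESS_REG operand such as ($r12 + 16) is printed as
   "$r12,16" and an ADDRESS_REG_REG operand as "$r12,$r13"; the LO_SUM and
   symbolic cases defer to loongarch_print_operand_reloc and
   output_addr_const respectively (register names here are illustrative). */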
5919
5920 /* Implement TARGET_ASM_SELECT_RTX_SECTION. */
5921
5922 static section *
5923 loongarch_select_rtx_section (machine_mode mode, rtx x,
5924 unsigned HOST_WIDE_INT align)
5925 {
5926 /* ??? Consider using mergeable small data sections. */
5927 if (loongarch_rtx_constant_in_small_data_p (mode))
5928 return get_named_section (NULL, ".sdata", 0);
5929
5930 return default_elf_select_rtx_section (mode, x, align);
5931 }
5932
5933 /* Implement TARGET_ASM_FUNCTION_RODATA_SECTION.
5934
5935 The complication here is that jump tables will use absolute addresses,
5936 and should therefore not be included in the read-only part of a DSO.
5937 Handle such cases by selecting a normal data section instead of a
5938 read-only one. The logic apes that in default_function_rodata_section. */
5939
5940 static section *
5941 loongarch_function_rodata_section (tree decl, bool)
5942 {
5943 return default_function_rodata_section (decl, false);
5944 }
5945
5946 /* Implement TARGET_IN_SMALL_DATA_P. */
5947
5948 static bool
5949 loongarch_in_small_data_p (const_tree decl)
5950 {
5951 int size;
5952
5953 if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL)
5954 return false;
5955
5956 if (VAR_P (decl) && DECL_SECTION_NAME (decl) != 0)
5957 {
5958 const char *name;
5959
5960 /* Reject anything that isn't in a known small-data section. */
5961 name = DECL_SECTION_NAME (decl);
5962 if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0)
5963 return false;
5964
5965 /* If a symbol is defined externally, the assembler will use the
5966 usual -G rules when deciding how to implement macros. */
5967 if (!DECL_EXTERNAL (decl))
5968 return true;
5969 }
5970
5971 /* We have traditionally not treated zero-sized objects as small data,
5972 so this is now effectively part of the ABI. */
5973 size = int_size_in_bytes (TREE_TYPE (decl));
5974 return size > 0 && size <= g_switch_value;
5975 }
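/* For example, with -G 8 in effect an 8-byte global such as "long counter;"
   is treated as small data, while larger objects, objects the user placed in
   a named section other than .sdata/.sbss, and everything in PIC
   compilations (where g_switch_value is forced to 0, see
   loongarch_option_override_internal below) fall outside small data. */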
5976
5977 /* The LoongArch debug format wants all automatic variables and arguments
5978 to be in terms of the virtual frame pointer (stack pointer before
5979 any adjustment in the function), while the LoongArch linker wants
5980 the frame pointer to be the stack pointer after the initial
5981 adjustment. So, we do the adjustment here. The arg pointer (which
5982 is eliminated) points to the virtual frame pointer, while the frame
5983 pointer (which may be eliminated) points to the stack pointer after
5984 the initial adjustments. */
5985
5986 HOST_WIDE_INT
5987 loongarch_debugger_offset (rtx addr, HOST_WIDE_INT offset)
5988 {
5989 rtx offset2 = const0_rtx;
5990 rtx reg = eliminate_constant_term (addr, &offset2);
5991
5992 if (offset == 0)
5993 offset = INTVAL (offset2);
5994
5995 if (reg == stack_pointer_rtx
5996 || reg == frame_pointer_rtx
5997 || reg == hard_frame_pointer_rtx)
5998 {
5999 offset -= cfun->machine->frame.total_size;
6000 if (reg == hard_frame_pointer_rtx)
6001 offset += cfun->machine->frame.hard_frame_pointer_offset;
6002 }
6003
6004 return offset;
6005 }
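/* A rough worked example of the adjustment above (illustrative values only):
   with a frame whose total_size is 64 bytes, a variable addressed as $sp + 8
   after the prologue corresponds to offset 8 - 64 = -56 from the virtual
   frame pointer; when it is addressed off the hard frame pointer,
   hard_frame_pointer_offset is added back on top of that. */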
6006
6007 /* Implement ASM_OUTPUT_EXTERNAL. */
6008
6009 void
6010 loongarch_output_external (FILE *file, tree decl, const char *name)
6011 {
6012 default_elf_asm_output_external (file, decl, name);
6013
6014 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
6015 set in order to avoid putting out names that are never really
6016 used. */
6017 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
6018 {
6019 if (loongarch_in_small_data_p (decl))
6020 {
6021 /* When using assembler macros, emit .extern directives for
6022 all small-data externs so that the assembler knows how
6023 big they are.
6024
6025 In most cases it would be safe (though pointless) to emit
6026 .externs for other symbols too. One exception is when an
6027 object is within the -G limit but declared by the user to
6028 be in a section other than .sbss or .sdata. */
6029 fputs ("\t.extern\t", file);
6030 assemble_name (file, name);
6031 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC "\n",
6032 int_size_in_bytes (TREE_TYPE (decl)));
6033 }
6034 }
6035 }
6036
6037 /* Implement TARGET_ASM_OUTPUT_DWARF_DTPREL. */
6038
6039 static void ATTRIBUTE_UNUSED
6040 loongarch_output_dwarf_dtprel (FILE *file, int size, rtx x)
6041 {
6042 switch (size)
6043 {
6044 case 4:
6045 fputs ("\t.dtprelword\t", file);
6046 break;
6047
6048 case 8:
6049 fputs ("\t.dtpreldword\t", file);
6050 break;
6051
6052 default:
6053 gcc_unreachable ();
6054 }
6055 output_addr_const (file, x);
6056 fputs ("+0x8000", file);
6057 }
6058
6059 /* Implement ASM_OUTPUT_ASCII. */
6060
6061 void
6062 loongarch_output_ascii (FILE *stream, const char *string, size_t len)
6063 {
6064 size_t i;
6065 int cur_pos;
6066
6067 cur_pos = 17;
6068 fprintf (stream, "\t.ascii\t\"");
6069 for (i = 0; i < len; i++)
6070 {
6071 int c;
6072
6073 c = (unsigned char) string[i];
6074 if (ISPRINT (c))
6075 {
6076 if (c == '\\' || c == '\"')
6077 {
6078 putc ('\\', stream);
6079 cur_pos++;
6080 }
6081 putc (c, stream);
6082 cur_pos++;
6083 }
6084 else
6085 {
6086 fprintf (stream, "\\%03o", c);
6087 cur_pos += 4;
6088 }
6089
6090 if (cur_pos > 72 && i + 1 < len)
6091 {
6092 cur_pos = 17;
6093 fprintf (stream, "\"\n\t.ascii\t\"");
6094 }
6095 }
6096 fprintf (stream, "\"\n");
6097 }
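/* For instance, a call such as loongarch_output_ascii (stream, "a\"b\n", 4)
   would emit something like
   	.ascii	"a\"b\012"
   with the string split onto further .ascii lines once the output column
   passes 72 (a sketch of the expected output, not taken from a real dump). */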
6098
6099 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
6100
6101 static bool
6102 loongarch_frame_pointer_required (void)
6103 {
6104 /* If the function contains dynamic stack allocations, we need to
6105 use the frame pointer to access the static parts of the frame. */
6106 if (cfun->calls_alloca)
6107 return true;
6108
6109 return false;
6110 }
6111
6112 /* Implement TARGET_CAN_ELIMINATE. Make sure that we're not trying
6113 to eliminate to the wrong hard frame pointer. */
6114
6115 static bool
6116 loongarch_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
6117 {
6118 return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
6119 }
6120
6121 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
6122 previous frame. */
6123
6124 rtx
6125 loongarch_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
6126 {
6127 if (count != 0)
6128 return const0_rtx;
6129
6130 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
6131 }
6132
6133 /* Emit code to change the current function's return address to
6134 ADDRESS. SCRATCH is available as a scratch register, if needed.
6135 ADDRESS and SCRATCH are both word-mode GPRs. */
6136
6137 void
6138 loongarch_set_return_address (rtx address, rtx scratch)
6139 {
6140 rtx slot_address;
6141
6142 gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
6143
6144 if (frame_pointer_needed)
6145 slot_address = loongarch_add_offset (scratch, hard_frame_pointer_rtx,
6146 -UNITS_PER_WORD);
6147 else
6148 slot_address = loongarch_add_offset (scratch, stack_pointer_rtx,
6149 cfun->machine->frame.gp_sp_offset);
6150
6151 loongarch_emit_move (gen_frame_mem (GET_MODE (address), slot_address),
6152 address);
6153 }
6154
6155 /* Return true if register REGNO can store a value of mode MODE.
6156 The result of this function is cached in loongarch_hard_regno_mode_ok. */
6157
6158 static bool
6159 loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
6160 {
6161 unsigned int size;
6162 enum mode_class mclass;
6163
6164 if (mode == FCCmode)
6165 return FCC_REG_P (regno);
6166
6167 size = GET_MODE_SIZE (mode);
6168 mclass = GET_MODE_CLASS (mode);
6169
6170 if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode))
6171 return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD;
6172
6173 /* For LSX, allow TImode and 128-bit vector modes in all FPRs. */
6174 if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode))
6175 return true;
6176
6177 if (FP_REG_P (regno))
6178 {
6179 if (mclass == MODE_FLOAT
6180 || mclass == MODE_COMPLEX_FLOAT
6181 || mclass == MODE_VECTOR_FLOAT)
6182 return size <= UNITS_PER_FPVALUE;
6183
6184 /* Allow integer modes that fit into a single register. We need
6185 to put integers into FPRs when using conversion instructions like
6186 FFINT and FTINT. There's no point allowing sizes smaller than a word,
6187 because the FPU has no appropriate load/store instructions. */
6188 if (mclass == MODE_INT)
6189 return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FPREG;
6190 }
6191
6192 return false;
6193 }
6194
6195 /* Implement TARGET_HARD_REGNO_MODE_OK. */
6196
6197 static bool
6198 loongarch_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
6199 {
6200 return loongarch_hard_regno_mode_ok_p[mode][regno];
6201 }
6202
6203
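/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */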
6204 static bool
6205 loongarch_hard_regno_call_part_clobbered (unsigned int,
6206 unsigned int regno, machine_mode mode)
6207 {
6208 if (ISA_HAS_LSX && FP_REG_P (regno) && GET_MODE_SIZE (mode) > 8)
6209 return true;
6210
6211 return false;
6212 }
6213
6214 /* Implement TARGET_HARD_REGNO_NREGS. */
6215
6216 static unsigned int
6217 loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode)
6218 {
6219 if (FCC_REG_P (regno))
6220 /* The size of FP status registers is always 4, because they only hold
6221 FCCmode values, and FCCmode is always considered to be 4 bytes wide. */
6222 return (GET_MODE_SIZE (mode) + 3) / 4;
6223
6224 if (FP_REG_P (regno))
6225 {
6226 if (LSX_SUPPORTED_MODE_P (mode))
6227 return 1;
6228
6229 return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG;
6230 }
6231
6232 /* All other registers are word-sized. */
6233 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6234 }
6235
6236 /* Implement CLASS_MAX_NREGS, taking the maximum of the cases
6237 in loongarch_hard_regno_nregs. */
6238
6239 int
6240 loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode)
6241 {
6242 int size;
6243 HARD_REG_SET left;
6244
6245 size = 0x8000;
6246 left = reg_class_contents[rclass];
6247 if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FCC_REGS]))
6248 {
6249 if (loongarch_hard_regno_mode_ok (FCC_REG_FIRST, mode))
6250 size = MIN (size, 4);
6251
6252 left &= ~reg_class_contents[FCC_REGS];
6253 }
6254 if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FP_REGS]))
6255 {
6256 if (loongarch_hard_regno_mode_ok (FP_REG_FIRST, mode))
6257 {
6258 if (LSX_SUPPORTED_MODE_P (mode))
6259 size = MIN (size, UNITS_PER_LSX_REG);
6260 else
6261 size = MIN (size, UNITS_PER_FPREG);
6262 }
6263 left &= ~reg_class_contents[FP_REGS];
6264 }
6265 if (!hard_reg_set_empty_p (left))
6266 size = MIN (size, UNITS_PER_WORD);
6267 return (GET_MODE_SIZE (mode) + size - 1) / size;
6268 }
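/* As a quick sanity check of the ceiling division above: for GR_REGS and a
   16-byte mode on LA64 (UNITS_PER_WORD == 8) this yields (16 + 8 - 1) / 8
   = 2 registers, whereas a 16-byte LSX vector in FP_REGS needs only a
   single 128-bit register. */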
6269
6270 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
6271
6272 static bool
6273 loongarch_can_change_mode_class (machine_mode from, machine_mode to,
6274 reg_class_t rclass)
6275 {
6276 /* Allow conversions between different LSX vector modes. */
6277 if (LSX_SUPPORTED_MODE_P (from) && LSX_SUPPORTED_MODE_P (to))
6278 return true;
6279
6280 return !reg_classes_intersect_p (FP_REGS, rclass);
6281 }
6282
6283 /* Return true if moves in mode MODE can use the FPU's fmov.fmt
6284 instruction. */
6285
6286 static bool
6287 loongarch_mode_ok_for_mov_fmt_p (machine_mode mode)
6288 {
6289 switch (mode)
6290 {
6291 case E_FCCmode:
6292 case E_SFmode:
6293 return TARGET_HARD_FLOAT;
6294
6295 case E_DFmode:
6296 return TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT;
6297
6298 default:
6299 return LSX_SUPPORTED_MODE_P (mode);
6300 }
6301 }
6302
6303 /* Implement TARGET_MODES_TIEABLE_P. */
6304
6305 static bool
6306 loongarch_modes_tieable_p (machine_mode mode1, machine_mode mode2)
6307 {
6308 /* FPRs allow no mode punning, so it's not worth tying modes if we'd
6309 prefer to put one of them in FPRs. */
6310 return (mode1 == mode2
6311 || (!loongarch_mode_ok_for_mov_fmt_p (mode1)
6312 && !loongarch_mode_ok_for_mov_fmt_p (mode2)));
6313 }
6314
6315 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
6316
6317 static reg_class_t
6318 loongarch_preferred_reload_class (rtx x, reg_class_t rclass)
6319 {
6320 if (reg_class_subset_p (FP_REGS, rclass)
6321 && loongarch_mode_ok_for_mov_fmt_p (GET_MODE (x)))
6322 return FP_REGS;
6323
6324 if (reg_class_subset_p (GR_REGS, rclass))
6325 rclass = GR_REGS;
6326
6327 return rclass;
6328 }
6329
6330 /* RCLASS is a class involved in a REGISTER_MOVE_COST calculation.
6331 Return a "canonical" class to represent it in later calculations. */
6332
6333 static reg_class_t
6334 loongarch_canonicalize_move_class (reg_class_t rclass)
6335 {
6336 if (reg_class_subset_p (rclass, GENERAL_REGS))
6337 rclass = GENERAL_REGS;
6338
6339 return rclass;
6340 }
6341
6342 /* Return the cost of moving a value from a register of class FROM to a GPR.
6343 Return 0 for classes that are unions of other classes handled by this
6344 function. */
6345
6346 static int
6347 loongarch_move_to_gpr_cost (reg_class_t from)
6348 {
6349 switch (from)
6350 {
6351 case GENERAL_REGS:
6352 /* MOVE macro. */
6353 return 2;
6354
6355 case FP_REGS:
6356 /* MOVFR2GR, etc. */
6357 return 4;
6358
6359 default:
6360 return 0;
6361 }
6362 }
6363
6364 /* Return the cost of moving a value from a GPR to a register of class TO.
6365 Return 0 for classes that are unions of other classes handled by this
6366 function. */
6367
6368 static int
6369 loongarch_move_from_gpr_cost (reg_class_t to)
6370 {
6371 switch (to)
6372 {
6373 case GENERAL_REGS:
6374 /* MOVE macro. */
6375 return 2;
6376
6377 case FP_REGS:
6378 /* MOVGR2FR, etc. */
6379 return 4;
6380
6381 default:
6382 return 0;
6383 }
6384 }
6385
6386 /* Implement TARGET_REGISTER_MOVE_COST. Return 0 for classes that are the
6387 maximum of the move costs for subclasses; regclass will work out
6388 the maximum for us. */
6389
6390 static int
6391 loongarch_register_move_cost (machine_mode mode, reg_class_t from,
6392 reg_class_t to)
6393 {
6394 reg_class_t dregs;
6395 int cost1, cost2;
6396
6397 from = loongarch_canonicalize_move_class (from);
6398 to = loongarch_canonicalize_move_class (to);
6399
6400 /* Handle moves that can be done without using general-purpose registers. */
6401 if (from == FP_REGS)
6402 {
6403 if (to == FP_REGS && loongarch_mode_ok_for_mov_fmt_p (mode))
6404 /* FMOV.FMT. */
6405 return 4;
6406 }
6407
6408 /* Handle cases in which only one class deviates from the ideal. */
6409 dregs = GENERAL_REGS;
6410 if (from == dregs)
6411 return loongarch_move_from_gpr_cost (to);
6412 if (to == dregs)
6413 return loongarch_move_to_gpr_cost (from);
6414
6415 /* Handle cases that require a GPR temporary. */
6416 cost1 = loongarch_move_to_gpr_cost (from);
6417 if (cost1 != 0)
6418 {
6419 cost2 = loongarch_move_from_gpr_cost (to);
6420 if (cost2 != 0)
6421 return cost1 + cost2;
6422 }
6423
6424 return 0;
6425 }
6426
6427 /* Implement TARGET_MEMORY_MOVE_COST. */
6428
6429 static int
6430 loongarch_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
6431 {
6432 return (loongarch_cost->memory_latency
6433 + memory_move_secondary_cost (mode, rclass, in));
6434 }
6435
6436 /* Return the register class required for a secondary register when
6437 copying between one of the registers in RCLASS and value X, which
6438 has mode MODE. X is the source of the move if IN_P, otherwise it
6439 is the destination. Return NO_REGS if no secondary register is
6440 needed. */
6441
6442 static reg_class_t
6443 loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
6444 reg_class_t rclass, machine_mode mode,
6445 secondary_reload_info *sri ATTRIBUTE_UNUSED)
6446 {
6447 int regno;
6448
6449 regno = true_regnum (x);
6450
6451 if (reg_class_subset_p (rclass, FP_REGS))
6452 {
6453 if (regno < 0
6454 || (MEM_P (x)
6455 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)))
6456 /* In this case we can use fld.s, fst.s, fld.d or fst.d. We'll use
6457 pairs of 32-bit accesses if the 64-bit forms are not supported. */
6458 return NO_REGS;
6459
6460 if (MEM_P (x) && LSX_SUPPORTED_MODE_P (mode))
6461 /* In this case we can use LSX LD.* and ST.*. */
6462 return NO_REGS;
6463
6464 if (GP_REG_P (regno) || x == CONST0_RTX (mode))
6465 /* In this case we can use movgr2fr.s, movfr2gr.s, movgr2fr.d or
6466 * movfr2gr.d. */
6467 return NO_REGS;
6468
6469 if (CONSTANT_P (x) && !targetm.cannot_force_const_mem (mode, x))
6470 /* We can force the constant to memory and use fld.s
6471 and fld.d. As above, we will use pairs of 32-bit loads if
6472 fld.d is not supported. */
6473 return NO_REGS;
6474
6475 if (FP_REG_P (regno) && loongarch_mode_ok_for_mov_fmt_p (mode))
6476 /* In this case we can use fmov.{s/d}. */
6477 return NO_REGS;
6478
6479 /* Otherwise, we need to reload through an integer register. */
6480 return GR_REGS;
6481 }
6482 if (FP_REG_P (regno))
6483 return reg_class_subset_p (rclass, GR_REGS) ? NO_REGS : GR_REGS;
6484
6485 return NO_REGS;
6486 }
6487
6488 /* Implement TARGET_VALID_POINTER_MODE. */
6489
6490 static bool
6491 loongarch_valid_pointer_mode (scalar_int_mode mode)
6492 {
6493 return mode == SImode || (TARGET_64BIT && mode == DImode);
6494 }
6495
6496 /* Implement TARGET_VECTOR_MODE_SUPPORTED_P. */
6497
6498 static bool
6499 loongarch_vector_mode_supported_p (machine_mode mode)
6500 {
6501 return LSX_SUPPORTED_MODE_P (mode);
6502 }
6503
6504 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
6505
6506 static bool
6507 loongarch_scalar_mode_supported_p (scalar_mode mode)
6508 {
6509 if (ALL_FIXED_POINT_MODE_P (mode)
6510 && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD)
6511 return true;
6512
6513 return default_scalar_mode_supported_p (mode);
6514 }
6515
6516 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
6517
6518 static machine_mode
6519 loongarch_preferred_simd_mode (scalar_mode mode)
6520 {
6521 if (!ISA_HAS_LSX)
6522 return word_mode;
6523
6524 switch (mode)
6525 {
6526 case E_QImode:
6527 return E_V16QImode;
6528 case E_HImode:
6529 return E_V8HImode;
6530 case E_SImode:
6531 return E_V4SImode;
6532 case E_DImode:
6533 return E_V2DImode;
6534
6535 case E_SFmode:
6536 return E_V4SFmode;
6537
6538 case E_DFmode:
6539 return E_V2DFmode;
6540
6541 default:
6542 break;
6543 }
6544 return word_mode;
6545 }
6546
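/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */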
6547 static unsigned int
6548 loongarch_autovectorize_vector_modes (vector_modes *modes, bool)
6549 {
6550 if (ISA_HAS_LSX)
6551 {
6552 modes->safe_push (V16QImode);
6553 }
6554
6555 return 0;
6556 }
6557
6558 /* Return the assembly code for INSN, which has the operands given by
6559 OPERANDS, and which branches to OPERANDS[0] if some condition is true.
6560 BRANCH_IF_TRUE is the asm template that should be used if OPERANDS[0]
6561 is in range of a direct branch. BRANCH_IF_FALSE is an inverted
6562 version of BRANCH_IF_TRUE. */
6563
6564 const char *
6565 loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands,
6566 const char *branch_if_true,
6567 const char *branch_if_false)
6568 {
6569 unsigned int length;
6570 rtx taken;
6571
6572 gcc_assert (LABEL_P (operands[0]));
6573
6574 length = get_attr_length (insn);
6575 if (length <= 4)
6576 {
6577 return branch_if_true;
6578 }
6579
6580 /* Generate a reversed branch around a direct jump. */
6581 rtx_code_label *not_taken = gen_label_rtx ();
6582 taken = operands[0];
6583
6584 /* Generate the reversed branch to NOT_TAKEN. */
6585 operands[0] = not_taken;
6586 output_asm_insn (branch_if_false, operands);
6587
6588 output_asm_insn ("b\t%0", &taken);
6589
6590 /* Output NOT_TAKEN. */
6591 targetm.asm_out.internal_label (asm_out_file, "L",
6592 CODE_LABEL_NUMBER (not_taken));
6593 return "";
6594 }
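/* Illustratively, when a conditional branch such as "beq $r4,$r5,target" is
   out of direct range, the code above instead emits
   	bne	$r4,$r5,.Lskip
   	b	target
   .Lskip:
   i.e. the inverted condition skips over an unconditional jump whose reach
   is much larger (register names and label spelling are illustrative). */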
6595
6596 /* Return the assembly code for INSN, which branches to OPERANDS[0]
6597 if some equality condition is true. The condition is given by
6598 OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of
6599 OPERANDS[1]. OPERANDS[2] is the comparison's first operand;
6600 OPERANDS[3] is the second operand and may be zero or a register. */
6601
6602 const char *
6603 loongarch_output_equal_conditional_branch (rtx_insn *insn, rtx *operands,
6604 bool inverted_p)
6605 {
6606 const char *branch[2];
6607 if (operands[3] == const0_rtx)
6608 {
6609 branch[!inverted_p] = LARCH_BRANCH ("b%C1z", "%2,%0");
6610 branch[inverted_p] = LARCH_BRANCH ("b%N1z", "%2,%0");
6611 }
6612 else
6613 {
6614 branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%z3,%0");
6615 branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%z3,%0");
6616 }
6617
6618 return loongarch_output_conditional_branch (insn, operands, branch[1],
6619 branch[0]);
6620 }
6621
6622 /* Return the assembly code for INSN, which branches to OPERANDS[0]
6623 if some ordering condition is true. The condition is given by
6624 OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of
6625 OPERANDS[1]. OPERANDS[2] is the comparison's first operand;
6626 OPERANDS[3] is the second operand and may be zero or a register. */
6627
6628 const char *
6629 loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands,
6630 bool inverted_p)
6631 {
6632 const char *branch[2];
6633
6634 /* Make BRANCH[1] branch to OPERANDS[0] when the condition is true.
6635 Make BRANCH[0] branch on the inverse condition. */
6636 if (operands[3] != const0_rtx)
6637 {
6638 /* Handle degenerate cases that should not, but do, occur. */
6639 if (REGNO (operands[2]) == REGNO (operands[3]))
6640 {
6641 switch (GET_CODE (operands[1]))
6642 {
6643 case LT:
6644 case LTU:
6645 case GT:
6646 case GTU:
6647 inverted_p = !inverted_p;
6648 /* Fall through. */
6649 case LE:
6650 case LEU:
6651 case GE:
6652 case GEU:
6653 branch[!inverted_p] = LARCH_BRANCH ("b", "%0");
6654 branch[inverted_p] = "\t# branch never";
6655 break;
6656 default:
6657 gcc_unreachable ();
6658 }
6659 }
6660 else
6661 {
6662 switch (GET_CODE (operands[1]))
6663 {
6664 case LE:
6665 case LEU:
6666 case GT:
6667 case GTU:
6668 case LT:
6669 case LTU:
6670 case GE:
6671 case GEU:
6672 branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%3,%0");
6673 branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%3,%0");
6674 break;
6675 default:
6676 gcc_unreachable ();
6677 }
6678 }
6679 }
6680 else
6681 {
6682 switch (GET_CODE (operands[1]))
6683 {
6684 /* These cases are equivalent to comparisons against zero. */
6685 case LEU:
6686 case GTU:
6687 case LTU:
6688 case GEU:
6689 case LE:
6690 case GT:
6691 case LT:
6692 case GE:
6693 branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,$r0,%0");
6694 branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,$r0,%0");
6695 break;
6696 default:
6697 gcc_unreachable ();
6698 }
6699 }
6700 return loongarch_output_conditional_branch (insn, operands, branch[1],
6701 branch[0]);
6702 }
6703
6704 /* Return the assembly code for DIV.{W/D} instruction DIVISION, which has
6705 the operands given by OPERANDS. Add in a divide-by-zero check if needed.
6706 */
6707
6708 const char *
6709 loongarch_output_division (const char *division, rtx *operands)
6710 {
6711 const char *s;
6712
6713 s = division;
6714 if (loongarch_check_zero_div_p ())
6715 {
6716 output_asm_insn (s, operands);
6717 s = "bne\t%2,%.,1f\n\tbreak\t7\n1:";
6718 }
6719 return s;
6720 }
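/* With divide-by-zero checking enabled, the emitted sequence is therefore
   roughly:
   	div.d	$r4,$r5,$r6
   	bne	$r6,$r0,1f
   	break	7
   1:
   i.e. the divide is issued first and the trap is only reached when the
   divisor is zero (register numbers are illustrative). */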
6721
6722 /* Return the assembly code for LSX DIV_{S,U}.DF or MOD_{S,U}.DF instructions,
6723 which has the operands given by OPERANDS. Add in a divide-by-zero check
6724 if needed. */
6725
6726 const char *
6727 loongarch_lsx_output_division (const char *division, rtx *operands)
6728 {
6729 const char *s;
6730
6731 s = division;
6732 if (TARGET_CHECK_ZERO_DIV)
6733 {
6734 if (ISA_HAS_LSX)
6735 {
6736 output_asm_insn ("vsetallnez.%v0\t$fcc7,%w2",operands);
6737 output_asm_insn (s, operands);
6738 output_asm_insn ("bcnez\t$fcc7,1f", operands);
6739 }
6740 s = "break\t7\n1:";
6741 }
6742 return s;
6743 }
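/* So for a checked V4SI division the emitted sequence looks roughly like:
   	vsetallnez.w	$fcc7, $vr2
   	vdiv.w		$vr0, $vr1, $vr2
   	bcnez		$fcc7, 1f
   	break		7
   1:
   where $fcc7 is set only when every element of the divisor is non-zero
   (register numbers are illustrative). */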
6744
6745 /* Implement TARGET_SCHED_ADJUST_COST. We assume that anti and output
6746 dependencies have no cost. */
6747
6748 static int
6749 loongarch_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost,
6750 unsigned int)
6751 {
6752 if (dep_type != 0 && (dep_type != REG_DEP_OUTPUT))
6753 return 0;
6754 return cost;
6755 }
6756
6757 /* Return the number of instructions that can be issued per cycle. */
6758
6759 static int
6760 loongarch_issue_rate (void)
6761 {
6762 if ((unsigned long) la_target.cpu_tune < N_TUNE_TYPES)
6763 return loongarch_cpu_issue_rate[la_target.cpu_tune];
6764 else
6765 return 1;
6766 }
6767
6768 /* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD. This should
6769 be as wide as the scheduling freedom in the DFA. */
6770
6771 static int
6772 loongarch_multipass_dfa_lookahead (void)
6773 {
6774 if ((unsigned long) la_target.cpu_tune < N_ARCH_TYPES)
6775 return loongarch_cpu_multipass_dfa_lookahead[la_target.cpu_tune];
6776 else
6777 return 0;
6778 }
6779
6780 /* Implement TARGET_SCHED_REORDER. */
6781
6782 static int
6783 loongarch_sched_reorder (FILE *file ATTRIBUTE_UNUSED,
6784 int verbose ATTRIBUTE_UNUSED,
6785 rtx_insn **ready ATTRIBUTE_UNUSED,
6786 int *nreadyp ATTRIBUTE_UNUSED,
6787 int cycle ATTRIBUTE_UNUSED)
6788 {
6789 return loongarch_issue_rate ();
6790 }
6791
6792 /* Implement TARGET_SCHED_REORDER2. */
6793
6794 static int
6795 loongarch_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED,
6796 int verbose ATTRIBUTE_UNUSED,
6797 rtx_insn **ready ATTRIBUTE_UNUSED,
6798 int *nreadyp ATTRIBUTE_UNUSED,
6799 int cycle ATTRIBUTE_UNUSED)
6800 {
6801 return cached_can_issue_more;
6802 }
6803
6804 /* Implement TARGET_SCHED_INIT. */
6805
6806 static void
6807 loongarch_sched_init (FILE *file ATTRIBUTE_UNUSED,
6808 int verbose ATTRIBUTE_UNUSED,
6809 int max_ready ATTRIBUTE_UNUSED)
6810 {}
6811
6812 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
6813
6814 static int
6815 loongarch_variable_issue (FILE *file ATTRIBUTE_UNUSED,
6816 int verbose ATTRIBUTE_UNUSED, rtx_insn *insn,
6817 int more)
6818 {
6819 /* Ignore USEs and CLOBBERs; don't count them against the issue rate. */
6820 if (USEFUL_INSN_P (insn))
6821 {
6822 if (get_attr_type (insn) != TYPE_GHOST)
6823 more--;
6824 }
6825
6826 /* Instructions of type 'multi' should all be split before
6827 the second scheduling pass. */
6828 gcc_assert (!reload_completed
6829 || recog_memoized (insn) < 0
6830 || get_attr_type (insn) != TYPE_MULTI);
6831
6832 cached_can_issue_more = more;
6833 return more;
6834 }
6835
6836 /* Given that we have an rtx of the form (prefetch ... WRITE LOCALITY),
6837 return the first operand of the associated PREF or PREFX insn. */
6838
6839 rtx
6840 loongarch_prefetch_cookie (rtx write, rtx locality)
6841 {
6842 /* store_streamed / load_streamed. */
6843 if (INTVAL (locality) <= 0)
6844 return GEN_INT (INTVAL (write) + 4);
6845
6846 /* store / load. */
6847 if (INTVAL (locality) <= 2)
6848 return write;
6849
6850 /* store_retained / load_retained. */
6851 return GEN_INT (INTVAL (write) + 6);
6852 }
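/* In other words the cookie encodes the hint as follows (WRITE is 0 for a
   load and 1 for a store):
   	locality <= 0	-> WRITE + 4	(load_streamed / store_streamed)
   	locality 1..2	-> WRITE	(load / store)
   	locality 3	-> WRITE + 6	(load_retained / store_retained)  */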
6853
6854 /* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text
6855 in order to avoid duplicating too much logic from elsewhere. */
6856
6857 static void
6858 loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
6859 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
6860 tree function)
6861 {
6862 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
6863 rtx this_rtx, temp1, temp2, fnaddr;
6864 rtx_insn *insn;
6865 bool use_sibcall_p;
6866
6867 /* Pretend to be a post-reload pass while generating rtl. */
6868 reload_completed = 1;
6869
6870 /* Mark the end of the (empty) prologue. */
6871 emit_note (NOTE_INSN_PROLOGUE_END);
6872
6873 /* Determine if we can use a sibcall to call FUNCTION directly. */
6874 fnaddr = XEXP (DECL_RTL (function), 0);
6875 use_sibcall_p = const_call_insn_operand (fnaddr, Pmode);
6876
6877 /* We need two temporary registers in some cases. */
6878 temp1 = gen_rtx_REG (Pmode, 12);
6879 temp2 = gen_rtx_REG (Pmode, 13);
6880
6881 /* Find out which register contains the "this" pointer. */
6882 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
6883 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
6884 else
6885 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);
6886
6887 /* Add DELTA to THIS_RTX. */
6888 if (delta != 0)
6889 {
6890 rtx offset = GEN_INT (delta);
6891 if (!IMM12_OPERAND (delta))
6892 {
6893 loongarch_emit_move (temp1, offset);
6894 offset = temp1;
6895 }
6896 emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
6897 }
6898
6899 /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
6900 if (vcall_offset != 0)
6901 {
6902 rtx addr;
6903
6904 /* Set TEMP1 to *THIS_RTX. */
6905 loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));
6906
6907 /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */
6908 addr = loongarch_add_offset (temp2, temp1, vcall_offset);
6909
6910 /* Load the offset and add it to THIS_RTX. */
6911 loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
6912 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
6913 }
6914
6915 /* Jump to the target function. Use a sibcall if direct jumps are
6916 allowed, otherwise load the address into a register first. */
6917 if (use_sibcall_p)
6918 {
6919 insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx));
6920 SIBLING_CALL_P (insn) = 1;
6921 }
6922 else
6923 {
6924 loongarch_emit_move (temp1, fnaddr);
6925 emit_jump_insn (gen_indirect_jump (temp1));
6926 }
6927
6928 /* Run just enough of rest_of_compilation. This sequence was
6929 "borrowed" from alpha.c. */
6930 insn = get_insns ();
6931 split_all_insns_noflow ();
6932 shorten_branches (insn);
6933 assemble_start_function (thunk_fndecl, fnname);
6934 final_start_function (insn, file, 1);
6935 final (insn, file, 1);
6936 final_end_function ();
6937 assemble_end_function (thunk_fndecl, fnname);
6938
6939 /* Stop pretending to be a post-reload pass. */
6940 reload_completed = 0;
6941 }
6942
6943 /* Allocate a chunk of memory for per-function machine-dependent data. */
6944
6945 static struct machine_function *
6946 loongarch_init_machine_status (void)
6947 {
6948 return ggc_cleared_alloc<machine_function> ();
6949 }
6950
6951 static void
6952 loongarch_cpu_option_override (struct loongarch_target *target,
6953 struct gcc_options *opts,
6954 struct gcc_options *opts_set)
6955 {
6956 /* alignments */
6957 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
6958 opts->x_str_align_functions
6959 = loongarch_cpu_align[target->cpu_tune].function;
6960
6961 if (opts->x_flag_align_labels && !opts->x_str_align_labels)
6962 opts->x_str_align_labels = loongarch_cpu_align[target->cpu_tune].label;
6963
6964 /* Set up parameters to be used in prefetching algorithm. */
6965 int simultaneous_prefetches
6966 = loongarch_cpu_cache[target->cpu_tune].simultaneous_prefetches;
6967
6968 SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches,
6969 simultaneous_prefetches);
6970
6971 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size,
6972 loongarch_cpu_cache[target->cpu_tune].l1d_line_size);
6973
6974 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size,
6975 loongarch_cpu_cache[target->cpu_tune].l1d_size);
6976
6977 SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size,
6978 loongarch_cpu_cache[target->cpu_tune].l2d_size);
6979 }
6980
6981 static void
6982 loongarch_option_override_internal (struct gcc_options *opts,
6983 struct gcc_options *opts_set)
6984 {
6985 int i, regno, mode;
6986
6987 if (flag_pic)
6988 g_switch_value = 0;
6989
6990 loongarch_init_target (&la_target,
6991 la_opt_cpu_arch, la_opt_cpu_tune, la_opt_fpu,
6992 la_opt_simd, la_opt_abi_base, la_opt_abi_ext,
6993 la_opt_cmodel);
6994
6995 /* Handle target-specific options: compute defaults/conflicts etc. */
6996 loongarch_config_target (&la_target, NULL, 0);
6997
6998 loongarch_update_gcc_opt_status (&la_target, opts, opts_set);
6999 loongarch_cpu_option_override (&la_target, opts, opts_set);
7000
7001 if (TARGET_ABI_LP64)
7002 flag_pcc_struct_return = 0;
7003
7004 /* Decide which rtx_costs structure to use. */
7005 if (optimize_size)
7006 loongarch_cost = &loongarch_rtx_cost_optimize_size;
7007 else
7008 loongarch_cost = &loongarch_cpu_rtx_cost_data[la_target.cpu_tune];
7009
7010 /* If the user hasn't specified a branch cost, use the processor's
7011 default. */
7012 if (loongarch_branch_cost == 0)
7013 loongarch_branch_cost = loongarch_cost->branch_cost;
7014
7015
7016 /* Enable sw prefetching at -O3 and higher. */
7017 if (opts->x_flag_prefetch_loop_arrays < 0
7018 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
7019 && !opts->x_optimize_size)
7020 opts->x_flag_prefetch_loop_arrays = 1;
7021
7022 if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
7023 error ("%qs cannot be used for compiling a shared library",
7024 "-mdirect-extern-access");
7025 if (loongarch_vector_access_cost == 0)
7026 loongarch_vector_access_cost = 5;
7027
7028
7029 switch (la_target.cmodel)
7030 {
7031 case CMODEL_EXTREME:
7032 if (!TARGET_EXPLICIT_RELOCS)
7033 error ("code model %qs needs %s",
7034 "extreme", "-mexplicit-relocs");
7035
7036 if (opts->x_flag_plt)
7037 {
7038 if (global_options_set.x_flag_plt)
7039 error ("code model %qs is not compatible with %s",
7040 "extreme", "-fplt");
7041 opts->x_flag_plt = 0;
7042 }
7043 break;
7044
7045 case CMODEL_TINY_STATIC:
7046 case CMODEL_MEDIUM:
7047 case CMODEL_NORMAL:
7048 case CMODEL_TINY:
7049 case CMODEL_LARGE:
7050 break;
7051
7052 default:
7053 gcc_unreachable ();
7054 }
7055
7056 /* Validate the guard size. */
7057 int guard_size = param_stack_clash_protection_guard_size;
7058
7059 /* Enforce that the probe interval is the same as the guard size so the
7060 mid-end does the right thing. */
7061 SET_OPTION_IF_UNSET (opts, &global_options_set,
7062 param_stack_clash_protection_probe_interval,
7063 guard_size);
7064
7065 loongarch_init_print_operand_punct ();
7066
7067 /* Set up array to map GCC register number to debug register number.
7068 Ignore the special purpose register numbers. */
7069
7070 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7071 {
7072 if (GP_REG_P (i) || FP_REG_P (i))
7073 loongarch_dwarf_regno[i] = i;
7074 else
7075 loongarch_dwarf_regno[i] = INVALID_REGNUM;
7076 }
7077
7078 /* Set up loongarch_hard_regno_mode_ok. */
7079 for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
7080 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7081 loongarch_hard_regno_mode_ok_p[mode][regno]
7082 = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode);
7083
7084 /* Function to allocate machine-dependent function status. */
7085 init_machine_status = &loongarch_init_machine_status;
7086 }
7087
7088
7089 /* Implement TARGET_OPTION_OVERRIDE. */
7090
7091 static void
7092 loongarch_option_override (void)
7093 {
7094 loongarch_option_override_internal (&global_options, &global_options_set);
7095 }
7096
7097 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
7098
7099 static void
7100 loongarch_conditional_register_usage (void)
7101 {
7102 if (!TARGET_HARD_FLOAT)
7103 accessible_reg_set &= ~(reg_class_contents[FP_REGS]
7104 | reg_class_contents[FCC_REGS]);
7105 }
7106
7107 /* Implement EH_USES. */
7108
7109 bool
7110 loongarch_eh_uses (unsigned int regno ATTRIBUTE_UNUSED)
7111 {
7112 return false;
7113 }
7114
7115 /* Implement EPILOGUE_USES. */
7116
7117 bool
7118 loongarch_epilogue_uses (unsigned int regno)
7119 {
7120 /* Say that the epilogue uses the return address register. Note that
7121 in the case of sibcalls, the values "used by the epilogue" are
7122 considered live at the start of the called function. */
7123 if (regno == RETURN_ADDR_REGNUM)
7124 return true;
7125
7126 return false;
7127 }
7128
7129 bool
7130 loongarch_load_store_bonding_p (rtx *operands, machine_mode mode, bool load_p)
7131 {
7132 rtx reg1, reg2, mem1, mem2, base1, base2;
7133 enum reg_class rc1, rc2;
7134 HOST_WIDE_INT offset1, offset2;
7135
7136 if (load_p)
7137 {
7138 reg1 = operands[0];
7139 reg2 = operands[2];
7140 mem1 = operands[1];
7141 mem2 = operands[3];
7142 }
7143 else
7144 {
7145 reg1 = operands[1];
7146 reg2 = operands[3];
7147 mem1 = operands[0];
7148 mem2 = operands[2];
7149 }
7150
7151 if (loongarch_address_insns (XEXP (mem1, 0), mode, false) == 0
7152 || loongarch_address_insns (XEXP (mem2, 0), mode, false) == 0)
7153 return false;
7154
7155 loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1);
7156 loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2);
7157
7158 /* Base regs do not match. */
7159 if (!REG_P (base1) || !rtx_equal_p (base1, base2))
7160 return false;
7161
7162 /* One of the loads clobbers the base register. Bonding would be
7163 legitimate if only the second load clobbered the base register, but
7164 the hardware does not support such bonding. */
7165 if (load_p
7166 && (REGNO (reg1) == REGNO (base1) || (REGNO (reg2) == REGNO (base1))))
7167 return false;
7168
7169 /* Both loads target the same register. */
7170 if (load_p && REGNO (reg1) == REGNO (reg2))
7171 return false;
7172
7173 /* The loads/stores are not of the same type (register class). */
7174 rc1 = REGNO_REG_CLASS (REGNO (reg1));
7175 rc2 = REGNO_REG_CLASS (REGNO (reg2));
7176 if (rc1 != rc2 && !reg_class_subset_p (rc1, rc2)
7177 && !reg_class_subset_p (rc2, rc1))
7178 return false;
7179
7180 if (abs (offset1 - offset2) != GET_MODE_SIZE (mode))
7181 return false;
7182
7183 return true;
7184 }
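/* As an informal example, a pair such as
   	ld.d	$r12, $r13, 0
   	ld.d	$r14, $r13, 8
   satisfies the checks above (same base, adjacent offsets, neither load
   clobbers the base, distinct destinations), whereas reusing the base
   register as a destination or using unrelated bases rejects bonding. */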
7185
7186 /* Implement TARGET_TRAMPOLINE_INIT. */
7187
7188 static void
7189 loongarch_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
7190 {
7191 rtx addr, end_addr, mem;
7192 rtx trampoline[8];
7193 unsigned int i, j;
7194 HOST_WIDE_INT end_addr_offset, static_chain_offset, target_function_offset;
7195
7196 /* Work out the offsets of the pointers from the start of the
7197 trampoline code. */
7198 end_addr_offset = TRAMPOLINE_CODE_SIZE;
7199 static_chain_offset = end_addr_offset;
7200 target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);
7201
7202 /* Get pointers to the beginning and end of the code block. */
7203 addr = force_reg (Pmode, XEXP (m_tramp, 0));
7204 end_addr
7205 = loongarch_force_binary (Pmode, PLUS, addr, GEN_INT (end_addr_offset));
7206
7207 #define OP(X) gen_int_mode (X, SImode)
7208
7209 /* Build up the code in TRAMPOLINE. */
7210 i = 0;
7211 /* pcaddi $static_chain,0
7212 ld.[dw] $tmp,$static_chain,target_function_offset
7213 ld.[dw] $static_chain,$static_chain,static_chain_offset
7214 jirl $r0,$tmp,0 */
7215 trampoline[i++] = OP (0x18000000 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST));
7216 trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000)
7217 | 19 /* $t7 */
7218 | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5)
7219 | ((target_function_offset & 0xfff) << 10));
7220 trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000)
7221 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST)
7222 | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5)
7223 | ((static_chain_offset & 0xfff) << 10));
7224 trampoline[i++] = OP (0x4c000000 | (19 << 5));
7225 #undef OP
7226
7227 for (j = 0; j < i; j++)
7228 {
7229 mem = adjust_address (m_tramp, SImode, j * GET_MODE_SIZE (SImode));
7230 loongarch_emit_move (mem, trampoline[j]);
7231 }
7232
7233 /* Set up the static chain pointer field. */
7234 mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
7235 loongarch_emit_move (mem, chain_value);
7236
7237 /* Set up the target function field. */
7238 mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
7239 loongarch_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
7240
7241 /* Flush the code part of the trampoline. */
7242 emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
7243 emit_insn (gen_clear_cache (addr, end_addr));
7244 }
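/* The resulting trampoline is laid out as TRAMPOLINE_CODE_SIZE bytes of
   code (the four instructions sketched above), immediately followed by the
   static chain value and then the address of the target function. The
   pcaddi materialises the trampoline's own address, so the two loads pick
   up those pointers relative to the start of the code block. */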
7245
7246 /* Generate or test for an insn that supports a constant permutation. */
7247
7248 #define MAX_VECT_LEN 32
7249
7250 struct expand_vec_perm_d
7251 {
7252 rtx target, op0, op1;
7253 unsigned char perm[MAX_VECT_LEN];
7254 machine_mode vmode;
7255 unsigned char nelt;
7256 bool one_vector_p;
7257 bool testing_p;
7258 };
7259
7260 /* Construct (set target (vec_select op0 (parallel perm))) and
7261 return true if that's a valid instruction in the active ISA. */
7262
7263 static bool
7264 loongarch_expand_vselect (rtx target, rtx op0,
7265 const unsigned char *perm, unsigned nelt)
7266 {
7267 rtx rperm[MAX_VECT_LEN], x;
7268 rtx_insn *insn;
7269 unsigned i;
7270
7271 for (i = 0; i < nelt; ++i)
7272 rperm[i] = GEN_INT (perm[i]);
7273
7274 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
7275 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
7276 x = gen_rtx_SET (target, x);
7277
7278 insn = emit_insn (x);
7279 if (recog_memoized (insn) < 0)
7280 {
7281 remove_insn (insn);
7282 return false;
7283 }
7284 return true;
7285 }
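/* For example, with nelt == 4 and perm == {1, 0, 3, 2} this tries to
   recognize an insn of the form
   	(set (reg:V4SI target)
   	     (vec_select:V4SI (reg:V4SI op0)
   	                      (parallel [(const_int 1) (const_int 0)
   	                                 (const_int 3) (const_int 2)])))
   and gives up (removing the insn again) if no pattern matches
   (V4SI is just an illustrative mode). */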
7286
7287 /* Similar, but generate a vec_concat from op0 and op1 as well. */
7288
7289 static bool
7290 loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
7291 const unsigned char *perm, unsigned nelt)
7292 {
7293 machine_mode v2mode;
7294 rtx x;
7295
7296 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
7297 return false;
7298 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
7299 return loongarch_expand_vselect (target, x, perm, nelt);
7300 }
7301
7302 static tree
7303 loongarch_handle_model_attribute (tree *node, tree name, tree arg, int,
7304 bool *no_add_attrs)
7305 {
7306 tree decl = *node;
7307 if (VAR_P (decl))
7308 {
7309 if (DECL_THREAD_LOCAL_P (decl))
7310 {
7311 error_at (DECL_SOURCE_LOCATION (decl),
7312 "%qE attribute cannot be specified for thread-local "
7313 "variables", name);
7314 *no_add_attrs = true;
7315 return NULL_TREE;
7316 }
7317 if (DECL_CONTEXT (decl)
7318 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
7319 && !TREE_STATIC (decl))
7320 {
7321 error_at (DECL_SOURCE_LOCATION (decl),
7322 "%qE attribute cannot be specified for local "
7323 "variables", name);
7324 *no_add_attrs = true;
7325 return NULL_TREE;
7326 }
7327 if (DECL_REGISTER (decl))
7328 {
7329 error_at (DECL_SOURCE_LOCATION (decl),
7330 "%qE attribute cannot be specified for register "
7331 "variables", name);
7332 *no_add_attrs = true;
7333 return NULL_TREE;
7334 }
7335 if (!TARGET_EXPLICIT_RELOCS)
7336 {
7337 error_at (DECL_SOURCE_LOCATION (decl),
7338 "%qE attribute requires %s", name, "-mexplicit-relocs");
7339 *no_add_attrs = true;
7340 return NULL_TREE;
7341 }
7342
7343 arg = TREE_VALUE (arg);
7344 if (TREE_CODE (arg) != STRING_CST)
7345 {
7346 error_at (DECL_SOURCE_LOCATION (decl),
7347 "invalid argument of %qE attribute", name);
7348 *no_add_attrs = true;
7349 return NULL_TREE;
7350 }
7351
7352 const char *model = TREE_STRING_POINTER (arg);
7353 if (strcmp (model, "normal") != 0
7354 && strcmp (model, "extreme") != 0)
7355 {
7356 error_at (DECL_SOURCE_LOCATION (decl),
7357 "invalid argument of %qE attribute", name);
7358 *no_add_attrs = true;
7359 return NULL_TREE;
7360 }
7361
7362 if (lookup_attribute ("model", DECL_ATTRIBUTES (decl)))
7363 {
7364 error_at (DECL_SOURCE_LOCATION (decl),
7365 "multiple %qE attribute", name);
7366 *no_add_attrs = true;
7367 return NULL_TREE;
7368 }
7369 }
7370 else
7371 {
7372 warning (OPT_Wattributes, "%qE attribute ignored", name);
7373 *no_add_attrs = true;
7374 }
7375 return NULL_TREE;
7376 }
7377
7378 static const struct attribute_spec loongarch_attribute_table[] =
7379 {
7380 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
7381 affects_type_identity, handler, exclude } */
7382 { "model", 1, 1, true, false, false, false,
7383 loongarch_handle_model_attribute, NULL },
7384 /* The last attribute spec is set to be NULL. */
7385 {}
7386 };
7387
7388 bool
7389 loongarch_use_anchors_for_symbol_p (const_rtx symbol)
7390 {
7391 tree decl = SYMBOL_REF_DECL (symbol);
7392
7393 /* The section anchor optimization may break custom address model. */
7394 if (decl && lookup_attribute ("model", DECL_ATTRIBUTES (decl)))
7395 return false;
7396
7397 return default_use_anchors_for_symbol_p (symbol);
7398 }
7399
7400 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
7401
7402 static unsigned HOST_WIDE_INT
7403 loongarch_asan_shadow_offset (void)
7404 {
7405 /* We only have libsanitizer support for LOONGARCH64 at present.
7406 This value is taken from the file libsanitizer/asan/asan_mapping.h. */
7407 return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
7408 }
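/* Recall that ASan computes shadow addresses roughly as
   	shadow = (addr >> 3) + offset
   so with the 1 << 46 offset above, 64-bit application addresses map into
   a dedicated shadow region well away from normal mappings. */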
7409
7410 static sbitmap
7411 loongarch_get_separate_components (void)
7412 {
7413 HOST_WIDE_INT offset;
7414 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
7415 bitmap_clear (components);
7416 offset = cfun->machine->frame.gp_sp_offset;
7417
7418 /* The stack should be aligned to a 16-byte boundary, so we can make
7419 use of the ldptr instructions. */
7420 gcc_assert (offset % UNITS_PER_WORD == 0);
7421
7422 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7423 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7424 {
7425 /* We can wrap general registers saved at [sp, sp + 32768) using the
7426 ldptr/stptr instructions. For large offsets a pseudo register
7427 might be needed which cannot be created during the shrink
7428 wrapping pass.
7429
7430 TODO: This may need revisiting when we add LA32, as ldptr.w is not
7431 guaranteed to be available by the manual. */
7432 if (offset < 32768)
7433 bitmap_set_bit (components, regno);
7434
7435 offset -= UNITS_PER_WORD;
7436 }
7437
7438 offset = cfun->machine->frame.fp_sp_offset;
7439 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7440 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
7441 {
7442 /* We can only wrap FP registers with imm12 offsets. For large
7443 offsets a pseudo register might be needed which cannot be
7444 created during the shrink wrapping pass. */
7445 if (IMM12_OPERAND (offset))
7446 bitmap_set_bit (components, regno);
7447
7448 offset -= UNITS_PER_FPREG;
7449 }
7450
7451 /* Don't mess with the hard frame pointer. */
7452 if (frame_pointer_needed)
7453 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
7454
7455 bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
7456
7457 return components;
7458 }
7459
7460 static sbitmap
7461 loongarch_components_for_bb (basic_block bb)
7462 {
7463 /* Registers are used in a bb if they are in the IN, GEN, or KILL sets. */
7464 auto_bitmap used;
7465 bitmap_copy (used, DF_LIVE_IN (bb));
7466 bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->gen);
7467 bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->kill);
7468
7469 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
7470 bitmap_clear (components);
7471
7472 function_abi_aggregator callee_abis;
7473 rtx_insn *insn;
7474 FOR_BB_INSNS (bb, insn)
7475 if (CALL_P (insn))
7476 callee_abis.note_callee_abi (insn_callee_abi (insn));
7477
7478 HARD_REG_SET extra_caller_saves =
7479 callee_abis.caller_save_regs (*crtl->abi);
7480
7481 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7482 if (!fixed_regs[regno]
7483 && !crtl->abi->clobbers_full_reg_p (regno)
7484 && (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
7485 bitmap_bit_p (used, regno)))
7486 bitmap_set_bit (components, regno);
7487
7488 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7489 if (!fixed_regs[regno]
7490 && !crtl->abi->clobbers_full_reg_p (regno)
7491 && (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
7492 bitmap_bit_p (used, regno)))
7493 bitmap_set_bit (components, regno);
7494
7495 return components;
7496 }
7497
7498 static void
7499 loongarch_disqualify_components (sbitmap, edge, sbitmap, bool)
7500 {
7501 /* Do nothing. */
7502 }
7503
7504 static void
7505 loongarch_process_components (sbitmap components, loongarch_save_restore_fn fn)
7506 {
7507 HOST_WIDE_INT offset = cfun->machine->frame.gp_sp_offset;
7508
7509 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7510 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7511 {
7512 if (bitmap_bit_p (components, regno))
7513 loongarch_save_restore_reg (word_mode, regno, offset, fn);
7514
7515 offset -= UNITS_PER_WORD;
7516 }
7517
7518 offset = cfun->machine->frame.fp_sp_offset;
7519 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
7520
7521 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7522 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
7523 {
7524 if (bitmap_bit_p (components, regno))
7525 loongarch_save_restore_reg (mode, regno, offset, fn);
7526
7527 offset -= UNITS_PER_FPREG;
7528 }
7529 }
7530
7531 static void
7532 loongarch_emit_prologue_components (sbitmap components)
7533 {
7534 loongarch_process_components (components, loongarch_save_reg);
7535 }
7536
7537 static void
7538 loongarch_emit_epilogue_components (sbitmap components)
7539 {
7540 loongarch_process_components (components, loongarch_restore_reg);
7541 }
7542
7543 static void
7544 loongarch_set_handled_components (sbitmap components)
7545 {
7546 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7547 if (bitmap_bit_p (components, regno))
7548 cfun->machine->reg_is_wrapped_separately[regno] = true;
7549
7550 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7551 if (bitmap_bit_p (components, regno))
7552 cfun->machine->reg_is_wrapped_separately[regno] = true;
7553 }
7554
7555 /* Initialize the GCC target structure. */
7556 #undef TARGET_ASM_ALIGNED_HI_OP
7557 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
7558 #undef TARGET_ASM_ALIGNED_SI_OP
7559 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
7560 #undef TARGET_ASM_ALIGNED_DI_OP
7561 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
7562 /* Construct (set target (vec_select op0 (parallel selector))) and
7563 return true if that's a valid instruction in the active ISA. */
7564
7565 static bool
7566 loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
7567 {
7568 rtx x, elts[MAX_VECT_LEN];
7569 rtvec v;
7570 rtx_insn *insn;
7571 unsigned i;
7572
7573 if (!ISA_HAS_LSX)
7574 return false;
7575
7576 for (i = 0; i < d->nelt; i++)
7577 elts[i] = GEN_INT (d->perm[i]);
7578
7579 v = gen_rtvec_v (d->nelt, elts);
7580 x = gen_rtx_PARALLEL (VOIDmode, v);
7581
7582 if (!loongarch_const_vector_shuffle_set_p (x, d->vmode))
7583 return false;
7584
7585 x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x);
7586 x = gen_rtx_SET (d->target, x);
7587
7588 insn = emit_insn (x);
7589 if (recog_memoized (insn) < 0)
7590 {
7591 remove_insn (insn);
7592 return false;
7593 }
7594 return true;
7595 }
7596
7597 void
7598 loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7599 {
7600 machine_mode vmode = GET_MODE (target);
7601
7602 switch (vmode)
7603 {
7604 case E_V16QImode:
7605 emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel));
7606 break;
7607 case E_V2DFmode:
7608 emit_insn (gen_lsx_vshuf_d_f (target, sel, op1, op0));
7609 break;
7610 case E_V2DImode:
7611 emit_insn (gen_lsx_vshuf_d (target, sel, op1, op0));
7612 break;
7613 case E_V4SFmode:
7614 emit_insn (gen_lsx_vshuf_w_f (target, sel, op1, op0));
7615 break;
7616 case E_V4SImode:
7617 emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0));
7618 break;
7619 case E_V8HImode:
7620 emit_insn (gen_lsx_vshuf_h (target, sel, op1, op0));
7621 break;
7622 default:
7623 break;
7624 }
7625 }
7626
7627 static bool
7628 loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
7629 {
7630 int i;
7631 rtx target, op0, op1, sel, tmp;
7632 rtx rperm[MAX_VECT_LEN];
7633
7634 if (d->vmode == E_V2DImode || d->vmode == E_V2DFmode
7635 || d->vmode == E_V4SImode || d->vmode == E_V4SFmode
7636 || d->vmode == E_V8HImode || d->vmode == E_V16QImode)
7637 {
7638 target = d->target;
7639 op0 = d->op0;
7640 op1 = d->one_vector_p ? d->op0 : d->op1;
7641
7642 if (GET_MODE (op0) != GET_MODE (op1)
7643 || GET_MODE (op0) != GET_MODE (target))
7644 return false;
7645
7646 if (d->testing_p)
7647 return true;
7648
7649 for (i = 0; i < d->nelt; i += 1)
7650 {
7651 rperm[i] = GEN_INT (d->perm[i]);
7652 }
7653
7654 if (d->vmode == E_V2DFmode)
7655 {
7656 sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm));
7657 tmp = gen_rtx_SUBREG (E_V2DImode, d->target, 0);
7658 emit_move_insn (tmp, sel);
7659 }
7660 else if (d->vmode == E_V4SFmode)
7661 {
7662 sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm));
7663 tmp = gen_rtx_SUBREG (E_V4SImode, d->target, 0);
7664 emit_move_insn (tmp, sel);
7665 }
7666 else
7667 {
7668 sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm));
7669 emit_move_insn (d->target, sel);
7670 }
7671
7672 switch (d->vmode)
7673 {
7674 case E_V2DFmode:
7675 emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0));
7676 break;
7677 case E_V2DImode:
7678 emit_insn (gen_lsx_vshuf_d (target, target, op1, op0));
7679 break;
7680 case E_V4SFmode:
7681 emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0));
7682 break;
7683 case E_V4SImode:
7684 emit_insn (gen_lsx_vshuf_w (target, target, op1, op0));
7685 break;
7686 case E_V8HImode:
7687 emit_insn (gen_lsx_vshuf_h (target, target, op1, op0));
7688 break;
7689 case E_V16QImode:
7690 emit_insn (gen_lsx_vshuf_b (target, op1, op0, target));
7691 break;
7692 default:
7693 break;
7694 }
7695
7696 return true;
7697 }
7698 return false;
7699 }
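
/* Worked example (selector values are illustrative only): for a V4SImode
   permutation with constant selector {0, 4, 1, 5}, i.e. interleave the low
   halves of the two inputs, the selector constant is first loaded into the
   target register (through an integer-mode subreg for the float modes),
   and the vshuf.w emitted above then reads indices 0..3 from op0 and 4..7
   from op1, leaving {op0[0], op1[0], op0[1], op1[1]} in the target.  */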
7700
7701 static bool
7702 loongarch_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7703 {
7704 unsigned int i, nelt = d->nelt;
7705 unsigned char perm2[MAX_VECT_LEN];
7706
7707 if (d->one_vector_p)
7708 {
7709 /* Try interleave with alternating operands. */
7710 memcpy (perm2, d->perm, sizeof (perm2));
7711 for (i = 1; i < nelt; i += 2)
7712 perm2[i] += nelt;
7713 if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1, perm2,
7714 nelt))
7715 return true;
7716 }
7717 else
7718 {
7719 if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1,
7720 d->perm, nelt))
7721 return true;
7722
7723 /* Try again with swapped operands. */
7724 for (i = 0; i < nelt; ++i)
7725 perm2[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7726 if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0, perm2,
7727 nelt))
7728 return true;
7729 }
7730
7731 if (loongarch_expand_lsx_shuffle (d))
7732 return true;
7733 return false;
7734 }
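
/* Illustrative note: the "interleave with alternating operands" rewrite
   above turns a one-vector selector such as {0, 0, 1, 1} (V4SImode) into
   {0, 4, 1, 5} over the concatenation of op0 with itself, which
   loongarch_expand_vselect_vconcat can then match against an even/odd
   interleave (vilvl-style) pattern.  The swapped-operand retry for the
   two-vector case renumbers every index modulo 2*nelt so the same
   vec_select+vec_concat patterns can be tried with op1 first.  */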
7735
7736 /* Implementation of constant vector permutation.  This function identifies
7737    recognized patterns of the permutation selector argument and uses one or
7738    more instructions to carry out the permutation correctly.  For unsupported
7739    patterns it returns false.  */
7740
7741 static bool
7742 loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
7743 {
7744 /* Although we have the LSX vec_perm<mode> template, there are still some
7745    128-bit vector permutation operations sent to vectorize_vec_perm_const.
7746    In this case we simply wrap them with a single vshuf.* instruction,
7747    because the LSX vshuf.* instructions have exactly the behavior GCC
7748    expects.  */
7749 return loongarch_try_expand_lsx_vshuf_const (d);
7750 }
7751
7752 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
7753
7754 static bool
7755 loongarch_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
7756 rtx target, rtx op0, rtx op1,
7757 const vec_perm_indices &sel)
7758 {
7759 if (vmode != op_mode)
7760 return false;
7761
7762 struct expand_vec_perm_d d;
7763 int i, nelt, which;
7764 unsigned char orig_perm[MAX_VECT_LEN];
7765 bool ok;
7766
7767 d.target = target;
7768 if (op0)
7769 {
7770 rtx nop0 = force_reg (vmode, op0);
7771 if (op0 == op1)
7772 op1 = nop0;
7773 op0 = nop0;
7774 }
7775 if (op1)
7776 op1 = force_reg (vmode, op1);
7777 d.op0 = op0;
7778 d.op1 = op1;
7779
7780 d.vmode = vmode;
7781 gcc_assert (VECTOR_MODE_P (vmode));
7782 d.nelt = nelt = GET_MODE_NUNITS (vmode);
7783 d.testing_p = !target;
7784
7785 /* This is overly conservative, but ensures we don't get an
7786 uninitialized warning on ORIG_PERM. */
7787 memset (orig_perm, 0, MAX_VECT_LEN);
7788 for (i = which = 0; i < nelt; ++i)
7789 {
7790 int ei = sel[i] & (2 * nelt - 1);
7791 which |= (ei < nelt ? 1 : 2);
7792 orig_perm[i] = ei;
7793 }
7794 memcpy (d.perm, orig_perm, MAX_VECT_LEN);
7795
7796 switch (which)
7797 {
7798 default:
7799 gcc_unreachable ();
7800
7801 case 3:
7802 d.one_vector_p = false;
7803 if (d.testing_p || !rtx_equal_p (d.op0, d.op1))
7804 break;
7805 /* FALLTHRU */
7806
7807 case 2:
7808 for (i = 0; i < nelt; ++i)
7809 d.perm[i] &= nelt - 1;
7810 d.op0 = d.op1;
7811 d.one_vector_p = true;
7812 break;
7813
7814 case 1:
7815 d.op1 = d.op0;
7816 d.one_vector_p = true;
7817 break;
7818 }
7819
7820 if (d.testing_p)
7821 {
7822 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
7823 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
7824 if (!d.one_vector_p)
7825 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
7826
7827 ok = loongarch_expand_vec_perm_const_2 (&d);
7828 if (ok)
7829 return ok;
7830
7831 start_sequence ();
7832 ok = loongarch_expand_vec_perm_const_1 (&d);
7833 end_sequence ();
7834 return ok;
7835 }
7836
7837 ok = loongarch_expand_vec_perm_const_2 (&d);
7838 if (!ok)
7839 ok = loongarch_expand_vec_perm_const_1 (&d);
7840
7841 /* If we were given a two-vector permutation which just happened to
7842 have both input vectors equal, we folded this into a one-vector
7843 permutation. There are several loongson patterns that are matched
7844 via direct vec_select+vec_concat expansion, but we do not have
7845 support in loongarch_expand_vec_perm_const_1 to guess the adjustment
7846 that should be made for a single operand. Just try again with
7847 the original permutation. */
7848 if (!ok && which == 3)
7849 {
7850 d.op0 = op0;
7851 d.op1 = op1;
7852 d.one_vector_p = false;
7853 memcpy (d.perm, orig_perm, MAX_VECT_LEN);
7854 ok = loongarch_expand_vec_perm_const_1 (&d);
7855 }
7856
7857 return ok;
7858 }
7859
7860 static int
7861 loongarch_cpu_sched_reassociation_width (struct loongarch_target *target,
7862 unsigned int opc, machine_mode mode)
7863 {
7864 /* OPC is only consulted for floating-point modes below.  */
7865 (void) opc;
7866
7867 switch (target->cpu_tune)
7868 {
7869 case CPU_LOONGARCH64:
7870 case CPU_LA464:
7871 /* Vector part. */
7872 if (LSX_SUPPORTED_MODE_P (mode))
7873 {
7874 /* Integer vector instructions execute in the FP unit.
7875    The width of integer/floating-point vector instructions is 3.  */
7876 return 3;
7877 }
7878
7879 /* Scalar part. */
7880 else if (INTEGRAL_MODE_P (mode))
7881 return 1;
7882 else if (FLOAT_MODE_P (mode))
7883 {
7884 if (opc == PLUS_EXPR)
7885 {
7886 return 2;
7887 }
7888 return 4;
7889 }
7890 break;
7891 default:
7892 break;
7893 }
7894
7895 /* The default reassociation width is 1.  */
7896 return 1;
7897 }
7898
7899 /* Implement TARGET_SCHED_REASSOCIATION_WIDTH. */
7900
7901 static int
7902 loongarch_sched_reassociation_width (unsigned int opc, machine_mode mode)
7903 {
7904 return loongarch_cpu_sched_reassociation_width (&la_target, opc, mode);
7905 }
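
/* Rough illustration of what the width means to the reassociation pass
   (the function below is only an example): for scalar floating-point
   addition the width above is 2, so a chain like

     double
     sum4 (const double *a)
     {
       return a[0] + a[1] + a[2] + a[3];
     }

   may, when reassociation is permitted (e.g. with -ffast-math), be
   rebalanced into two independent partial sums (a[0] + a[1]) + (a[2] + a[3])
   instead of one serial dependency chain; a width of 1 keeps chains serial,
   and LSX vector modes report a width of 3.  */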
7906
7907 /* Extract scalar element ELT of vector register VEC into TARGET.  */
7908
7909 void
7910 loongarch_expand_vector_extract (rtx target, rtx vec, int elt)
7911 {
7912 machine_mode mode = GET_MODE (vec);
7913 machine_mode inner_mode = GET_MODE_INNER (mode);
7914 rtx tmp;
7915
7916 switch (mode)
7917 {
7918 case E_V8HImode:
7919 case E_V16QImode:
7920 break;
7921
7922 default:
7923 break;
7924 }
7925
7926 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
7927 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
7928
7929 /* Let the rtl optimizers know about the zero extension performed. */
7930 if (inner_mode == QImode || inner_mode == HImode)
7931 {
7932 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
7933 target = gen_lowpart (SImode, target);
7934 }
7935 if (inner_mode == SImode || inner_mode == DImode)
7936 {
7937 tmp = gen_rtx_SIGN_EXTEND (inner_mode, tmp);
7938 }
7939
7940 emit_insn (gen_rtx_SET (target, tmp));
7941 }
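
/* Sketch of the RTL produced above (element number chosen as an example):
   extracting element 3 of a V8HImode vector becomes

     (set (reg:SI target')
          (zero_extend:SI
            (vec_select:HI (reg:V8HI vec) (parallel [(const_int 3)]))))

   i.e. narrow elements are published as zero-extended SImode values so
   that later RTL passes know the upper bits of the destination are
   clear.  */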
7942
7943 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
7944 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
7945 The upper bits of DEST are undefined, though they shouldn't cause
7946 exceptions (some bits from src or all zeros are ok). */
7947
7948 static void
7949 emit_reduc_half (rtx dest, rtx src, int i)
7950 {
7951 rtx tem, d = dest;
7952 switch (GET_MODE (src))
7953 {
7954 case E_V4SFmode:
7955 tem = gen_lsx_vbsrl_w_f (dest, src, GEN_INT (i == 128 ? 8 : 4));
7956 break;
7957 case E_V2DFmode:
7958 tem = gen_lsx_vbsrl_d_f (dest, src, GEN_INT (8));
7959 break;
7960 case E_V16QImode:
7961 case E_V8HImode:
7962 case E_V4SImode:
7963 case E_V2DImode:
7964 d = gen_reg_rtx (V2DImode);
7965 tem = gen_lsx_vbsrl_d (d, gen_lowpart (V2DImode, src), GEN_INT (i/16));
7966 break;
7967 default:
7968 gcc_unreachable ();
7969 }
7970 emit_insn (tem);
7971 if (d != dest)
7972 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
7973 }
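
/* The shift amounts above are meant to bring the upper half of the live
   bits down to the bottom of the register: assuming the byte-wise right
   shift moves data across the whole vector, a V4SImode SRC with i == 128
   is shifted by 8 bytes so elements {2, 3} land in lanes {0, 1}, and with
   i == 64 it is shifted by 4 bytes so element 1 lands in lane 0.  Only the
   low i/2 bits of DEST are consumed by the caller.  */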
7974
7975 /* Expand a vector reduction. FN is the binary pattern to reduce;
7976 DEST is the destination; IN is the input vector. */
7977
7978 void
7979 loongarch_expand_vector_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
7980 {
7981 rtx half, dst, vec = in;
7982 machine_mode mode = GET_MODE (in);
7983 int i;
7984
7985 for (i = GET_MODE_BITSIZE (mode);
7986 i > GET_MODE_UNIT_BITSIZE (mode);
7987 i >>= 1)
7988 {
7989 half = gen_reg_rtx (mode);
7990 emit_reduc_half (half, vec, i);
7991 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
7992 dst = dest;
7993 else
7994 dst = gen_reg_rtx (mode);
7995 emit_insn (fn (dst, half, vec));
7996 vec = dst;
7997 }
7998 }
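
/* Worked example for an addition reduction of a V4SImode vector
   {a, b, c, d}, where FN is the vector add pattern:

     iteration 1 (i == 128): half = {c, d, ...},   vec  = half + vec
                             giving {a+c, b+d, ...}
     iteration 2 (i == 64):  half = {b+d, ...},    dest = half + vec
                             so element 0 of DEST holds a+b+c+d.

   Only element 0 of the final vector is meaningful; the caller extracts
   it separately.  */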
7999
8000 /* Expand an integral vector unpack operation. */
8001
8002 void
8003 loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
8004 {
8005 machine_mode imode = GET_MODE (operands[1]);
8006 rtx (*unpack) (rtx, rtx, rtx);
8007 rtx (*cmpFunc) (rtx, rtx, rtx);
8008 rtx tmp, dest;
8009
8010 if (ISA_HAS_LSX)
8011 {
8012 switch (imode)
8013 {
8014 case E_V4SImode:
8015 if (high_p != 0)
8016 unpack = gen_lsx_vilvh_w;
8017 else
8018 unpack = gen_lsx_vilvl_w;
8019
8020 cmpFunc = gen_lsx_vslt_w;
8021 break;
8022
8023 case E_V8HImode:
8024 if (high_p != 0)
8025 unpack = gen_lsx_vilvh_h;
8026 else
8027 unpack = gen_lsx_vilvl_h;
8028
8029 cmpFunc = gen_lsx_vslt_h;
8030 break;
8031
8032 case E_V16QImode:
8033 if (high_p != 0)
8034 unpack = gen_lsx_vilvh_b;
8035 else
8036 unpack = gen_lsx_vilvl_b;
8037
8038 cmpFunc = gen_lsx_vslt_b;
8039 break;
8040
8041 default:
8042 gcc_unreachable ();
8043 break;
8044 }
8045
8046 if (!unsigned_p)
8047 {
8048 /* Derive the sign extension of each element by comparing it against
8049    immediate zero: the comparison yields all ones for negative elements.  */
8050 tmp = gen_reg_rtx (imode);
8051 emit_insn (cmpFunc (tmp, operands[1], CONST0_RTX (imode)));
8052 }
8053 else
8054 tmp = force_reg (imode, CONST0_RTX (imode));
8055
8056 dest = gen_reg_rtx (imode);
8057
8058 emit_insn (unpack (dest, operands[1], tmp));
8059 emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest));
8060 return;
8061 }
8062 gcc_unreachable ();
8063 }
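
/* Illustrative walk-through of the signed V8HImode -> V4SImode low-part
   unpack: the vslt comparison sets TMP[i] to -1 exactly when
   operands[1][i] is negative, which is the bit pattern the high half of
   each widened element needs, and the vilvl interleave then pairs each
   low element of operands[1] with its TMP element so that every 32-bit
   lane of DEST holds a correctly sign-extended value.  For the unsigned
   case TMP is the zero vector, so the same interleave performs a zero
   extension.  */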
8064
8065 /* Construct and return a PARALLEL RTX with CONST_INTs selecting the HIGH
8066    (high_p == TRUE) or LOW (high_p == FALSE) half of a vector of mode MODE.  */
8067
8068 rtx
8069 loongarch_lsx_vec_parallel_const_half (machine_mode mode, bool high_p)
8070 {
8071 int nunits = GET_MODE_NUNITS (mode);
8072 rtvec v = rtvec_alloc (nunits / 2);
8073 int base;
8074 int i;
8075
8076 base = high_p ? nunits / 2 : 0;
8077
8078 for (i = 0; i < nunits / 2; i++)
8079 RTVEC_ELT (v, i) = GEN_INT (base + i);
8080
8081 return gen_rtx_PARALLEL (VOIDmode, v);
8082 }
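
/* For example, for V8HImode this returns (parallel [0 1 2 3]) when HIGH_P
   is false and (parallel [4 5 6 7]) when HIGH_P is true.  */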
8083
8084 /* A subroutine of loongarch_expand_vec_init; return true if X is a
8085    constant vector element.  */
8086
8087 static inline bool
8088 loongarch_constant_elt_p (rtx x)
8089 {
8090 return CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE;
8091 }
8092
8093 rtx
8094 loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val)
8095 {
8096 int nunits = GET_MODE_NUNITS (mode);
8097 int nsets = nunits / 4;
8098 rtx elts[MAX_VECT_LEN];
8099 int set = 0;
8100 int i, j;
8101
8102 /* Build a const_int selector that applies the same 4-element pattern,
8103    encoded in the 2-bit fields of VAL, to every group of four elements.  */
8104 for (j = 0; j < nsets; j++, set = 4 * j)
8105 for (i = 0; i < 4; i++)
8106 elts[set + i] = GEN_INT (set + ((val >> (2 * i)) & 0x3));
8107
8108 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nunits, elts));
8109 }
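
/* Worked example (VAL chosen only for illustration): for V8HImode and
   VAL == 0x1b (2-bit fields 3, 2, 1, 0, lowest field first), each group
   of four elements gets those fields added to its base index, giving the
   selector {3, 2, 1, 0, 7, 6, 5, 4} -- the "apply one 4-way pattern to
   every group of four" shape used by the vshuf4i-class instructions.  */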
8110
8111 /* Expand a vector initialization. */
8112
8113 void
8114 loongarch_expand_vector_init (rtx target, rtx vals)
8115 {
8116 machine_mode vmode = GET_MODE (target);
8117 machine_mode imode = GET_MODE_INNER (vmode);
8118 unsigned i, nelt = GET_MODE_NUNITS (vmode);
8119 unsigned nvar = 0;
8120 bool all_same = true;
8121 rtx x;
8122
8123 for (i = 0; i < nelt; ++i)
8124 {
8125 x = XVECEXP (vals, 0, i);
8126 if (!loongarch_constant_elt_p (x))
8127 nvar++;
8128 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8129 all_same = false;
8130 }
8131
8132 if (ISA_HAS_LSX)
8133 {
8134 if (all_same)
8135 {
8136 rtx same = XVECEXP (vals, 0, 0);
8137 rtx temp, temp2;
8138
8139 if (CONST_INT_P (same) && nvar == 0
8140 && loongarch_signed_immediate_p (INTVAL (same), 10, 0))
8141 {
8142 switch (vmode)
8143 {
8144 case E_V16QImode:
8145 case E_V8HImode:
8146 case E_V4SImode:
8147 case E_V2DImode:
8148 temp = gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0));
8149 emit_move_insn (target, temp);
8150 return;
8151
8152 default:
8153 gcc_unreachable ();
8154 }
8155 }
8156 temp = gen_reg_rtx (imode);
8157 if (imode == GET_MODE (same))
8158 temp2 = same;
8159 else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD)
8160 {
8161 if (GET_CODE (same) == MEM)
8162 {
8163 rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
8164 loongarch_emit_move (reg_tmp, same);
8165 temp2 = simplify_gen_subreg (imode, reg_tmp,
8166 GET_MODE (reg_tmp), 0);
8167 }
8168 else
8169 temp2 = simplify_gen_subreg (imode, same, GET_MODE (same), 0);
8170 }
8171 else
8172 {
8173 if (GET_CODE (same) == MEM)
8174 {
8175 rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
8176 loongarch_emit_move (reg_tmp, same);
8177 temp2 = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp));
8178 }
8179 else
8180 temp2 = lowpart_subreg (imode, same, GET_MODE (same));
8181 }
8182 emit_move_insn (temp, temp2);
8183
8184 switch (vmode)
8185 {
8186 case E_V16QImode:
8187 case E_V8HImode:
8188 case E_V4SImode:
8189 case E_V2DImode:
8190 loongarch_emit_move (target, gen_rtx_VEC_DUPLICATE (vmode, temp));
8191 break;
8192
8193 case E_V4SFmode:
8194 emit_insn (gen_lsx_vreplvei_w_f_scalar (target, temp));
8195 break;
8196
8197 case E_V2DFmode:
8198 emit_insn (gen_lsx_vreplvei_d_f_scalar (target, temp));
8199 break;
8200
8201 default:
8202 gcc_unreachable ();
8203 }
8204 }
8205 else
8206 {
8207 emit_move_insn (target, CONST0_RTX (vmode));
8208
8209 for (i = 0; i < nelt; ++i)
8210 {
8211 rtx temp = gen_reg_rtx (imode);
8212 emit_move_insn (temp, XVECEXP (vals, 0, i));
8213 switch (vmode)
8214 {
8215 case E_V16QImode:
8216 if (i == 0)
8217 emit_insn (gen_lsx_vreplvei_b_scalar (target, temp));
8218 else
8219 emit_insn (gen_vec_setv16qi (target, temp, GEN_INT (i)));
8220 break;
8221
8222 case E_V8HImode:
8223 if (i == 0)
8224 emit_insn (gen_lsx_vreplvei_h_scalar (target, temp));
8225 else
8226 emit_insn (gen_vec_setv8hi (target, temp, GEN_INT (i)));
8227 break;
8228
8229 case E_V4SImode:
8230 if (i == 0)
8231 emit_insn (gen_lsx_vreplvei_w_scalar (target, temp));
8232 else
8233 emit_insn (gen_vec_setv4si (target, temp, GEN_INT (i)));
8234 break;
8235
8236 case E_V2DImode:
8237 if (i == 0)
8238 emit_insn (gen_lsx_vreplvei_d_scalar (target, temp));
8239 else
8240 emit_insn (gen_vec_setv2di (target, temp, GEN_INT (i)));
8241 break;
8242
8243 case E_V4SFmode:
8244 if (i == 0)
8245 emit_insn (gen_lsx_vreplvei_w_f_scalar (target, temp));
8246 else
8247 emit_insn (gen_vec_setv4sf (target, temp, GEN_INT (i)));
8248 break;
8249
8250 case E_V2DFmode:
8251 if (i == 0)
8252 emit_insn (gen_lsx_vreplvei_d_f_scalar (target, temp));
8253 else
8254 emit_insn (gen_vec_setv2df (target, temp, GEN_INT (i)));
8255 break;
8256
8257 default:
8258 gcc_unreachable ();
8259 }
8260 }
8261 }
8262 return;
8263 }
8264
8265 /* Load constants from the pool, or whatever's handy. */
8266 if (nvar == 0)
8267 {
8268 emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0)));
8269 return;
8270 }
8271
8272 /* For two-part initialization, always use CONCAT. */
8273 if (nelt == 2)
8274 {
8275 rtx op0 = force_reg (imode, XVECEXP (vals, 0, 0));
8276 rtx op1 = force_reg (imode, XVECEXP (vals, 0, 1));
8277 x = gen_rtx_VEC_CONCAT (vmode, op0, op1);
8278 emit_insn (gen_rtx_SET (target, x));
8279 return;
8280 }
8281
8282 /* Vectors with more elements are only supported through the LSX paths handled above.  */
8283 gcc_assert (0);
8284 }
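
/* User-level sketch of the LSX paths above (GNU C, types illustrative):

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     splat (int x)
     {
       return (v4si) {x, x, x, x};
     }

   takes the all_same branch and is built with a single broadcast of X,
   while an initializer with distinct elements broadcasts element 0 and
   then issues one vec_set per remaining lane.  Small replicated integer
   constants (signed 10-bit) are emitted directly as a CONST_VECTOR move.
   Without LSX, an all-constant initializer is loaded whole (e.g. from the
   constant pool) and a two-element one is built with VEC_CONCAT.  */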
8285
8286 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
8287
8288 machine_mode
8289 loongarch_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
8290 machine_mode mode)
8291 {
8292 /* For performance, avoid saving/restoring upper parts of a register
8293 by returning MODE as save mode when the mode is known. */
8294 if (mode == VOIDmode)
8295 return choose_hard_reg_mode (regno, nregs, NULL);
8296 else
8297 return mode;
8298 }
8299
8300 /* Generate RTL for comparing CMP_OP0 and CMP_OP1 using condition COND and
8301 store the result -1 or 0 in DEST. */
8302
8303 static void
8304 loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1)
8305 {
8306 machine_mode cmp_mode = GET_MODE (op0);
8307 int unspec = -1;
8308 bool negate = false;
8309
8310 switch (cmp_mode)
8311 {
8312 case E_V16QImode:
8313 case E_V32QImode:
8314 case E_V8HImode:
8315 case E_V16HImode:
8316 case E_V4SImode:
8317 case E_V8SImode:
8318 case E_V2DImode:
8319 case E_V4DImode:
8320 switch (cond)
8321 {
8322 case NE:
8323 cond = reverse_condition (cond);
8324 negate = true;
8325 break;
8326 case EQ:
8327 case LT:
8328 case LE:
8329 case LTU:
8330 case LEU:
8331 break;
8332 case GE:
8333 case GT:
8334 case GEU:
8335 case GTU:
8336 std::swap (op0, op1);
8337 cond = swap_condition (cond);
8338 break;
8339 default:
8340 gcc_unreachable ();
8341 }
8342 loongarch_emit_binary (cond, dest, op0, op1);
8343 if (negate)
8344 emit_move_insn (dest, gen_rtx_NOT (GET_MODE (dest), dest));
8345 break;
8346
8347 case E_V4SFmode:
8348 case E_V2DFmode:
8349 switch (cond)
8350 {
8351 case UNORDERED:
8352 case ORDERED:
8353 case EQ:
8354 case NE:
8355 case UNEQ:
8356 case UNLE:
8357 case UNLT:
8358 break;
8359 case LTGT: cond = NE; break;
8360 case UNGE: cond = UNLE; std::swap (op0, op1); break;
8361 case UNGT: cond = UNLT; std::swap (op0, op1); break;
8362 case LE: unspec = UNSPEC_LSX_VFCMP_SLE; break;
8363 case LT: unspec = UNSPEC_LSX_VFCMP_SLT; break;
8364 case GE: unspec = UNSPEC_LSX_VFCMP_SLE; std::swap (op0, op1); break;
8365 case GT: unspec = UNSPEC_LSX_VFCMP_SLT; std::swap (op0, op1); break;
8366 default:
8367 gcc_unreachable ();
8368 }
8369 if (unspec < 0)
8370 loongarch_emit_binary (cond, dest, op0, op1);
8371 else
8372 {
8373 rtx x = gen_rtx_UNSPEC (GET_MODE (dest),
8374 gen_rtvec (2, op0, op1), unspec);
8375 emit_insn (gen_rtx_SET (dest, x));
8376 }
8377 break;
8378
8379 default:
8380 gcc_unreachable ();
8381 break;
8382 }
8383 }
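
/* Summary of the condition rewrites above, integer case:

     a != b  becomes  not (a == b)
     a >  b  becomes  b <  a   (operands swapped)
     a >= b  becomes  b <= a   (operands swapped)

   so only EQ/LT/LE (and their unsigned forms) need native comparison
   patterns.  On the floating-point side, conditions without a directly
   usable pattern are emitted through the UNSPEC_LSX_VFCMP_* codes, again
   swapping operands where that turns GE/GT into LE/LT.  */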
8384
8385 /* Expand VEC_COND_EXPR, where:
8386 MODE is mode of the result
8387 VIMODE equivalent integer mode
8388 OPERANDS operands of VEC_COND_EXPR. */
8389
8390 void
8391 loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
8392 rtx *operands)
8393 {
8394 rtx cond = operands[3];
8395 rtx cmp_op0 = operands[4];
8396 rtx cmp_op1 = operands[5];
8397 rtx cmp_res = gen_reg_rtx (vimode);
8398
8399 loongarch_expand_lsx_cmp (cmp_res, GET_CODE (cond), cmp_op0, cmp_op1);
8400
8401 /* We handle the following cases:
8402 1) r = a CMP b ? -1 : 0
8403 2) r = a CMP b ? -1 : v
8404 3) r = a CMP b ? v : 0
8405 4) r = a CMP b ? v1 : v2 */
8406
8407 /* Case (1) above. We only move the results. */
8408 if (operands[1] == CONSTM1_RTX (vimode)
8409 && operands[2] == CONST0_RTX (vimode))
8410 emit_move_insn (operands[0], cmp_res);
8411 else
8412 {
8413 rtx src1 = gen_reg_rtx (vimode);
8414 rtx src2 = gen_reg_rtx (vimode);
8415 rtx mask = gen_reg_rtx (vimode);
8416 rtx bsel;
8417
8418 /* Move the vector result to use it as a mask. */
8419 emit_move_insn (mask, cmp_res);
8420
8421 if (register_operand (operands[1], mode))
8422 {
8423 rtx xop1 = operands[1];
8424 if (mode != vimode)
8425 {
8426 xop1 = gen_reg_rtx (vimode);
8427 emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0));
8428 }
8429 emit_move_insn (src1, xop1);
8430 }
8431 else
8432 {
8433 gcc_assert (operands[1] == CONSTM1_RTX (vimode));
8434 /* Case (2): operand 1 is all ones, so the mask itself can serve as src1.  */
8435 emit_move_insn (src1, mask);
8436 }
8437
8438 if (register_operand (operands[2], mode))
8439 {
8440 rtx xop2 = operands[2];
8441 if (mode != vimode)
8442 {
8443 xop2 = gen_reg_rtx (vimode);
8444 emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0));
8445 }
8446 emit_move_insn (src2, xop2);
8447 }
8448 else
8449 {
8450 gcc_assert (operands[2] == CONST0_RTX (mode));
8451 /* Case (3): operand 2 is zero; using the mask as src2 still gives ~mask & src2 == 0.  */
8452 emit_move_insn (src2, mask);
8453 }
8454
8455 /* Case (4) is handled when the mask was moved to neither src1 nor src2.
8456    In every case we finish with a vector mask-based select.  */
8457 bsel = gen_rtx_IOR (vimode,
8458 gen_rtx_AND (vimode,
8459 gen_rtx_NOT (vimode, mask), src2),
8460 gen_rtx_AND (vimode, mask, src1));
8461 /* The result is placed back to a register with the mask. */
8462 emit_insn (gen_rtx_SET (mask, bsel));
8463 emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0));
8464 }
8465 }
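
/* The select built above is the classic mask formula

     dest = (mask & src1) | (~mask & src2)

   i.e. dest[i] = cond[i] ? src1[i] : src2[i] for the all-ones/all-zeros
   element masks produced by loongarch_expand_lsx_cmp; the IOR/AND/NOT
   combination is expected to be matched by a single vbitsel-style pattern
   during recognition.  */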
8466
8467 void
8468 loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
8469 rtx *operands)
8470 {
8471 rtx cmp_res = operands[3];
8472
8473 /* We handle the following cases:
8474 1) r = a CMP b ? -1 : 0
8475 2) r = a CMP b ? -1 : v
8476 3) r = a CMP b ? v : 0
8477 4) r = a CMP b ? v1 : v2 */
8478
8479 /* Case (1) above. We only move the results. */
8480 if (operands[1] == CONSTM1_RTX (vimode)
8481 && operands[2] == CONST0_RTX (vimode))
8482 emit_move_insn (operands[0], cmp_res);
8483 else
8484 {
8485 rtx src1 = gen_reg_rtx (vimode);
8486 rtx src2 = gen_reg_rtx (vimode);
8487 rtx mask = gen_reg_rtx (vimode);
8488 rtx bsel;
8489
8490 /* Move the vector result to use it as a mask. */
8491 emit_move_insn (mask, cmp_res);
8492
8493 if (register_operand (operands[1], mode))
8494 {
8495 rtx xop1 = operands[1];
8496 if (mode != vimode)
8497 {
8498 xop1 = gen_reg_rtx (vimode);
8499 emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0));
8500 }
8501 emit_move_insn (src1, xop1);
8502 }
8503 else
8504 {
8505 gcc_assert (operands[1] == CONSTM1_RTX (vimode));
8506 /* Case (2): operand 1 is all ones, so the mask itself can serve as src1.  */
8507 emit_move_insn (src1, mask);
8508 }
8509
8510 if (register_operand (operands[2], mode))
8511 {
8512 rtx xop2 = operands[2];
8513 if (mode != vimode)
8514 {
8515 xop2 = gen_reg_rtx (vimode);
8516 emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0));
8517 }
8518 emit_move_insn (src2, xop2);
8519 }
8520 else
8521 {
8522 gcc_assert (operands[2] == CONST0_RTX (mode));
8523 /* Case (3): operand 2 is zero; using the mask as src2 still gives ~mask & src2 == 0.  */
8524 emit_move_insn (src2, mask);
8525 }
8526
8527 /* Case (4) is handled when the mask was moved to neither src1 nor src2.
8528    In every case we finish with a vector mask-based select.  */
8529 bsel = gen_rtx_IOR (vimode,
8530 gen_rtx_AND (vimode,
8531 gen_rtx_NOT (vimode, mask), src2),
8532 gen_rtx_AND (vimode, mask, src1));
8533 /* The result is placed back to a register with the mask. */
8534 emit_insn (gen_rtx_SET (mask, bsel));
8535 emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0));
8536 }
8537 }
8538
8539 /* Expand an integer vector comparison.  */
8540 bool
8541 loongarch_expand_vec_cmp (rtx operands[])
8542 {
8543
8544 rtx_code code = GET_CODE (operands[1]);
8545 loongarch_expand_lsx_cmp (operands[0], code, operands[2], operands[3]);
8546 return true;
8547 }
8548
8549 /* Implement TARGET_CASE_VALUES_THRESHOLD. */
8550
8551 unsigned int
8552 loongarch_case_values_threshold (void)
8553 {
8554 return default_case_values_threshold ();
8555 }
8556
8557 /* Implement TARGET_SPILL_CLASS. */
8558
8559 static reg_class_t
8560 loongarch_spill_class (reg_class_t rclass ATTRIBUTE_UNUSED,
8561 machine_mode mode ATTRIBUTE_UNUSED)
8562 {
8563 return NO_REGS;
8564 }
8565
8566 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */
8567
8568 /* This function is equivalent to default_promote_function_mode_always_promote
8569 except that it returns a promoted mode even if type is NULL_TREE. This is
8570 needed by libcalls which have no type (only a mode) such as fixed conversion
8571 routines that take a signed or unsigned char/short argument and convert it
8572 to a fixed type. */
8573
8574 static machine_mode
8575 loongarch_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
8576 machine_mode mode,
8577 int *punsignedp ATTRIBUTE_UNUSED,
8578 const_tree fntype ATTRIBUTE_UNUSED,
8579 int for_return ATTRIBUTE_UNUSED)
8580 {
8581 int unsignedp;
8582
8583 if (type != NULL_TREE)
8584 return promote_mode (type, mode, punsignedp);
8585
8586 unsignedp = *punsignedp;
8587 PROMOTE_MODE (mode, unsignedp, type);
8588 *punsignedp = unsignedp;
8589 return mode;
8590 }
8591
8592 /* Implement TARGET_STARTING_FRAME_OFFSET. See loongarch_compute_frame_info
8593 for details about the frame layout. */
8594
8595 static HOST_WIDE_INT
8596 loongarch_starting_frame_offset (void)
8597 {
8598 if (FRAME_GROWS_DOWNWARD)
8599 return 0;
8600 return crtl->outgoing_args_size;
8601 }
8602
8603 /* A subroutine of loongarch_build_signbit_mask. If VECT is true,
8604 then replicate the value for all elements of the vector
8605 register. */
8606
8607 rtx
8608 loongarch_build_const_vector (machine_mode mode, bool vect, rtx value)
8609 {
8610 int i, n_elt;
8611 rtvec v;
8612 machine_mode scalar_mode;
8613
8614 switch (mode)
8615 {
8616 case E_V32QImode:
8617 case E_V16QImode:
8618 case E_V32HImode:
8619 case E_V16HImode:
8620 case E_V8HImode:
8621 case E_V8SImode:
8622 case E_V4SImode:
8623 case E_V8DImode:
8624 case E_V4DImode:
8625 case E_V2DImode:
8626 gcc_assert (vect);
8627 /* FALLTHRU */
8628 case E_V8SFmode:
8629 case E_V4SFmode:
8630 case E_V8DFmode:
8631 case E_V4DFmode:
8632 case E_V2DFmode:
8633 n_elt = GET_MODE_NUNITS (mode);
8634 v = rtvec_alloc (n_elt);
8635 scalar_mode = GET_MODE_INNER (mode);
8636
8637 RTVEC_ELT (v, 0) = value;
8638
8639 for (i = 1; i < n_elt; ++i)
8640 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
8641
8642 return gen_rtx_CONST_VECTOR (mode, v);
8643
8644 default:
8645 gcc_unreachable ();
8646 }
8647 }
8648
8649 /* Create a mask for the sign bit in MODE
8650    for a register.  If VECT is true, then replicate the mask for
8651    all elements of the vector register.  If INVERT is true, then create
8652    a mask excluding the sign bit.  */
8653
8654 rtx
8655 loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert)
8656 {
8657 machine_mode vec_mode, imode;
8658 wide_int w;
8659 rtx mask, v;
8660
8661 switch (mode)
8662 {
8663 case E_V16SImode:
8664 case E_V16SFmode:
8665 case E_V8SImode:
8666 case E_V4SImode:
8667 case E_V8SFmode:
8668 case E_V4SFmode:
8669 vec_mode = mode;
8670 imode = SImode;
8671 break;
8672
8673 case E_V8DImode:
8674 case E_V4DImode:
8675 case E_V2DImode:
8676 case E_V8DFmode:
8677 case E_V4DFmode:
8678 case E_V2DFmode:
8679 vec_mode = mode;
8680 imode = DImode;
8681 break;
8682
8683 case E_TImode:
8684 case E_TFmode:
8685 vec_mode = VOIDmode;
8686 imode = TImode;
8687 break;
8688
8689 default:
8690 gcc_unreachable ();
8691 }
8692
8693 machine_mode inner_mode = GET_MODE_INNER (mode);
8694 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
8695 GET_MODE_BITSIZE (inner_mode));
8696 if (invert)
8697 w = wi::bit_not (w);
8698
8699 /* Force this value into the low part of a fp vector constant. */
8700 mask = immed_wide_int_const (w, imode);
8701 mask = gen_lowpart (inner_mode, mask);
8702
8703 if (vec_mode == VOIDmode)
8704 return force_reg (inner_mode, mask);
8705
8706 v = loongarch_build_const_vector (vec_mode, vect, mask);
8707 return force_reg (vec_mode, v);
8708 }
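
/* Example values (for illustration): for V4SFmode the mask built here is
   {0x80000000, 0x80000000, 0x80000000, 0x80000000}, or with INVERT
   {0x7fffffff, ...}; typical uses are vector negation as an XOR with the
   former and absolute value as an AND with the latter.  */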
8709
8710 static bool
8711 loongarch_builtin_support_vector_misalignment (machine_mode mode,
8712 const_tree type,
8713 int misalignment,
8714 bool is_packed)
8715 {
8716 if (ISA_HAS_LSX && STRICT_ALIGNMENT)
8717 {
8718 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
8719 return false;
8720 if (misalignment == -1)
8721 return false;
8722 }
8723 return default_builtin_support_vector_misalignment (mode, type, misalignment,
8724 is_packed);
8725 }
8726
8727 /* Initialize the GCC target structure. */
8728 #undef TARGET_ASM_ALIGNED_HI_OP
8729 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
8730 #undef TARGET_ASM_ALIGNED_SI_OP
8731 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8732 #undef TARGET_ASM_ALIGNED_DI_OP
8733 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
8734
8735 #undef TARGET_OPTION_OVERRIDE
8736 #define TARGET_OPTION_OVERRIDE loongarch_option_override
8737
8738 #undef TARGET_LEGITIMIZE_ADDRESS
8739 #define TARGET_LEGITIMIZE_ADDRESS loongarch_legitimize_address
8740
8741 #undef TARGET_ASM_SELECT_RTX_SECTION
8742 #define TARGET_ASM_SELECT_RTX_SECTION loongarch_select_rtx_section
8743 #undef TARGET_ASM_FUNCTION_RODATA_SECTION
8744 #define TARGET_ASM_FUNCTION_RODATA_SECTION loongarch_function_rodata_section
8745
8746 #undef TARGET_SCHED_INIT
8747 #define TARGET_SCHED_INIT loongarch_sched_init
8748 #undef TARGET_SCHED_REORDER
8749 #define TARGET_SCHED_REORDER loongarch_sched_reorder
8750 #undef TARGET_SCHED_REORDER2
8751 #define TARGET_SCHED_REORDER2 loongarch_sched_reorder2
8752 #undef TARGET_SCHED_VARIABLE_ISSUE
8753 #define TARGET_SCHED_VARIABLE_ISSUE loongarch_variable_issue
8754 #undef TARGET_SCHED_ADJUST_COST
8755 #define TARGET_SCHED_ADJUST_COST loongarch_adjust_cost
8756 #undef TARGET_SCHED_ISSUE_RATE
8757 #define TARGET_SCHED_ISSUE_RATE loongarch_issue_rate
8758 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
8759 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
8760 loongarch_multipass_dfa_lookahead
8761
8762 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8763 #define TARGET_FUNCTION_OK_FOR_SIBCALL loongarch_function_ok_for_sibcall
8764
8765 #undef TARGET_VALID_POINTER_MODE
8766 #define TARGET_VALID_POINTER_MODE loongarch_valid_pointer_mode
8767 #undef TARGET_REGISTER_MOVE_COST
8768 #define TARGET_REGISTER_MOVE_COST loongarch_register_move_cost
8769 #undef TARGET_MEMORY_MOVE_COST
8770 #define TARGET_MEMORY_MOVE_COST loongarch_memory_move_cost
8771 #undef TARGET_RTX_COSTS
8772 #define TARGET_RTX_COSTS loongarch_rtx_costs
8773 #undef TARGET_ADDRESS_COST
8774 #define TARGET_ADDRESS_COST loongarch_address_cost
8775 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8776 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8777 loongarch_builtin_vectorization_cost
8778
8779
8780 #undef TARGET_IN_SMALL_DATA_P
8781 #define TARGET_IN_SMALL_DATA_P loongarch_in_small_data_p
8782
8783 #undef TARGET_PREFERRED_RELOAD_CLASS
8784 #define TARGET_PREFERRED_RELOAD_CLASS loongarch_preferred_reload_class
8785
8786 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
8787 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
8788
8789 #undef TARGET_EXPAND_BUILTIN_VA_START
8790 #define TARGET_EXPAND_BUILTIN_VA_START loongarch_va_start
8791
8792 #undef TARGET_PROMOTE_FUNCTION_MODE
8793 #define TARGET_PROMOTE_FUNCTION_MODE loongarch_promote_function_mode
8794 #undef TARGET_RETURN_IN_MEMORY
8795 #define TARGET_RETURN_IN_MEMORY loongarch_return_in_memory
8796
8797 #undef TARGET_FUNCTION_VALUE
8798 #define TARGET_FUNCTION_VALUE loongarch_function_value
8799 #undef TARGET_LIBCALL_VALUE
8800 #define TARGET_LIBCALL_VALUE loongarch_libcall_value
8801
8802 #undef TARGET_ASM_OUTPUT_MI_THUNK
8803 #define TARGET_ASM_OUTPUT_MI_THUNK loongarch_output_mi_thunk
8804 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8805 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8806 hook_bool_const_tree_hwi_hwi_const_tree_true
8807
8808 #undef TARGET_PRINT_OPERAND
8809 #define TARGET_PRINT_OPERAND loongarch_print_operand
8810 #undef TARGET_PRINT_OPERAND_ADDRESS
8811 #define TARGET_PRINT_OPERAND_ADDRESS loongarch_print_operand_address
8812 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
8813 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P \
8814 loongarch_print_operand_punct_valid_p
8815
8816 #undef TARGET_SETUP_INCOMING_VARARGS
8817 #define TARGET_SETUP_INCOMING_VARARGS loongarch_setup_incoming_varargs
8818 #undef TARGET_STRICT_ARGUMENT_NAMING
8819 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
8820 #undef TARGET_MUST_PASS_IN_STACK
8821 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8822 #undef TARGET_PASS_BY_REFERENCE
8823 #define TARGET_PASS_BY_REFERENCE loongarch_pass_by_reference
8824 #undef TARGET_ARG_PARTIAL_BYTES
8825 #define TARGET_ARG_PARTIAL_BYTES loongarch_arg_partial_bytes
8826 #undef TARGET_FUNCTION_ARG
8827 #define TARGET_FUNCTION_ARG loongarch_function_arg
8828 #undef TARGET_FUNCTION_ARG_ADVANCE
8829 #define TARGET_FUNCTION_ARG_ADVANCE loongarch_function_arg_advance
8830 #undef TARGET_FUNCTION_ARG_BOUNDARY
8831 #define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary
8832
8833 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8834 #define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p
8835
8836 #undef TARGET_SCALAR_MODE_SUPPORTED_P
8837 #define TARGET_SCALAR_MODE_SUPPORTED_P loongarch_scalar_mode_supported_p
8838
8839 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8840 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE loongarch_preferred_simd_mode
8841
8842 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
8843 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
8844 loongarch_autovectorize_vector_modes
8845
8846 #undef TARGET_INIT_BUILTINS
8847 #define TARGET_INIT_BUILTINS loongarch_init_builtins
8848 #undef TARGET_BUILTIN_DECL
8849 #define TARGET_BUILTIN_DECL loongarch_builtin_decl
8850 #undef TARGET_EXPAND_BUILTIN
8851 #define TARGET_EXPAND_BUILTIN loongarch_expand_builtin
8852
8853 /* The generic ELF target does not always have TLS support. */
8854 #ifdef HAVE_AS_TLS
8855 #undef TARGET_HAVE_TLS
8856 #define TARGET_HAVE_TLS HAVE_AS_TLS
8857 #endif
8858
8859 #undef TARGET_CANNOT_FORCE_CONST_MEM
8860 #define TARGET_CANNOT_FORCE_CONST_MEM loongarch_cannot_force_const_mem
8861
8862 #undef TARGET_LEGITIMATE_CONSTANT_P
8863 #define TARGET_LEGITIMATE_CONSTANT_P loongarch_legitimate_constant_p
8864
8865 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8866 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
8867
8868 #ifdef HAVE_AS_DTPRELWORD
8869 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
8870 #define TARGET_ASM_OUTPUT_DWARF_DTPREL loongarch_output_dwarf_dtprel
8871 #endif
8872
8873 #undef TARGET_LEGITIMATE_ADDRESS_P
8874 #define TARGET_LEGITIMATE_ADDRESS_P loongarch_legitimate_address_p
8875
8876 #undef TARGET_FRAME_POINTER_REQUIRED
8877 #define TARGET_FRAME_POINTER_REQUIRED loongarch_frame_pointer_required
8878
8879 #undef TARGET_CAN_ELIMINATE
8880 #define TARGET_CAN_ELIMINATE loongarch_can_eliminate
8881
8882 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8883 #define TARGET_CONDITIONAL_REGISTER_USAGE loongarch_conditional_register_usage
8884
8885 #undef TARGET_TRAMPOLINE_INIT
8886 #define TARGET_TRAMPOLINE_INIT loongarch_trampoline_init
8887
8888 #undef TARGET_MIN_ANCHOR_OFFSET
8889 #define TARGET_MIN_ANCHOR_OFFSET (-IMM_REACH/2)
8890
8891 #undef TARGET_MAX_ANCHOR_OFFSET
8892 #define TARGET_MAX_ANCHOR_OFFSET (IMM_REACH/2-1)
8893 #undef TARGET_VECTORIZE_VEC_PERM_CONST
8894 #define TARGET_VECTORIZE_VEC_PERM_CONST loongarch_vectorize_vec_perm_const
8895
8896 #undef TARGET_SCHED_REASSOCIATION_WIDTH
8897 #define TARGET_SCHED_REASSOCIATION_WIDTH loongarch_sched_reassociation_width
8898
8899 #undef TARGET_CASE_VALUES_THRESHOLD
8900 #define TARGET_CASE_VALUES_THRESHOLD loongarch_case_values_threshold
8901
8902 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
8903 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV loongarch_atomic_assign_expand_fenv
8904
8905 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
8906 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
8907
8908 #undef TARGET_SPILL_CLASS
8909 #define TARGET_SPILL_CLASS loongarch_spill_class
8910
8911 #undef TARGET_HARD_REGNO_NREGS
8912 #define TARGET_HARD_REGNO_NREGS loongarch_hard_regno_nregs
8913 #undef TARGET_HARD_REGNO_MODE_OK
8914 #define TARGET_HARD_REGNO_MODE_OK loongarch_hard_regno_mode_ok
8915
8916 #undef TARGET_MODES_TIEABLE_P
8917 #define TARGET_MODES_TIEABLE_P loongarch_modes_tieable_p
8918
8919 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
8920 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
8921 loongarch_hard_regno_call_part_clobbered
8922
8923 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
8924 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
8925
8926 #undef TARGET_CAN_CHANGE_MODE_CLASS
8927 #define TARGET_CAN_CHANGE_MODE_CLASS loongarch_can_change_mode_class
8928
8929 #undef TARGET_CONSTANT_ALIGNMENT
8930 #define TARGET_CONSTANT_ALIGNMENT loongarch_constant_alignment
8931
8932 #undef TARGET_STARTING_FRAME_OFFSET
8933 #define TARGET_STARTING_FRAME_OFFSET loongarch_starting_frame_offset
8934
8935 #undef TARGET_SECONDARY_RELOAD
8936 #define TARGET_SECONDARY_RELOAD loongarch_secondary_reload
8937
8938 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
8939 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
8940
8941 #undef TARGET_ATTRIBUTE_TABLE
8942 #define TARGET_ATTRIBUTE_TABLE loongarch_attribute_table
8943
8944 #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
8945 #define TARGET_USE_ANCHORS_FOR_SYMBOL_P loongarch_use_anchors_for_symbol_p
8946
8947 #undef TARGET_ASAN_SHADOW_OFFSET
8948 #define TARGET_ASAN_SHADOW_OFFSET loongarch_asan_shadow_offset
8949
8950 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
8951 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
8952 loongarch_get_separate_components
8953
8954 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
8955 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB loongarch_components_for_bb
8956
8957 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
8958 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
8959 loongarch_disqualify_components
8960
8961 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
8962 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
8963 loongarch_emit_prologue_components
8964
8965 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
8966 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
8967 loongarch_emit_epilogue_components
8968
8969 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
8970 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
8971 loongarch_set_handled_components
8972
8973 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
8974 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
8975 loongarch_builtin_support_vector_misalignment
8976
8977 struct gcc_target targetm = TARGET_INITIALIZER;
8978
8979 #include "gt-loongarch.h"