1 /* Subroutines used for LoongArch code generation.
2 Copyright (C) 2021-2024 Free Software Foundation, Inc.
3 Contributed by Loongson Ltd.
4 Based on MIPS and RISC-V target for GNU compiler.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #define IN_TARGET_CODE 1
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "memmodel.h"
32 #include "gimple.h"
33 #include "cfghooks.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "emit-rtl.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic.h"
44 #include "insn-attr.h"
45 #include "output.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "varasm.h"
49 #include "stor-layout.h"
50 #include "calls.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "libfuncs.h"
54 #include "reload.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "sched-int.h"
60 #include "gimplify.h"
61 #include "target-globals.h"
62 #include "tree-pass.h"
63 #include "context.h"
64 #include "builtins.h"
65 #include "rtl-iter.h"
66 #include "opts.h"
67 #include "function-abi.h"
68 #include "cfgloop.h"
69 #include "tree-vectorizer.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
74 /* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF. */
75 #define UNSPEC_ADDRESS_P(X) \
76 (GET_CODE (X) == UNSPEC \
77 && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST \
78 && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)
79
80 /* Extract the symbol or label from UNSPEC wrapper X. */
81 #define UNSPEC_ADDRESS(X) XVECEXP (X, 0, 0)
82
83 /* Extract the symbol type from UNSPEC wrapper X. */
84 #define UNSPEC_ADDRESS_TYPE(X) \
85 ((enum loongarch_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))
86
87 /* True if INSN is a loongarch.md pattern or asm statement. */
88 /* ??? This test exists throughout the compiler, perhaps it should be
89 moved to rtl.h. */
90 #define USEFUL_INSN_P(INSN) \
91 (NONDEBUG_INSN_P (INSN) \
92 && GET_CODE (PATTERN (INSN)) != USE \
93 && GET_CODE (PATTERN (INSN)) != CLOBBER)
94
95 /* True if bit BIT is set in VALUE. */
96 #define BITSET_P(VALUE, BIT) (((VALUE) & (1 << (BIT))) != 0)
97
98 /* Classifies an address.
99
100 ADDRESS_REG
101 A natural register + offset address. The register satisfies
102 loongarch_valid_base_register_p and the offset is a const_arith_operand.
103
104 ADDRESS_REG_REG
105 A base register indexed by (optionally scaled) register.
106
107 ADDRESS_LO_SUM
108 A LO_SUM rtx. The first operand is a valid base register and the second
109 operand is a symbolic address.
110
111 ADDRESS_CONST_INT
112 A signed 16-bit constant address.
113
114 ADDRESS_SYMBOLIC:
115 A constant symbolic address. */
116 enum loongarch_address_type
117 {
118 ADDRESS_REG,
119 ADDRESS_REG_REG,
120 ADDRESS_LO_SUM,
121 ADDRESS_CONST_INT,
122 ADDRESS_SYMBOLIC
123 };
124
125
126 /* Information about an address described by loongarch_address_type. */
127 struct loongarch_address_info
128 {
129 enum loongarch_address_type type;
130 rtx reg;
131 rtx offset;
132 enum loongarch_symbol_type symbol_type;
133 };
134
135 /* Method of loading immediate numbers:
136
137 METHOD_NORMAL:
138 Load bits 0-31 of the immediate number.
139
140 METHOD_LU32I:
141 Load bits 32-51 of the immediate number.
142
143 METHOD_LU52I:
144 Load bits 52-63 of the immediate number.
145
146 METHOD_MIRROR:
147 Copy bits 0-31 of the immediate number to bits 32-63.
148 */
149 enum loongarch_load_imm_method
150 {
151 METHOD_NORMAL,
152 METHOD_LU32I,
153 METHOD_LU52I,
154 METHOD_MIRROR
155 };
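/* Illustrative sketch (not part of the compiler): decomposing the 64-bit
   constant 0x123456789abcdef0 with the methods above, assuming the usual
   LoongArch semantics of lu12i.w / ori / lu32i.d / lu52i.d:

     lu12i.w  $t0, 0x9abcd     # bits 12-31,  $t0 = 0xffffffff9abcd000
     ori      $t0, $t0, 0xef0  # bits 0-11,   $t0 = 0xffffffff9abcdef0
     lu32i.d  $t0, 0x45678     # bits 32-51,  $t0 = 0x000456789abcdef0
     lu52i.d  $t0, $t0, 0x123  # bits 52-63,  $t0 = 0x123456789abcdef0

   This is the four-operation worst case accounted for by
   LARCH_MAX_INTEGER_OPS below.  */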
156
157 struct loongarch_integer_op
158 {
159 enum rtx_code code;
160 HOST_WIDE_INT value;
161 /* Represent the result of the immediate count of the load instruction at
162 each step. */
163 HOST_WIDE_INT curr_value;
164 enum loongarch_load_imm_method method;
165 };
166
167 /* The largest number of operations needed to load an integer constant.
168 The worst accepted case for 64-bit constants is LU12I.W,LU32I.D,LU52I.D,ORI
169 or LU12I.W,LU32I.D,LU52I.D,ADDI.D. */
170 #define LARCH_MAX_INTEGER_OPS 4
171
172 /* Arrays that map GCC register numbers to debugger register numbers. */
173 int loongarch_dwarf_regno[FIRST_PSEUDO_REGISTER];
174
175 /* Index [M][R] is true if register R is allowed to hold a value of mode M. */
176 static bool loongarch_hard_regno_mode_ok_p[MAX_MACHINE_MODE]
177 [FIRST_PSEUDO_REGISTER];
178
179 /* Index C is true if character C is a valid PRINT_OPERAND punctuation
180 character. */
181 static bool loongarch_print_operand_punct[256];
182
183 /* Cached value of can_issue_more. This is cached in loongarch_variable_issue
184 hook and returned from loongarch_sched_reorder2. */
185 static int cached_can_issue_more;
186
187 /* Index R is the smallest register class that contains register R. */
188 const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = {
189 GR_REGS, GR_REGS, GR_REGS, GR_REGS,
190 JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS,
191 JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS,
192 SIBCALL_REGS, JIRL_REGS, SIBCALL_REGS, SIBCALL_REGS,
193 SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS,
194 SIBCALL_REGS, GR_REGS, GR_REGS, JIRL_REGS,
195 JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS,
196 JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS,
197
198 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
199 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
200 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
201 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
202 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
203 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
204 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
205 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
206 FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS,
207 FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS,
208 FRAME_REGS, FRAME_REGS
209 };
210
211 /* Which cost information to use. */
212 static const struct loongarch_rtx_cost_data *loongarch_cost;
213
214 /* Information about a single argument. */
215 struct loongarch_arg_info
216 {
217 /* True if the argument is at least partially passed on the stack. */
218 bool stack_p;
219
220 /* The number of integer registers allocated to this argument. */
221 unsigned int num_gprs;
222
223 /* The offset of the first register used, provided num_gprs is nonzero.
224 If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS. */
225 unsigned int gpr_offset;
226
227 /* The number of floating-point registers allocated to this argument. */
228 unsigned int num_fprs;
229
230 /* The offset of the first register used, provided num_fprs is nonzero. */
231 unsigned int fpr_offset;
232 };
233
234 /* Invoke MACRO (COND) for each fcmp.cond.{s/d} condition. */
235 #define LARCH_FP_CONDITIONS(MACRO) \
236 MACRO (f), \
237 MACRO (un), \
238 MACRO (eq), \
239 MACRO (ueq), \
240 MACRO (olt), \
241 MACRO (ult), \
242 MACRO (ole), \
243 MACRO (ule), \
244 MACRO (sf), \
245 MACRO (ngle), \
246 MACRO (seq), \
247 MACRO (ngl), \
248 MACRO (lt), \
249 MACRO (nge), \
250 MACRO (le), \
251 MACRO (ngt)
252
253 /* Enumerates the codes above as LARCH_FP_COND_<X>. */
254 #define DECLARE_LARCH_COND(X) LARCH_FP_COND_##X
255 enum loongarch_fp_condition
256 {
257 LARCH_FP_CONDITIONS (DECLARE_LARCH_COND)
258 };
259 #undef DECLARE_LARCH_COND
260
261 /* Index X provides the string representation of LARCH_FP_COND_<X>. */
262 #define STRINGIFY(X) #X
263 const char *const
264 loongarch_fp_conditions[16]= {LARCH_FP_CONDITIONS (STRINGIFY)};
265 #undef STRINGIFY
266
267 /* Size of guard page. */
268 #define STACK_CLASH_PROTECTION_GUARD_SIZE \
269 (1 << param_stack_clash_protection_guard_size)
270
271 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at
272 least PARM_BOUNDARY bits of alignment, but will be given anything up
273 to PREFERRED_STACK_BOUNDARY bits if the type requires it. */
274
275 static unsigned int
276 loongarch_function_arg_boundary (machine_mode mode, const_tree type)
277 {
278 unsigned int alignment;
279
280 /* Use natural alignment if the type is not aggregate data. */
281 if (type && !AGGREGATE_TYPE_P (type))
282 alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
283 else
284 alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
285
286 return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment));
287 }
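/* A worked example of the rule above (illustrative only), assuming the
   LP64 values PARM_BOUNDARY == 64 and PREFERRED_STACK_BOUNDARY == 128:

     char                      (alignment   8) -> MIN (128, MAX (64,   8)) =  64
     16-byte aligned aggregate (alignment 128) -> MIN (128, MAX (64, 128)) = 128
     64-byte aligned aggregate (alignment 512) -> MIN (128, MAX (64, 512)) = 128  */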
288
289 /* If MODE represents an argument that can be passed or returned in
290 floating-point registers, return the number of registers, else 0. */
291
292 static unsigned
293 loongarch_pass_mode_in_fpr_p (machine_mode mode)
294 {
295 if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG)
296 {
297 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
298 return 1;
299
300 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
301 return 2;
302 }
303
304 return 0;
305 }
306
307 typedef struct
308 {
309 const_tree type;
310 HOST_WIDE_INT offset;
311 } loongarch_aggregate_field;
312
313 /* Identify subfields of aggregates that are candidates for passing in
314 floating-point registers. */
315
316 static int
317 loongarch_flatten_aggregate_field (const_tree type,
318 loongarch_aggregate_field fields[2], int n,
319 HOST_WIDE_INT offset)
320 {
321 switch (TREE_CODE (type))
322 {
323 case RECORD_TYPE:
324 /* Can't handle incomplete types nor sizes that are not fixed. */
325 if (!COMPLETE_TYPE_P (type)
326 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
327 || !tree_fits_uhwi_p (TYPE_SIZE (type)))
328 return -1;
329
330 for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
331 if (TREE_CODE (f) == FIELD_DECL)
332 {
333 if (!TYPE_P (TREE_TYPE (f)))
334 return -1;
335
336 if (DECL_SIZE (f) && integer_zerop (DECL_SIZE (f)))
337 continue;
338
339 HOST_WIDE_INT pos = offset + int_byte_position (f);
340 n = loongarch_flatten_aggregate_field (TREE_TYPE (f), fields, n,
341 pos);
342 if (n < 0)
343 return -1;
344 }
345 return n;
346
347 case ARRAY_TYPE:
348 {
349 HOST_WIDE_INT n_elts;
350 loongarch_aggregate_field subfields[2];
351 tree index = TYPE_DOMAIN (type);
352 tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
353 int n_subfields = loongarch_flatten_aggregate_field (TREE_TYPE (type),
354 subfields, 0,
355 offset);
356
357 /* Can't handle incomplete types nor sizes that are not fixed. */
358 if (n_subfields <= 0
359 || !COMPLETE_TYPE_P (type)
360 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
361 || !index
362 || !TYPE_MAX_VALUE (index)
363 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
364 || !TYPE_MIN_VALUE (index)
365 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
366 || !tree_fits_uhwi_p (elt_size))
367 return -1;
368
369 n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
370 - tree_to_uhwi (TYPE_MIN_VALUE (index));
371 gcc_assert (n_elts >= 0);
372
373 for (HOST_WIDE_INT i = 0; i < n_elts; i++)
374 for (int j = 0; j < n_subfields; j++)
375 {
376 if (n >= 2)
377 return -1;
378
379 fields[n] = subfields[j];
380 fields[n++].offset += i * tree_to_uhwi (elt_size);
381 }
382
383 return n;
384 }
385
386 case COMPLEX_TYPE:
387 {
388 /* A complex type needs to consume two fields, so n must be 0. */
389 if (n != 0)
390 return -1;
391
392 HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type)));
393
394 if (elt_size <= UNITS_PER_FP_ARG)
395 {
396 fields[0].type = TREE_TYPE (type);
397 fields[0].offset = offset;
398 fields[1].type = TREE_TYPE (type);
399 fields[1].offset = offset + elt_size;
400
401 return 2;
402 }
403
404 return -1;
405 }
406
407 default:
408 if (n < 2
409 && ((SCALAR_FLOAT_TYPE_P (type)
410 && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FP_ARG)
411 || (INTEGRAL_TYPE_P (type)
412 && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD)))
413 {
414 fields[n].type = type;
415 fields[n].offset = offset;
416 return n + 1;
417 }
418 else
419 return -1;
420 }
421 }
422
423 /* Identify candidate aggregates for passing in floating-point registers.
424 Candidates have at most two fields after flattening. */
425
426 static int
427 loongarch_flatten_aggregate_argument (const_tree type,
428 loongarch_aggregate_field fields[2])
429 {
430 if (!type || TREE_CODE (type) != RECORD_TYPE)
431 return -1;
432
433 return loongarch_flatten_aggregate_field (type, fields, 0, 0);
434 }
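/* For illustration (hypothetical types, not part of the compiler), the
   flattening above behaves as follows:

     struct a { float x; float y; };   -> 2 SFmode fields, offsets 0 and 4
     struct b { double d; };           -> 1 DFmode field, offset 0
     struct c { float v[3]; };         -> more than two fields, returns -1
     struct d { struct a inner; };     -> nested fields are flattened, 2 fields  */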
435
436 /* See whether TYPE is a record whose fields should be returned in one or
437 two floating-point registers. If so, populate FIELDS accordingly. */
438
439 static unsigned
440 loongarch_pass_aggregate_num_fpr (const_tree type,
441 loongarch_aggregate_field fields[2])
442 {
443 int n = loongarch_flatten_aggregate_argument (type, fields);
444
445 for (int i = 0; i < n; i++)
446 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
447 return 0;
448
449 return n > 0 ? n : 0;
450 }
451
452 /* See whether TYPE is a record whose fields should be returned in one
453 floating-point register and one integer register. If so, populate
454 FIELDS accordingly. */
455
456 static bool
457 loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
458 loongarch_aggregate_field fields[2])
459 {
460 unsigned num_int = 0, num_float = 0;
461 int n = loongarch_flatten_aggregate_argument (type, fields);
462
463 for (int i = 0; i < n; i++)
464 {
465 num_float += SCALAR_FLOAT_TYPE_P (fields[i].type);
466 num_int += INTEGRAL_TYPE_P (fields[i].type);
467 }
468
469 return num_int == 1 && num_float == 1;
470 }
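/* For example (hypothetical types):

     struct m { double d; long n; };   -> one float field and one integer
                                          field, passed in an FPR and a GPR
     struct p { long a; long b; };     -> two integer fields, not eligible  */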
471
472 /* Return the representation of an argument passed or returned in an FPR
473 when the value has mode VALUE_MODE and the type has TYPE_MODE. The
474 two modes may be different for structures like:
475
476 struct __attribute__((packed)) foo { float f; }
477
478 where the SFmode value "f" is passed in REGNO but the struct itself
479 has mode BLKmode. */
480
481 static rtx
482 loongarch_pass_fpr_single (machine_mode type_mode, unsigned regno,
483 machine_mode value_mode,
484 HOST_WIDE_INT offset)
485 {
486 rtx x = gen_rtx_REG (value_mode, regno);
487
488 if (type_mode != value_mode)
489 {
490 x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset));
491 x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
492 }
493 return x;
494 }
495
496 /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1.
497 MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and
498 byte offset for the first value, likewise MODE2 and OFFSET2 for the
499 second value. */
500
501 static rtx
502 loongarch_pass_fpr_pair (machine_mode mode, unsigned regno1,
503 machine_mode mode1, HOST_WIDE_INT offset1,
504 unsigned regno2, machine_mode mode2,
505 HOST_WIDE_INT offset2)
506 {
507 return gen_rtx_PARALLEL (
508 mode, gen_rtvec (2,
509 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode1, regno1),
510 GEN_INT (offset1)),
511 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode2, regno2),
512 GEN_INT (offset2))));
513 }
514
515 /* Fill INFO with information about a single argument, and return an
516 RTL pattern to pass or return the argument. CUM is the cumulative
517 state for earlier arguments. MODE is the mode of this argument and
518 TYPE is its type (if known). NAMED is true if this is a named
519 (fixed) argument rather than a variable one. RETURN_P is true if
520 returning the argument, or false if passing the argument. */
521
522 static rtx
523 loongarch_get_arg_info (struct loongarch_arg_info *info,
524 const CUMULATIVE_ARGS *cum, machine_mode mode,
525 const_tree type, bool named, bool return_p)
526 {
527 unsigned num_bytes, num_words;
528 unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
529 unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
530 unsigned alignment = loongarch_function_arg_boundary (mode, type);
531
532 memset (info, 0, sizeof (*info));
533 info->gpr_offset = cum->num_gprs;
534 info->fpr_offset = cum->num_fprs;
535
536 if (named)
537 {
538 loongarch_aggregate_field fields[2];
539 unsigned fregno = fpr_base + info->fpr_offset;
540 unsigned gregno = gpr_base + info->gpr_offset;
541
542 /* Pass one- or two-element floating-point aggregates in FPRs. */
543 if ((info->num_fprs
544 = loongarch_pass_aggregate_num_fpr (type, fields))
545 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
546 switch (info->num_fprs)
547 {
548 case 1:
549 return loongarch_pass_fpr_single (mode, fregno,
550 TYPE_MODE (fields[0].type),
551 fields[0].offset);
552
553 case 2:
554 return loongarch_pass_fpr_pair (mode, fregno,
555 TYPE_MODE (fields[0].type),
556 fields[0].offset,
557 fregno + 1,
558 TYPE_MODE (fields[1].type),
559 fields[1].offset);
560
561 default:
562 gcc_unreachable ();
563 }
564
565 /* Pass real and complex floating-point numbers in FPRs. */
566 if ((info->num_fprs = loongarch_pass_mode_in_fpr_p (mode))
567 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
568 switch (GET_MODE_CLASS (mode))
569 {
570 case MODE_FLOAT:
571 return gen_rtx_REG (mode, fregno);
572
573 case MODE_COMPLEX_FLOAT:
574 return loongarch_pass_fpr_pair (mode, fregno,
575 GET_MODE_INNER (mode), 0,
576 fregno + 1, GET_MODE_INNER (mode),
577 GET_MODE_UNIT_SIZE (mode));
578
579 default:
580 gcc_unreachable ();
581 }
582
583 /* Pass structs with one float and one integer in an FPR and a GPR. */
584 if (loongarch_pass_aggregate_in_fpr_and_gpr_p (type, fields)
585 && info->gpr_offset < MAX_ARGS_IN_REGISTERS
586 && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
587 {
588 info->num_gprs = 1;
589 info->num_fprs = 1;
590
591 if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
592 std::swap (fregno, gregno);
593
594 return loongarch_pass_fpr_pair (mode, fregno,
595 TYPE_MODE (fields[0].type),
596 fields[0].offset, gregno,
597 TYPE_MODE (fields[1].type),
598 fields[1].offset);
599 }
600 }
601
602 /* Work out the size of the argument. */
603 num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
604 num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
605
606 /* Doubleword-aligned varargs start on an even register boundary. */
607 if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
608 info->gpr_offset += info->gpr_offset & 1;
609
610 /* Partition the argument between registers and stack. */
611 info->num_fprs = 0;
612 info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
613 info->stack_p = (num_words - info->num_gprs) != 0;
614
615 if (info->num_gprs || return_p)
616 return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
617
618 return NULL_RTX;
619 }
620
621 /* Implement TARGET_FUNCTION_ARG. */
622
623 static rtx
624 loongarch_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
625 {
626 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
627 struct loongarch_arg_info info;
628
629 if (arg.end_marker_p ())
630 return NULL;
631
632 return loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named,
633 false);
634 }
635
636 /* Implement TARGET_FUNCTION_ARG_ADVANCE. */
637
638 static void
639 loongarch_function_arg_advance (cumulative_args_t cum_v,
640 const function_arg_info &arg)
641 {
642 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
643 struct loongarch_arg_info info;
644
645 loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
646
647 /* Advance the register count. This has the effect of setting
648 num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
649 argument required us to skip the final GPR and pass the whole
650 argument on the stack. */
651 cum->num_fprs = info.fpr_offset + info.num_fprs;
652 cum->num_gprs = info.gpr_offset + info.num_gprs;
653 }
654
655 /* Implement TARGET_ARG_PARTIAL_BYTES. */
656
657 static int
658 loongarch_arg_partial_bytes (cumulative_args_t cum,
659 const function_arg_info &generic_arg)
660 {
661 struct loongarch_arg_info arg;
662
663 loongarch_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
664 generic_arg.type, generic_arg.named, false);
665 return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
666 }
667
668 /* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
669 VALTYPE is the return type and MODE is VOIDmode. For libcalls,
670 VALTYPE is null and MODE is the mode of the return value. */
671
672 static rtx
673 loongarch_function_value_1 (const_tree type, const_tree func,
674 machine_mode mode)
675 {
676 struct loongarch_arg_info info;
677 CUMULATIVE_ARGS args;
678
679 if (type)
680 {
681 int unsigned_p = TYPE_UNSIGNED (type);
682
683 mode = TYPE_MODE (type);
684
685 /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
686 return values, promote the mode here too. */
687 mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
688 }
689
690 memset (&args, 0, sizeof (args));
691 return loongarch_get_arg_info (&info, &args, mode, type, true, true);
692 }
693
694
695 /* Implement TARGET_FUNCTION_VALUE. */
696
697 static rtx
698 loongarch_function_value (const_tree valtype, const_tree fn_decl_or_type,
699 bool outgoing ATTRIBUTE_UNUSED)
700 {
701 return loongarch_function_value_1 (valtype, fn_decl_or_type, VOIDmode);
702 }
703
704 /* Implement TARGET_LIBCALL_VALUE. */
705
706 static rtx
707 loongarch_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
708 {
709 return loongarch_function_value_1 (NULL_TREE, NULL_TREE, mode);
710 }
711
712
713 /* Implement TARGET_PASS_BY_REFERENCE. */
714
715 static bool
716 loongarch_pass_by_reference (cumulative_args_t cum_v,
717 const function_arg_info &arg)
718 {
719 HOST_WIDE_INT size = arg.type_size_in_bytes ();
720 struct loongarch_arg_info info;
721 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
722
723 /* ??? std_gimplify_va_arg_expr passes NULL for cum. Fortunately, we
724 never pass variadic arguments in floating-point registers, so we can
725 avoid the call to loongarch_get_arg_info in this case. */
726 if (cum != NULL)
727 {
728 /* Don't pass by reference if we can use a floating-point register. */
729 loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named,
730 false);
731 if (info.num_fprs)
732 return false;
733 }
734
735 /* Pass by reference if the data do not fit in two integer registers. */
736 return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
737 }
738
739 /* Implement TARGET_RETURN_IN_MEMORY. */
740
741 static bool
742 loongarch_return_in_memory (const_tree type,
743 const_tree fndecl ATTRIBUTE_UNUSED)
744 {
745 CUMULATIVE_ARGS args;
746 cumulative_args_t cum = pack_cumulative_args (&args);
747
748 /* The rules for returning in memory are the same as for passing the
749 first named argument by reference. */
750 memset (&args, 0, sizeof (args));
751 function_arg_info arg (const_cast<tree> (type), /*named=*/true);
752 return loongarch_pass_by_reference (cum, arg);
753 }
754
755 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
756
757 static void
758 loongarch_setup_incoming_varargs (cumulative_args_t cum,
759 const function_arg_info &arg,
760 int *pretend_size ATTRIBUTE_UNUSED,
761 int no_rtl)
762 {
763 CUMULATIVE_ARGS local_cum;
764 int gp_saved;
765
766 /* The caller has advanced CUM up to, but not beyond, the last named
767 argument. Advance a local copy of CUM past the last "real" named
768 argument, to find out how many registers are left over. */
769 local_cum = *get_cumulative_args (cum);
770 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)))
771 loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg);
772
773 /* Find out how many registers we need to save. */
774 gp_saved = cfun->va_list_gpr_size / UNITS_PER_WORD;
775 if (gp_saved > (int) (MAX_ARGS_IN_REGISTERS - local_cum.num_gprs))
776 gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
777
778 if (!no_rtl && gp_saved > 0)
779 {
780 rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
781 REG_PARM_STACK_SPACE (cfun->decl)
782 - gp_saved * UNITS_PER_WORD);
783 rtx mem = gen_frame_mem (BLKmode, ptr);
784 set_mem_alias_set (mem, get_varargs_alias_set ());
785
786 move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST, mem, gp_saved);
787 }
788 if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
789 cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
790 }
791
792 /* Make the last instruction frame-related and note that it performs
793 the operation described by FRAME_PATTERN. */
794
795 static void
796 loongarch_set_frame_expr (rtx frame_pattern)
797 {
798 rtx insn;
799
800 insn = get_last_insn ();
801 RTX_FRAME_RELATED_P (insn) = 1;
802 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, frame_pattern,
803 REG_NOTES (insn));
804 }
805
806 /* Return a frame-related rtx that stores REG at MEM.
807 REG must be a single register. */
808
809 static rtx
810 loongarch_frame_set (rtx mem, rtx reg)
811 {
812 rtx set = gen_rtx_SET (mem, reg);
813 RTX_FRAME_RELATED_P (set) = 1;
814 return set;
815 }
816
817 /* Return true if the current function must save register REGNO. */
818
819 static bool
820 loongarch_save_reg_p (unsigned int regno)
821 {
822 bool call_saved = !global_regs[regno] && !call_used_regs[regno];
823 bool might_clobber
824 = crtl->saves_all_registers || df_regs_ever_live_p (regno);
825
826 if (call_saved && might_clobber)
827 return true;
828
829 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
830 return true;
831
832 if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return)
833 return true;
834
835 return false;
836 }
837
838 /* Determine which GPR save/restore routine to call. */
839
840 static unsigned
841 loongarch_save_libcall_count (unsigned mask)
842 {
843 for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
844 if (BITSET_P (mask, n))
845 return CALLEE_SAVED_REG_NUMBER (n) + 1;
846 abort ();
847 }
848
849 /* Populate the current function's loongarch_frame_info structure.
850
851 LoongArch stack frames grow downward. High addresses are at the top.
852
853 +-------------------------------+
854 | |
855 | incoming stack arguments |
856 | |
857 +-------------------------------+ <-- incoming stack pointer
858 | |
859 | callee-allocated save area |
860 | for arguments that are |
861 | split between registers and |
862 | the stack |
863 | |
864 +-------------------------------+ <-- arg_pointer_rtx (virtual)
865 | |
866 | callee-allocated save area |
867 | for register varargs |
868 | |
869 +-------------------------------+ <-- hard_frame_pointer_rtx;
870 | | stack_pointer_rtx + gp_sp_offset
871 | GPR save area | + UNITS_PER_WORD
872 | |
873 +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
874 | | + UNITS_PER_HWVALUE
875 | FPR save area |
876 | |
877 +-------------------------------+ <-- frame_pointer_rtx (virtual)
878 | |
879 | local variables |
880 | |
881 P +-------------------------------+
882 | |
883 | outgoing stack arguments |
884 | |
885 +-------------------------------+ <-- stack_pointer_rtx
886
887 Dynamic stack allocations such as alloca insert data at point P.
888 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
889 hard_frame_pointer_rtx unchanged. */
890
891 static void
892 loongarch_compute_frame_info (void)
893 {
894 struct loongarch_frame_info *frame;
895 HOST_WIDE_INT offset;
896 unsigned int regno, i, num_x_saved = 0, num_f_saved = 0;
897
898 frame = &cfun->machine->frame;
899 memset (frame, 0, sizeof (*frame));
900
901 /* Find out which GPRs we need to save. */
902 for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
903 if (loongarch_save_reg_p (regno))
904 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
905
906 /* If this function calls eh_return, we must also save and restore the
907 EH data registers. */
908 if (crtl->calls_eh_return)
909 for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
910 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
911
912 /* Find out which FPRs we need to save. This loop must iterate over
913 the same space as its companion in loongarch_for_each_saved_reg. */
914 if (TARGET_HARD_FLOAT)
915 for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
916 if (loongarch_save_reg_p (regno))
917 frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;
918
919 /* At the bottom of the frame are any outgoing stack arguments. */
920 offset = LARCH_STACK_ALIGN (crtl->outgoing_args_size);
921 /* Next are local stack variables. */
922 offset += LARCH_STACK_ALIGN (get_frame_size ());
923 /* The virtual frame pointer points above the local variables. */
924 frame->frame_pointer_offset = offset;
925 /* Next are the callee-saved FPRs. */
926 if (frame->fmask)
927 {
928 offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG);
929 frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
930 }
931 else
932 frame->fp_sp_offset = offset;
933 /* Next are the callee-saved GPRs. */
934 if (frame->mask)
935 {
936 unsigned x_save_size = LARCH_STACK_ALIGN (num_x_saved * UNITS_PER_WORD);
937 unsigned num_save_restore
938 = 1 + loongarch_save_libcall_count (frame->mask);
939
940 /* Only use save/restore routines if they don't alter the stack size. */
941 if (LARCH_STACK_ALIGN (num_save_restore * UNITS_PER_WORD) == x_save_size)
942 frame->save_libcall_adjustment = x_save_size;
943
944 offset += x_save_size;
945 frame->gp_sp_offset = offset - UNITS_PER_WORD;
946 }
947 else
948 frame->gp_sp_offset = offset;
949 /* The hard frame pointer points above the callee-saved GPRs. */
950 frame->hard_frame_pointer_offset = offset;
951 /* Above the hard frame pointer is the callee-allocated varargs save area. */
952 offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size);
953 /* Next is the callee-allocated area for pretend stack arguments. */
954 offset += LARCH_STACK_ALIGN (crtl->args.pretend_args_size);
955 /* Arg pointer must be below pretend args, but must be above alignment
956 padding. */
957 frame->arg_pointer_offset = offset - crtl->args.pretend_args_size;
958 frame->total_size = offset;
959 /* Next come the incoming stack pointer and any incoming arguments. */
960
961 /* Only use save/restore routines when the GPRs are atop the frame. */
962 if (frame->hard_frame_pointer_offset != frame->total_size)
963 frame->save_libcall_adjustment = 0;
964 }
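/* A worked example (illustrative only), assuming UNITS_PER_WORD == 8 and
   16-byte stack alignment: a function with 16 bytes of locals that saves
   only $ra and $fp and has no outgoing arguments, varargs or pretend args
   gives

     offset = 0                      (no outgoing stack arguments)
     offset = 16                     frame_pointer_offset = 16
     fp_sp_offset = 16               (no FPRs saved)
     x_save_size = 16, offset = 32   gp_sp_offset = 32 - 8 = 24
     hard_frame_pointer_offset = 32
     total_size = 32.  */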
965
966 /* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer
967 or argument pointer. TO is either the stack pointer or hard frame
968 pointer. */
969
970 HOST_WIDE_INT
971 loongarch_initial_elimination_offset (int from, int to)
972 {
973 HOST_WIDE_INT src, dest;
974
975 loongarch_compute_frame_info ();
976
977 if (to == HARD_FRAME_POINTER_REGNUM)
978 dest = cfun->machine->frame.hard_frame_pointer_offset;
979 else if (to == STACK_POINTER_REGNUM)
980 dest = 0; /* The stack pointer is the base of all offsets, hence 0. */
981 else
982 gcc_unreachable ();
983
984 if (from == FRAME_POINTER_REGNUM)
985 src = cfun->machine->frame.frame_pointer_offset;
986 else if (from == ARG_POINTER_REGNUM)
987 src = cfun->machine->frame.arg_pointer_offset;
988 else
989 gcc_unreachable ();
990
991 return src - dest;
992 }
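/* Using the illustrative frame sketched above (total_size = 32,
   frame_pointer_offset = 16, arg_pointer_offset = 32,
   hard_frame_pointer_offset = 32), the eliminations would be:

     FRAME_POINTER -> STACK_POINTER:       16 - 0  = 16
     FRAME_POINTER -> HARD_FRAME_POINTER:  16 - 32 = -16
     ARG_POINTER   -> HARD_FRAME_POINTER:  32 - 32 = 0.  */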
993
994 /* A function to save or restore a register. The first argument is the
995 register and the second is the stack slot. */
996 typedef void (*loongarch_save_restore_fn) (rtx, rtx);
997
998 /* Use FN to save or restore register REGNO. MODE is the register's
999 mode and OFFSET is the offset of its save slot from the current
1000 stack pointer. */
1001
1002 static void
1003 loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset,
1004 loongarch_save_restore_fn fn)
1005 {
1006 rtx mem;
1007
1008 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
1009 fn (gen_rtx_REG (mode, regno), mem);
1010 }
1011
1012 /* Call FN for each register that is saved by the current function.
1013 SP_OFFSET is the offset of the current stack pointer from the start
1014 of the frame. */
1015
1016 static void
1017 loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
1018 loongarch_save_restore_fn fn,
1019 bool skip_eh_data_regs_p)
1020 {
1021 HOST_WIDE_INT offset;
1022
1023 /* Save the link register and s-registers. */
1024 offset = cfun->machine->frame.gp_sp_offset - sp_offset;
1025 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
1026 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
1027 {
1028 /* Special care needs to be taken for $r4-$r7 (EH_RETURN_DATA_REGNO)
1029 when returning normally from a function that calls
1030 __builtin_eh_return. In this case, these registers are saved but
1031 should not be restored, or the return value may be clobbered. */
1032
1033 if (!(cfun->machine->reg_is_wrapped_separately[regno]
1034 || (skip_eh_data_regs_p
1035 && GP_ARG_FIRST <= regno && regno < GP_ARG_FIRST + 4)))
1036 loongarch_save_restore_reg (word_mode, regno, offset, fn);
1037
1038 offset -= UNITS_PER_WORD;
1039 }
1040
1041 /* This loop must iterate over the same space as its companion in
1042 loongarch_compute_frame_info. */
1043 offset = cfun->machine->frame.fp_sp_offset - sp_offset;
1044 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
1045
1046 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
1047 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
1048 {
1049 if (!cfun->machine->reg_is_wrapped_separately[regno])
1050 loongarch_save_restore_reg (word_mode, regno, offset, fn);
1051
1052 offset -= GET_MODE_SIZE (mode);
1053 }
1054 }
1055
1056 /* Emit a move from SRC to DEST. Assume that the move expanders can
1057 handle all moves if !can_create_pseudo_p (). The distinction is
1058 important because, unlike emit_move_insn, the move expanders know
1059 how to force Pmode objects into the constant pool even when the
1060 constant pool address is not itself legitimate. */
1061
1062 rtx
1063 loongarch_emit_move (rtx dest, rtx src)
1064 {
1065 return (can_create_pseudo_p () ? emit_move_insn (dest, src)
1066 : emit_move_insn_1 (dest, src));
1067 }
1068
1069 /* Save register REG to MEM. Make the instruction frame-related. */
1070
1071 static void
1072 loongarch_save_reg (rtx reg, rtx mem)
1073 {
1074 loongarch_emit_move (mem, reg);
1075 loongarch_set_frame_expr (loongarch_frame_set (mem, reg));
1076 }
1077
1078 /* Restore register REG from MEM. */
1079
1080 static void
1081 loongarch_restore_reg (rtx reg, rtx mem)
1082 {
1083 rtx insn = loongarch_emit_move (reg, mem);
1084 rtx dwarf = NULL_RTX;
1085 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
1086 REG_NOTES (insn) = dwarf;
1087
1088 RTX_FRAME_RELATED_P (insn) = 1;
1089 }
1090
1091 /* For stack frames that can't be allocated with a single ADDI instruction,
1092 compute the best value to initially allocate. It must at a minimum
1093 allocate enough space to spill the callee-saved registers. */
1094
1095 static HOST_WIDE_INT
1096 loongarch_first_stack_step (struct loongarch_frame_info *frame)
1097 {
1098 HOST_WIDE_INT min_first_step
1099 = LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset);
1100
1101 /* Return min_first_step when -fstack-check is in effect, or when stack
1102 clash protection is enabled and frame->total_size exceeds the stack
1103 clash protection guard size. */
1104 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
1105 || (flag_stack_clash_protection
1106 && frame->total_size > STACK_CLASH_PROTECTION_GUARD_SIZE))
1107 return min_first_step;
1108
1109 if (IMM12_OPERAND (frame->total_size))
1110 return frame->total_size;
1111
1112 HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
1113 HOST_WIDE_INT min_second_step = frame->total_size - max_first_step;
1114 gcc_assert (min_first_step <= max_first_step);
1115
1116 /* As an optimization, use the least-significant bits of the total frame
1117 size, so that the second adjustment step is just LU12I + ADD. */
1118 if (!IMM12_OPERAND (min_second_step)
1119 && frame->total_size % IMM_REACH < IMM_REACH / 2
1120 && frame->total_size % IMM_REACH >= min_first_step)
1121 return frame->total_size % IMM_REACH;
1122
1123 return max_first_step;
1124 }
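/* A worked example (illustrative, assuming IMM_REACH == 4096 and a 16-byte
   preferred stack boundary): for frame->total_size == 5000,

     IMM12_OPERAND (5000) is false,
     max_first_step  = 4096 / 2 - 16 = 2032,
     min_second_step = 5000 - 2032 = 2968 (not an IMM12 operand),
     5000 % 4096 = 904, which is below IMM_REACH / 2,

   so the first step is 904 bytes and the remaining 4096 bytes can be
   subtracted with a single LU12I.W-based adjustment.  */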
1125
1126 static void
1127 loongarch_emit_stack_tie (void)
1128 {
1129 emit_insn (gen_stack_tie (Pmode, stack_pointer_rtx,
1130 frame_pointer_needed ? hard_frame_pointer_rtx
1131 : stack_pointer_rtx));
1132 }
1133
1134 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
1135
1136 #if PROBE_INTERVAL > 16384
1137 #error Cannot use indexed addressing mode for stack probing
1138 #endif
1139
1140 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
1141 inclusive. These are offsets from the current stack pointer. */
1142
1143 static void
1144 loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
1145 {
1146 HOST_WIDE_INT rounded_size;
1147 HOST_WIDE_INT interval;
1148
1149 if (flag_stack_clash_protection)
1150 interval = STACK_CLASH_PROTECTION_GUARD_SIZE;
1151 else
1152 interval = PROBE_INTERVAL;
1153
1154 rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode);
1155 rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode);
1156
1157 size = size + first;
1158
1159 /* Sanity check for the addressing mode we're going to use. */
1160 gcc_assert (first <= 16384);
1161
1162 /* Step 1: round SIZE to the previous multiple of the interval. */
1163
1164 rounded_size = ROUND_DOWN (size, interval);
1165
1166 /* Step 2: compute initial and final value of the loop counter. */
1167
1168 emit_move_insn (r14, GEN_INT (interval));
1169
1170 /* If rounded_size is zero, the space requested by the local
1171 variables is less than the probe interval, and there is no
1172 need to probe the allocated space. */
1173 if (rounded_size != 0)
1174 {
1175 /* Step 3: the loop
1176
1177 do
1178 {
1179 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
1180 probe at TEST_ADDR
1181 }
1182 while (TEST_ADDR != LAST_ADDR)
1183
1184 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
1185 until it is equal to ROUNDED_SIZE. */
1186
1187 if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * interval)
1188 {
1189 for (HOST_WIDE_INT i = 0; i < rounded_size; i += interval)
1190 {
1191 emit_insn (gen_rtx_SET (stack_pointer_rtx,
1192 gen_rtx_MINUS (Pmode,
1193 stack_pointer_rtx,
1194 r14)));
1195 emit_move_insn (gen_rtx_MEM (Pmode,
1196 gen_rtx_PLUS (Pmode,
1197 stack_pointer_rtx,
1198 const0_rtx)),
1199 const0_rtx);
1200 emit_insn (gen_blockage ());
1201 }
1202 dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
1203 }
1204 else
1205 {
1206 emit_move_insn (r12, GEN_INT (rounded_size));
1207 emit_insn (gen_rtx_SET (r12,
1208 gen_rtx_MINUS (Pmode,
1209 stack_pointer_rtx,
1210 r12)));
1211
1212 emit_insn (gen_probe_stack_range (Pmode, stack_pointer_rtx,
1213 stack_pointer_rtx, r12, r14));
1214 emit_insn (gen_blockage ());
1215 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
1216 }
1217 }
1218 else
1219 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
1220
1221
1222 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
1223 that SIZE is equal to ROUNDED_SIZE. */
1224
1225 if (size != rounded_size)
1226 {
1227 if (size - rounded_size >= 2048)
1228 {
1229 emit_move_insn (r14, GEN_INT (size - rounded_size));
1230 emit_insn (gen_rtx_SET (stack_pointer_rtx,
1231 gen_rtx_MINUS (Pmode,
1232 stack_pointer_rtx,
1233 r14)));
1234 }
1235 else
1236 emit_insn (gen_rtx_SET (stack_pointer_rtx,
1237 gen_rtx_PLUS (Pmode,
1238 stack_pointer_rtx,
1239 GEN_INT (rounded_size - size))));
1240 }
1241
1242 if (first)
1243 {
1244 emit_move_insn (r12, GEN_INT (first));
1245 emit_insn (gen_rtx_SET (stack_pointer_rtx,
1246 gen_rtx_PLUS (Pmode,
1247 stack_pointer_rtx, r12)));
1248 }
1249 /* Make sure nothing is scheduled before we are done. */
1250 emit_insn (gen_blockage ());
1251 }
1252
1253 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
1254 absolute addresses. */
1255 const char *
1256 loongarch_output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
1257 {
1258 static int labelno = 0;
1259 char loop_lab[32], tmp[64];
1260 rtx xops[3];
1261
1262 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
1263
1264 /* Loop. */
1265 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
1266
1267 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
1268 xops[0] = reg1;
1269 xops[2] = reg3;
1270 if (TARGET_64BIT)
1271 output_asm_insn ("sub.d\t%0,%0,%2", xops);
1272 else
1273 output_asm_insn ("sub.w\t%0,%0,%2", xops);
1274
1275 /* Probe at TEST_ADDR, test if TEST_ADDR == LAST_ADDR and branch. */
1276 xops[1] = reg2;
1277 strcpy (tmp, "bne\t%0,%1,");
1278 if (TARGET_64BIT)
1279 output_asm_insn ("st.d\t$r0,%0,0", xops);
1280 else
1281 output_asm_insn ("st.w\t$r0,%0,0", xops);
1282 output_asm_insn (strcat (tmp, &loop_lab[1]), xops);
1283
1284 return "";
1285 }
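/* Stitching the templates above together, the emitted probe loop looks
   like this on a 64-bit target (%0 = probe address, %1 = final address,
   %2 = probe interval):

     .LPSRL0:
	sub.d	%0,%0,%2	# TEST_ADDR -= PROBE_INTERVAL
	st.d	$r0,%0,0	# probe at TEST_ADDR
	bne	%0,%1,.LPSRL0	# loop until TEST_ADDR == LAST_ADDR  */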
1286
1287 /* Expand the "prologue" pattern. */
1288
1289 void
1290 loongarch_expand_prologue (void)
1291 {
1292 struct loongarch_frame_info *frame = &cfun->machine->frame;
1293 HOST_WIDE_INT size = frame->total_size;
1294 rtx insn;
1295
1296 if (flag_stack_usage_info)
1297 current_function_static_stack_size = size;
1298
1299 /* Save the registers. */
1300 if ((frame->mask | frame->fmask) != 0)
1301 {
1302 HOST_WIDE_INT step1 = MIN (size, loongarch_first_stack_step (frame));
1303
1304 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
1305 GEN_INT (-step1));
1306 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
1307 size -= step1;
1308 loongarch_for_each_saved_reg (size, loongarch_save_reg, false);
1309 }
1310
1311 /* Set up the frame pointer, if we're using one. */
1312 if (frame_pointer_needed)
1313 {
1314 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
1315 GEN_INT (frame->hard_frame_pointer_offset - size));
1316 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
1317
1318 loongarch_emit_stack_tie ();
1319 }
1320
1321 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
1322 || flag_stack_clash_protection)
1323 {
1324 HOST_WIDE_INT first = get_stack_check_protect ();
1325
1326 if (frame->total_size == 0)
1327 {
1328 /* do nothing. */
1329 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
1330 return;
1331 }
1332
1333 if (crtl->is_leaf && !cfun->calls_alloca)
1334 {
1335 HOST_WIDE_INT interval;
1336
1337 if (flag_stack_clash_protection)
1338 interval = STACK_CLASH_PROTECTION_GUARD_SIZE;
1339 else
1340 interval = PROBE_INTERVAL;
1341
1342 if (size > interval && size > first)
1343 loongarch_emit_probe_stack_range (first, size - first);
1344 else
1345 loongarch_emit_probe_stack_range (first, size);
1346 }
1347 else
1348 loongarch_emit_probe_stack_range (first, size);
1349
1350 if (size > 0)
1351 {
1352 /* Describe the effect of the previous instructions. */
1353 insn = plus_constant (Pmode, stack_pointer_rtx, -size);
1354 insn = gen_rtx_SET (stack_pointer_rtx, insn);
1355 loongarch_set_frame_expr (insn);
1356 }
1357 return;
1358 }
1359
1360 if (size > 0)
1361 {
1362 if (IMM12_OPERAND (-size))
1363 {
1364 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
1365 GEN_INT (-size));
1366 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
1367 }
1368 else
1369 {
1370 loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode),
1371 GEN_INT (-size));
1372 emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
1373 LARCH_PROLOGUE_TEMP (Pmode)));
1374
1375 /* Describe the effect of the previous instructions. */
1376 insn = plus_constant (Pmode, stack_pointer_rtx, -size);
1377 insn = gen_rtx_SET (stack_pointer_rtx, insn);
1378 loongarch_set_frame_expr (insn);
1379 }
1380 }
1381 }
1382
1383 /* Return nonzero if this function is known to have a null epilogue.
1384 This allows the optimizer to omit jumps to jumps if no stack
1385 was created. */
1386
1387 bool
1388 loongarch_can_use_return_insn (void)
1389 {
1390 return reload_completed && cfun->machine->frame.total_size == 0;
1391 }
1392
1393 /* Expand function epilogue using the following insn patterns:
1394 "epilogue" (style == NORMAL_RETURN)
1395 "sibcall_epilogue" (style == SIBCALL_RETURN)
1396 "eh_return" (style == EXCEPTION_RETURN) */
1397
1398 void
1399 loongarch_expand_epilogue (int style)
1400 {
1401 /* Split the frame into two. STEP1 is the amount of stack we should
1402 deallocate before restoring the registers. STEP2 is the amount we
1403 should deallocate afterwards.
1404
1405 Start off by assuming that no registers need to be restored. */
1406 struct loongarch_frame_info *frame = &cfun->machine->frame;
1407 HOST_WIDE_INT step1 = frame->total_size;
1408 HOST_WIDE_INT step2 = 0;
1409 rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
1410 rtx insn;
1411
1412 /* We need a memory barrier to prevent reads from the deallocated stack. */
1413 bool need_barrier_p
1414 = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0;
1415
1416 /* Handle simple returns. */
1417 if (style == NORMAL_RETURN && loongarch_can_use_return_insn ())
1418 {
1419 emit_jump_insn (gen_return ());
1420 return;
1421 }
1422
1423 /* Move past any dynamic stack allocations. */
1424 if (cfun->calls_alloca)
1425 {
1426 /* Emit a barrier to prevent loads from a deallocated stack. */
1427 loongarch_emit_stack_tie ();
1428 need_barrier_p = false;
1429
1430 rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset);
1431 if (!IMM12_OPERAND (INTVAL (adjust)))
1432 {
1433 loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust);
1434 adjust = LARCH_PROLOGUE_TEMP (Pmode);
1435 }
1436
1437 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
1438 hard_frame_pointer_rtx,
1439 adjust));
1440
1441 rtx dwarf = NULL_RTX;
1442 rtx minus_offset = GEN_INT (-frame->hard_frame_pointer_offset);
1443 rtx cfa_adjust_value = gen_rtx_PLUS (Pmode,
1444 hard_frame_pointer_rtx,
1445 minus_offset);
1446
1447 rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
1448 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
1449 RTX_FRAME_RELATED_P (insn) = 1;
1450
1451 REG_NOTES (insn) = dwarf;
1452 }
1453
1454 /* If we need to restore registers, deallocate as much stack as
1455 possible in the second step without going out of range. */
1456 if ((frame->mask | frame->fmask) != 0)
1457 {
1458 step2 = loongarch_first_stack_step (frame);
1459 step1 -= step2;
1460 }
1461
1462 /* Set TARGET to BASE + STEP1. */
1463 if (step1 > 0)
1464 {
1465 /* Emit a barrier to prevent loads from a deallocated stack. */
1466 loongarch_emit_stack_tie ();
1467 need_barrier_p = false;
1468
1469 /* Get an rtx for STEP1 that we can add to BASE. */
1470 rtx adjust = GEN_INT (step1);
1471 if (!IMM12_OPERAND (step1))
1472 {
1473 loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust);
1474 adjust = LARCH_PROLOGUE_TEMP (Pmode);
1475 }
1476
1477 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
1478 stack_pointer_rtx,
1479 adjust));
1480
1481 rtx dwarf = NULL_RTX;
1482 rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
1483 GEN_INT (step2));
1484
1485 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
1486 RTX_FRAME_RELATED_P (insn) = 1;
1487
1488 REG_NOTES (insn) = dwarf;
1489 }
1490
1491 /* Restore the registers. */
1492 loongarch_for_each_saved_reg (frame->total_size - step2,
1493 loongarch_restore_reg,
1494 crtl->calls_eh_return
1495 && style != EXCEPTION_RETURN);
1496
1497 if (need_barrier_p)
1498 loongarch_emit_stack_tie ();
1499
1500 /* Deallocate the final bit of the frame. */
1501 if (step2 > 0)
1502 {
1503 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
1504 stack_pointer_rtx,
1505 GEN_INT (step2)));
1506
1507 rtx dwarf = NULL_RTX;
1508 rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, const0_rtx);
1509 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
1510 RTX_FRAME_RELATED_P (insn) = 1;
1511
1512 REG_NOTES (insn) = dwarf;
1513 }
1514
1515 /* Add in the __builtin_eh_return stack adjustment. */
1516 if (crtl->calls_eh_return && style == EXCEPTION_RETURN)
1517 emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
1518 EH_RETURN_STACKADJ_RTX));
1519
1520 /* Emit return unless doing sibcall. */
1521 if (style != SIBCALL_RETURN)
1522 emit_jump_insn (gen_simple_return_internal (ra));
1523 }
1524
1525 #define LU32I_B (0xfffffULL << 32)
1526 #define LU52I_B (0xfffULL << 52)
1527
1528 /* Fill CODES with a sequence of rtl operations to load VALUE.
1529 Return the number of operations needed. */
1530
1531 static unsigned int
1532 loongarch_build_integer (struct loongarch_integer_op *codes,
1533 HOST_WIDE_INT value)
1534
1535 {
1536 unsigned int cost = 0;
1537
1538 /* Get the lower 32 bits of the value. */
1539 HOST_WIDE_INT low_part = (int32_t)value;
1540
1541 if (IMM12_OPERAND (low_part) || IMM12_OPERAND_UNSIGNED (low_part))
1542 {
1543 /* The value of the lower 32 bits can be loaded with one
1544 instruction. */
1545 codes[cost].code = UNKNOWN;
1546 codes[cost].method = METHOD_NORMAL;
1547 codes[cost].value = low_part;
1548 codes[cost].curr_value = low_part;
1549 cost++;
1550 }
1551 else
1552 {
1553 /* lu12i.w + ior. */
1554 codes[cost].code = UNKNOWN;
1555 codes[cost].method = METHOD_NORMAL;
1556 codes[cost].value = low_part & ~(IMM_REACH - 1);
1557 codes[cost].curr_value = codes[cost].value;
1558 cost++;
1559 HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1);
1560 if (iorv != 0)
1561 {
1562 codes[cost].code = IOR;
1563 codes[cost].method = METHOD_NORMAL;
1564 codes[cost].value = iorv;
1565 codes[cost].curr_value = low_part;
1566 cost++;
1567 }
1568 }
1569
1570 if (TARGET_64BIT)
1571 {
1572 bool lu32i[2] = {(value & LU32I_B) == 0, (value & LU32I_B) == LU32I_B};
1573 bool lu52i[2] = {(value & LU52I_B) == 0, (value & LU52I_B) == LU52I_B};
1574
1575 int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31;
1576 int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51;
1577
1578 uint32_t hival = (uint32_t) (value >> 32);
1579 uint32_t loval = (uint32_t) value;
1580
1581 /* Determine whether the upper 32 bits are sign-extended from the lower
1582 32 bits. If so, the instructions to load the high part can be
1583 omitted. */
1584 if (lu32i[sign31] && lu52i[sign31])
1585 return cost;
1586 /* If the lower 32 bits are the same as the upper 32 bits, just copy
1587 the lower 32 bits to the upper 32 bits. */
1588 else if (loval == hival)
1589 {
1590 codes[cost].method = METHOD_MIRROR;
1591 codes[cost].curr_value = value;
1592 return cost + 1;
1593 }
1594 /* Determine whether bits 32-51 are sign-extended from the lower 32
1595 bits. If so, directly load 52-63 bits. */
1596 else if (lu32i[sign31])
1597 {
1598 codes[cost].method = METHOD_LU52I;
1599 codes[cost].value = value & LU52I_B;
1600 codes[cost].curr_value = value;
1601 return cost + 1;
1602 }
1603
1604 codes[cost].method = METHOD_LU32I;
1605 codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : 0);
1606 codes[cost].curr_value = (value & 0xfffffffffffff)
1607 | (sign51 ? LU52I_B : 0);
1608 cost++;
1609
1610 /* Determine whether bits 52-63 are sign-extended from the lower bits,
1611 and if not, load bits 52-63. */
1612 if (!lu52i[(value & (HOST_WIDE_INT_1U << 51)) >> 51])
1613 {
1614 codes[cost].method = METHOD_LU52I;
1615 codes[cost].value = value & LU52I_B;
1616 codes[cost].curr_value = value;
1617 cost++;
1618 }
1619 }
1620
1621 gcc_assert (cost <= LARCH_MAX_INTEGER_OPS);
1622
1623 return cost;
1624 }
1625
1626 /* Return the number of operations needed to load VALUE.
1627 The integer itself is split in loongarch_output_move. */
1629
1630 static unsigned int
1631 loongarch_integer_cost (HOST_WIDE_INT value)
1632 {
1633 struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS];
1634 return loongarch_build_integer (codes, value);
1635 }
1636
1637 /* Implement TARGET_LEGITIMATE_CONSTANT_P. */
1638
1639 static bool
1640 loongarch_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1641 {
1642 return loongarch_const_insns (x) > 0;
1643 }
1644
1645 /* Return true if X is a thread-local symbol. */
1646
1647 static bool
1648 loongarch_tls_symbol_p (rtx x)
1649 {
1650 return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0;
1651 }
1652
1653 /* Return true if SYMBOL_REF X is associated with a global symbol
1654 (in the STB_GLOBAL sense). */
1655
1656 bool
1657 loongarch_global_symbol_p (const_rtx x)
1658 {
1659 if (LABEL_REF_P (x))
1660 return false;
1661
1662 const_tree decl = SYMBOL_REF_DECL (x);
1663
1664 if (!decl)
1665 return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x);
1666
1667 /* Weakref symbols are not TREE_PUBLIC, but their targets are global
1668 or weak symbols. Relocations in the object file will be against
1669 the target symbol, so it's that symbol's binding that matters here. */
1670 return DECL_P (decl) && (TREE_PUBLIC (decl) || DECL_WEAK (decl));
1671 }
1672
1673 bool
1674 loongarch_global_symbol_noweak_p (const_rtx x)
1675 {
1676 if (LABEL_REF_P (x))
1677 return false;
1678
1679 const_tree decl = SYMBOL_REF_DECL (x);
1680
1681 if (!decl)
1682 return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x);
1683
1684 return DECL_P (decl) && TREE_PUBLIC (decl);
1685 }
1686
1687 bool
1688 loongarch_weak_symbol_p (const_rtx x)
1689 {
1690 const_tree decl;
1691 if (LABEL_REF_P (x) || !(decl = SYMBOL_REF_DECL (x)))
1692 return false;
1693 return DECL_P (decl) && DECL_WEAK (decl);
1694 }
1695
1696 /* Return true if SYMBOL_REF X binds locally. */
1697
1698 bool
1699 loongarch_symbol_binds_local_p (const_rtx x)
1700 {
1701 if (TARGET_DIRECT_EXTERN_ACCESS)
1702 return true;
1703
1704 if (SYMBOL_REF_P (x))
1705 return (SYMBOL_REF_DECL (x)
1706 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
1707 : SYMBOL_REF_LOCAL_P (x));
1708 else
1709 return false;
1710 }
1711
1712 /* Return true if OP is a constant vector with the number of units in MODE,
1713 and each unit has the same bit set. */
1714
1715 bool
1716 loongarch_const_vector_bitimm_set_p (rtx op, machine_mode mode)
1717 {
1718 if (GET_CODE (op) == CONST_VECTOR && op != CONST0_RTX (mode))
1719 {
1720 unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0));
1721 int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode)));
1722
1723 if (vlog2 != -1)
1724 {
1725 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
1726 gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1);
1727 return loongarch_const_vector_same_val_p (op, mode);
1728 }
1729 }
1730
1731 return false;
1732 }
1733
1734 /* Return true if OP is a constant vector with the number of units in MODE,
1735 and each unit has the same bit clear. */
1736
1737 bool
1738 loongarch_const_vector_bitimm_clr_p (rtx op, machine_mode mode)
1739 {
1740 if (GET_CODE (op) == CONST_VECTOR && op != CONSTM1_RTX (mode))
1741 {
1742 unsigned HOST_WIDE_INT val = ~UINTVAL (CONST_VECTOR_ELT (op, 0));
1743 int vlog2 = exact_log2 (val & GET_MODE_MASK (GET_MODE_INNER (mode)));
1744
1745 if (vlog2 != -1)
1746 {
1747 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
1748 gcc_assert (vlog2 >= 0 && vlog2 <= GET_MODE_UNIT_BITSIZE (mode) - 1);
1749 return loongarch_const_vector_same_val_p (op, mode);
1750 }
1751 }
1752
1753 return false;
1754 }
1755
1756 /* Return true if OP is a constant vector with the number of units in MODE,
1757 and each unit has the same value. */
1758
1759 bool
1760 loongarch_const_vector_same_val_p (rtx op, machine_mode mode)
1761 {
1762 int i, nunits = GET_MODE_NUNITS (mode);
1763 rtx first;
1764
1765 if (GET_CODE (op) != CONST_VECTOR || GET_MODE (op) != mode)
1766 return false;
1767
1768 first = CONST_VECTOR_ELT (op, 0);
1769 for (i = 1; i < nunits; i++)
1770 if (!rtx_equal_p (first, CONST_VECTOR_ELT (op, i)))
1771 return false;
1772
1773 return true;
1774 }
1775
1776 /* Return true if OP is a constant vector with the number of units in MODE,
1777 and each unit has the same value, made up of a single byte replicated
1778 across all of its bytes. */
1779
1780 bool
1781 loongarch_const_vector_same_bytes_p (rtx op, machine_mode mode)
1782 {
1783 int i, bytes;
1784 HOST_WIDE_INT val, first_byte;
1785 rtx first;
1786
1787 if (!loongarch_const_vector_same_val_p (op, mode))
1788 return false;
1789
1790 first = CONST_VECTOR_ELT (op, 0);
1791 bytes = GET_MODE_UNIT_SIZE (mode);
1792 val = INTVAL (first);
1793 first_byte = val & 0xff;
1794 for (i = 1; i < bytes; i++)
1795 {
1796 val >>= 8;
1797 if ((val & 0xff) != first_byte)
1798 return false;
1799 }
1800
1801 return true;
1802 }
1803
1804 /* Return true if OP is a constant vector with the number of units in MODE,
1805 and each unit has the same integer value in the range [LOW, HIGH]. */
1806
1807 bool
1808 loongarch_const_vector_same_int_p (rtx op, machine_mode mode, HOST_WIDE_INT low,
1809 HOST_WIDE_INT high)
1810 {
1811 HOST_WIDE_INT value;
1812 rtx elem0;
1813
1814 if (!loongarch_const_vector_same_val_p (op, mode))
1815 return false;
1816
1817 elem0 = CONST_VECTOR_ELT (op, 0);
1818 if (!CONST_INT_P (elem0))
1819 return false;
1820
1821 value = INTVAL (elem0);
1822 return (value >= low && value <= high);
1823 }
1824
1825 /* Return true if OP is a constant vector with repeated 4-element sets
1826 in mode MODE. */
1827
1828 bool
1829 loongarch_const_vector_shuffle_set_p (rtx op, machine_mode mode)
1830 {
1831 int nunits = GET_MODE_NUNITS (mode);
1832 int nsets = nunits / 4;
1833 int set = 0;
1834 int i, j;
1835
1836 /* Check if we have the same 4-element sets. */
1837 for (j = 0; j < nsets; j++, set = 4 * j)
1838 for (i = 0; i < 4; i++)
1839 if ((INTVAL (XVECEXP (op, 0, i))
1840 != (INTVAL (XVECEXP (op, 0, set + i)) - set))
1841 || !IN_RANGE (INTVAL (XVECEXP (op, 0, set + i)), 0, set + 3))
1842 return false;
1843 return true;
1844 }
1845
1846 /* Return true if rtx constants of mode MODE should be put into a small
1847 data section. */
1848
1849 static bool
1850 loongarch_rtx_constant_in_small_data_p (machine_mode mode)
1851 {
1852 return (GET_MODE_SIZE (mode) <= g_switch_value);
1853 }
1854
1855 /* Return the method that should be used to access SYMBOL_REF or
1856 LABEL_REF X. */
1857
1858 static enum loongarch_symbol_type
1859 loongarch_classify_symbol (const_rtx x)
1860 {
1861 enum loongarch_symbol_type pcrel =
1862 TARGET_CMODEL_EXTREME ? SYMBOL_PCREL64 : SYMBOL_PCREL;
1863
1864 if (!SYMBOL_REF_P (x))
1865 return pcrel;
1866
1867 if (SYMBOL_REF_TLS_MODEL (x))
1868 return SYMBOL_TLS;
1869
1870 if (!loongarch_symbol_binds_local_p (x))
1871 return SYMBOL_GOT_DISP;
1872
1873 tree t = SYMBOL_REF_DECL (x);
1874 if (!t)
1875 return pcrel;
1876
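/* Illustrative usage of the attribute checked below (a sketch, not taken
   from this file): a declaration such as
     extern int counter __attribute__ ((model ("extreme")));
   selects the extreme access sequence for that object, while
   model ("normal") selects the ordinary pc-relative sequence.  */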
1877 t = lookup_attribute ("model", DECL_ATTRIBUTES (t));
1878 if (!t)
1879 return pcrel;
1880
1881 t = TREE_VALUE (TREE_VALUE (t));
1882
1883 /* loongarch_handle_model_attribute should reject other values. */
1884 gcc_assert (TREE_CODE (t) == STRING_CST);
1885
1886 const char *model = TREE_STRING_POINTER (t);
1887 if (strcmp (model, "normal") == 0)
1888 return SYMBOL_PCREL;
1889 if (strcmp (model, "extreme") == 0)
1890 return SYMBOL_PCREL64;
1891
1892 /* loongarch_handle_model_attribute should reject unknown model
1893 name. */
1894 gcc_unreachable ();
1895 }
1896
1897 /* Classify the base of symbolic expression X. */
1899
1900 static enum loongarch_symbol_type
1901 loongarch_classify_symbolic_expression (rtx x)
1902 {
1903 rtx offset;
1904
1905 split_const (x, &x, &offset);
1906 if (UNSPEC_ADDRESS_P (x))
1907 return UNSPEC_ADDRESS_TYPE (x);
1908
1909 return loongarch_classify_symbol (x);
1910 }
1911
1912 /* Return true if X is a symbolic constant. If it is,
1913 store the type of the symbol in *SYMBOL_TYPE. */
1914
1915 bool
1916 loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type)
1917 {
1918 rtx offset;
1919
1920 split_const (x, &x, &offset);
1921 if (UNSPEC_ADDRESS_P (x))
1922 {
1923 *symbol_type = UNSPEC_ADDRESS_TYPE (x);
1924 x = UNSPEC_ADDRESS (x);
1925 }
1926 else if (SYMBOL_REF_P (x) || LABEL_REF_P (x))
1927 *symbol_type = loongarch_classify_symbol (x);
1928 else
1929 return false;
1930
1931 if (offset == const0_rtx)
1932 return true;
1933
1934 /* Check whether a nonzero offset is valid for the underlying
1935 relocations. */
1936 switch (*symbol_type)
1937 {
1938 case SYMBOL_PCREL:
1939 case SYMBOL_PCREL64:
1940 /* GAS rejects offsets outside the range [-2^31, 2^31-1]. */
1941 return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);
1942
1943 /* The following symbol types do not allow non-zero offsets. */
1944 case SYMBOL_GOT_DISP:
1945 case SYMBOL_TLS_IE:
1946 case SYMBOL_TLSGD:
1947 case SYMBOL_TLSLDM:
1948 case SYMBOL_TLS:
1949 /* From an implementation perspective, tls_le symbols are allowed to
1950 have non-zero offsets, but binutils does not currently support them,
1951 so generating non-zero offsets is prohibited here. */
1952 case SYMBOL_TLS_LE:
1953 return false;
1954 }
1955 gcc_unreachable ();
1956 }
1957
1958 /* If -mexplicit-relocs=auto, we use machine operations with reloc hints
1959 for cases where the linker is unable to relax so we can schedule the
1960 machine operations; otherwise use an assembler pseudo-op so the
1961 assembler will generate R_LARCH_RELAX. */
1962
1963 bool
1964 loongarch_explicit_relocs_p (enum loongarch_symbol_type type)
1965 {
1966 if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO)
1967 return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS;
1968
1969 switch (type)
1970 {
1971 case SYMBOL_TLS_IE:
1972 case SYMBOL_TLS_LE:
1973 case SYMBOL_PCREL64:
1974 /* TLS IE cannot be relaxed. TLS LE relaxation is different from
1975 the normal R_LARCH_RELAX-based relaxation and it **requires**
1976 using the explicit %le_{lo12,hi20,add}_r relocs. The linker
1977 does not relax 64-bit pc-relative accesses as of now. */
1978 return true;
1979 case SYMBOL_GOT_DISP:
1980 /* The linker doesn't know how to relax GOT accesses in the extreme
1981 code model. */
1982 if (TARGET_CMODEL_EXTREME)
1983 return true;
1984
1985 /* If we are performing LTO for a final link, and we have the
1986 linker plugin so we know the resolution of the symbols, then
1987 all GOT references are binding to external symbols or
1988 preemptable symbols. So the linker cannot relax them. */
1989 return (in_lto_p
1990 && !flag_incremental_link
1991 && HAVE_LTO_PLUGIN == 2
1992 && (!global_options_set.x_flag_use_linker_plugin
1993 || global_options.x_flag_use_linker_plugin));
1994 default:
1995 return false;
1996 }
1997 }
1998
1999 /* Returns the number of instructions necessary to reference a symbol. */
2000
2001 static int
2002 loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode)
2003 {
2004 /* LSX LD.* and ST.* cannot support loading symbols via an immediate
2005 operand. */
2006 if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
2007 return 0;
2008
2009 switch (type)
2010 {
2011 case SYMBOL_GOT_DISP:
2012 /* The constant will have to be loaded from the GOT before it
2013 is used in an address. */
2014 if (!loongarch_explicit_relocs_p (type) && mode != MAX_MACHINE_MODE)
2015 return 0;
2016
2017 return 3;
2018
2019 case SYMBOL_PCREL:
2020 case SYMBOL_TLS_IE:
2021 case SYMBOL_TLS_LE:
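/* A sketch of the usual two-instruction sequences (the exact relocs
   depend on the options in effect): pcalau12i + addi for SYMBOL_PCREL,
   pcalau12i + ld for SYMBOL_TLS_IE, and lu12i.w + ori for
   SYMBOL_TLS_LE.  */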
2022 return 2;
2023
2024 case SYMBOL_TLSGD:
2025 case SYMBOL_TLSLDM:
2026 return 3;
2027
2028 case SYMBOL_PCREL64:
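/* A sketch of the five-instruction extreme-code-model sequence:
   pcalau12i + addi.d + lu32i.d + lu52i.d to build the two halves of
   the address, plus a final add.d to combine them.  */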
2029 return 5;
2030
2031 case SYMBOL_TLS:
2032 /* We don't treat a bare TLS symbol as a constant. */
2033 return 0;
2034 }
2035 gcc_unreachable ();
2036 }
2037
2038 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
2039
2040 static bool
2041 loongarch_cannot_force_const_mem (machine_mode mode, rtx x)
2042 {
2043 enum loongarch_symbol_type type;
2044 rtx base, offset;
2045
2046 /* As an optimization, reject constants that loongarch_legitimize_move
2047 can expand inline.
2048
2049 Suppose we have a multi-instruction sequence that loads constant C
2050 into register R. If R does not get allocated a hard register, and
2051 R is used in an operand that allows both registers and memory
2052 references, reload will consider forcing C into memory and using
2053 one of the instruction's memory alternatives. Returning false
2054 here will force it to use an input reload instead. */
2055 if ((CONST_INT_P (x) || GET_CODE (x) == CONST_VECTOR)
2056 && loongarch_legitimate_constant_p (mode, x))
2057 return true;
2058
2059 split_const (x, &base, &offset);
2060 if (loongarch_symbolic_constant_p (base, &type))
2061 {
2062 /* The same optimization as for CONST_INT. */
2063 if (IMM12_INT (offset)
2064 && loongarch_symbol_insns (type, MAX_MACHINE_MODE) > 0)
2065 return true;
2066 }
2067
2068 /* TLS symbols must be computed by loongarch_legitimize_move. */
2069 if (tls_referenced_p (x))
2070 return true;
2071
2072 return false;
2073 }
2074
2075 /* Return true if register REGNO is a valid base register for mode MODE.
2076 STRICT_P is true if REG_OK_STRICT is in effect. */
2077
2078 int
2079 loongarch_regno_mode_ok_for_base_p (int regno,
2080 machine_mode mode ATTRIBUTE_UNUSED,
2081 bool strict_p)
2082 {
2083 if (!HARD_REGISTER_NUM_P (regno))
2084 {
2085 if (!strict_p)
2086 return true;
2087 regno = reg_renumber[regno];
2088 }
2089
2090 /* These fake registers will be eliminated to either the stack or
2091 hard frame pointer, both of which are usually valid base registers.
2092 Reload deals with the cases where the eliminated form isn't valid. */
2093 if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
2094 return true;
2095
2096 return GP_REG_P (regno);
2097 }
2098
2099 /* Return true if X is a valid base register for mode MODE.
2100 STRICT_P is true if REG_OK_STRICT is in effect. */
2101
2102 static bool
2103 loongarch_valid_base_register_p (rtx x, machine_mode mode, bool strict_p)
2104 {
2105 if (!strict_p && SUBREG_P (x))
2106 x = SUBREG_REG (x);
2107
2108 return (REG_P (x)
2109 && loongarch_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p));
2110 }
2111
2112 /* Return true if, for every base register BASE_REG, (plus BASE_REG X)
2113 can address a value of mode MODE. */
2114
2115 static bool
2116 loongarch_valid_offset_p (rtx x, machine_mode mode)
2117 {
2118 /* Check that X is a signed 12-bit number, or, for SImode and DImode
2119 accesses, a signed 16-bit number that is a multiple of 4 (the
2120 14-bit-immediate-shifted-by-2 form). */
2121 if (!(const_arith_operand (x, Pmode)
2122 || ((mode == E_SImode || mode == E_DImode)
2123 && const_imm16_operand (x, Pmode)
2124 && (loongarch_signed_immediate_p (INTVAL (x), 14, 2)))))
2125 return false;
2126
2127 /* We may need to split multiword moves, so make sure that every word
2128 is accessible. */
2129 if (!(LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
2130 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
2131 && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD))
2132 return false;
2133
2134 return true;
2135 }
2136
2137 /* Should a symbol of type SYMBOL_TYPE be split into two or more parts? */
2138
2139 bool
2140 loongarch_split_symbol_type (enum loongarch_symbol_type symbol_type)
2141 {
2142 switch (symbol_type)
2143 {
2144 case SYMBOL_PCREL:
2145 case SYMBOL_PCREL64:
2146 case SYMBOL_GOT_DISP:
2147 case SYMBOL_TLS_IE:
2148 case SYMBOL_TLS_LE:
2149 case SYMBOL_TLSGD:
2150 case SYMBOL_TLSLDM:
2151 return true;
2152
2153 case SYMBOL_TLS:
2154 return false;
2155
2156 default:
2157 gcc_unreachable ();
2158 }
2159 }
2160
2161 /* Return true if a LO_SUM can address a value of mode MODE when the
2162 LO_SUM symbol has type SYMBOL_TYPE. */
2163
2164 static bool
2165 loongarch_valid_lo_sum_p (enum loongarch_symbol_type symbol_type,
2166 machine_mode mode, rtx x)
2167 {
2168 int align, size;
2169
2170 /* Check that symbols of type SYMBOL_TYPE can be used to access values
2171 of mode MODE. */
2172 if (loongarch_symbol_insns (symbol_type, mode) == 0)
2173 return false;
2174
2175 /* Check that there is a known low-part relocation. */
2176 if (!loongarch_split_symbol_type (symbol_type))
2177 return false;
2178
2179 /* We can't tell size or alignment when we have BLKmode, so try extracting a
2180 decl from the symbol if possible. */
2181 if (mode == BLKmode)
2182 {
2183 rtx offset;
2184
2185 /* Extract the symbol from the LO_SUM operand, if any. */
2186 split_const (x, &x, &offset);
2187
2188 /* Might be a CODE_LABEL. We can compute align but not size for that,
2189 so don't bother trying to handle it. */
2190 if (!SYMBOL_REF_P (x))
2191 return false;
2192
2193 /* Use worst case assumptions if we don't have a SYMBOL_REF_DECL. */
2194 align = (SYMBOL_REF_DECL (x)
2195 ? DECL_ALIGN (SYMBOL_REF_DECL (x))
2196 : 1);
2197 size = (SYMBOL_REF_DECL (x) && DECL_SIZE (SYMBOL_REF_DECL (x))
2198 ? tree_to_uhwi (DECL_SIZE (SYMBOL_REF_DECL (x)))
2199 : 2*BITS_PER_WORD);
2200 }
2201 else
2202 {
2203 align = GET_MODE_ALIGNMENT (mode);
2204 size = GET_MODE_BITSIZE (mode);
2205 }
2206
2207 /* We may need to split multiword moves, so make sure that each word
2208 can be accessed without inducing a carry. */
2209 if (size > BITS_PER_WORD
2210 && (!TARGET_STRICT_ALIGN || size > align))
2211 return false;
2212
2213 return true;
2214 }
2215
2216 static bool
2217 loongarch_valid_index_p (struct loongarch_address_info *info, rtx x,
2218 machine_mode mode, bool strict_p)
2219 {
2220 rtx index;
2221
2222 if ((REG_P (x) || SUBREG_P (x))
2223 && GET_MODE (x) == Pmode)
2224 {
2225 index = x;
2226 }
2227 else
2228 return false;
2229
2230 if (!strict_p
2231 && SUBREG_P (index)
2232 && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))])
2233 index = SUBREG_REG (index);
2234
2235 if (loongarch_valid_base_register_p (index, mode, strict_p))
2236 {
2237 info->type = ADDRESS_REG_REG;
2238 info->offset = index;
2239 return true;
2240 }
2241
2242 return false;
2243 }
2244
2245 /* Return true if X is a valid address for machine mode MODE. If it is,
2246 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2247 effect. */
2248
2249 static bool
2250 loongarch_classify_address (struct loongarch_address_info *info, rtx x,
2251 machine_mode mode, bool strict_p)
2252 {
2253 switch (GET_CODE (x))
2254 {
2255 case REG:
2256 case SUBREG:
2257 info->type = ADDRESS_REG;
2258 info->reg = x;
2259 info->offset = const0_rtx;
2260 return loongarch_valid_base_register_p (info->reg, mode, strict_p);
2261
2262 case PLUS:
2263 if (loongarch_valid_base_register_p (XEXP (x, 0), mode, strict_p)
2264 && loongarch_valid_index_p (info, XEXP (x, 1), mode, strict_p))
2265 {
2266 info->reg = XEXP (x, 0);
2267 return true;
2268 }
2269
2270 if (loongarch_valid_base_register_p (XEXP (x, 1), mode, strict_p)
2271 && loongarch_valid_index_p (info, XEXP (x, 0), mode, strict_p))
2272 {
2273 info->reg = XEXP (x, 1);
2274 return true;
2275 }
2276
2277 info->type = ADDRESS_REG;
2278 info->reg = XEXP (x, 0);
2279 info->offset = XEXP (x, 1);
2280 return (loongarch_valid_base_register_p (info->reg, mode, strict_p)
2281 && loongarch_valid_offset_p (info->offset, mode));
2282
2283 case LO_SUM:
2284 info->type = ADDRESS_LO_SUM;
2285 info->reg = XEXP (x, 0);
2286 info->offset = XEXP (x, 1);
2287 /* We have to trust the creator of the LO_SUM to do something vaguely
2288 sane. Target-independent code that creates a LO_SUM should also
2289 create and verify the matching HIGH. Target-independent code that
2290 adds an offset to a LO_SUM must prove that the offset will not
2291 induce a carry. Failure to do either of these things would be
2292 a bug, and we are not required to check for it here. The MIPS
2293 backend itself should only create LO_SUMs for valid symbolic
2294 constants, with the high part being either a HIGH or a copy
2295 of _gp. */
2296 info->symbol_type
2297 = loongarch_classify_symbolic_expression (info->offset);
2298 return (loongarch_valid_base_register_p (info->reg, mode, strict_p)
2299 && loongarch_valid_lo_sum_p (info->symbol_type, mode,
2300 info->offset));
2301 case CONST_INT:
2302 /* Small-integer addresses don't occur very often, but they
2303 are legitimate if $r0 is a valid base register. */
2304 info->type = ADDRESS_CONST_INT;
2305 return IMM12_OPERAND (INTVAL (x));
2306
2307 default:
2308 return false;
2309 }
2310 }
2311
2312 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
2313
2314 static bool
2315 loongarch_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
2316 code_helper = ERROR_MARK)
2317 {
2318 struct loongarch_address_info addr;
2319
2320 return loongarch_classify_address (&addr, x, mode, strict_p);
2321 }
2322
2323 /* Return true if ADDR matches the pattern for the indexed address
2324 instruction. */
2325
2326 static bool
2327 loongarch_index_address_p (rtx addr, machine_mode mode ATTRIBUTE_UNUSED)
2328 {
2329 if (GET_CODE (addr) != PLUS
2330 || !REG_P (XEXP (addr, 0))
2331 || !REG_P (XEXP (addr, 1)))
2332 return false;
2333 return true;
2334 }
2335
2336 /* Return the number of instructions needed to load or store a value
2337 of mode MODE at address X. Return 0 if X isn't valid for MODE.
2338 Assume that multiword moves may need to be split into word moves
2339 if MIGHT_SPLIT_P, otherwise assume that a single load or store is
2340 enough. */
2341
2342 int
2343 loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p)
2344 {
2345 struct loongarch_address_info addr;
2346 int factor;
2347 bool lsx_p = (!might_split_p
2348 && (LSX_SUPPORTED_MODE_P (mode)
2349 || LASX_SUPPORTED_MODE_P (mode)));
2350
2351 if (!loongarch_classify_address (&addr, x, mode, false))
2352 return 0;
2353
2354 /* BLKmode is used for single unaligned loads and stores and should
2355 not count as a multiword mode. (GET_MODE_SIZE (BLKmode) is pretty
2356 meaningless, so we have to single it out as a special case one way
2357 or the other.) */
2358 if (mode != BLKmode && might_split_p)
2359 factor = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2360 else
2361 factor = 1;
2362
2363 if (loongarch_classify_address (&addr, x, mode, false))
2364 switch (addr.type)
2365 {
2366 case ADDRESS_REG:
2367 if (lsx_p)
2368 {
2369 /* LSX LD.* and ST.* supports 12-bit signed offsets. */
2370 if (IMM12_OPERAND (INTVAL (addr.offset)))
2371 return 1;
2372 else
2373 return 0;
2374 }
2375 return factor;
2376
2377 case ADDRESS_REG_REG:
2378 return factor;
2379
2380 case ADDRESS_CONST_INT:
2381 return lsx_p ? 0 : factor;
2382
2383 case ADDRESS_LO_SUM:
2384 return factor + 1;
2385
2386 case ADDRESS_SYMBOLIC:
2387 return lsx_p ? 0
2388 : factor * loongarch_symbol_insns (addr.symbol_type, mode);
2389 }
2390 return 0;
2391 }
2392
2393 /* Return true if X fits within an unsigned field of BITS bits that is
2394 shifted left SHIFT bits before being used. */
2395
2396 bool
2397 loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits,
2398 int shift = 0)
2399 {
2400 return (x & ((1 << shift) - 1)) == 0 && x < ((unsigned) 1 << (shift + bits));
2401 }
2402
2403 /* Return true if X fits within a signed field of BITS bits that is
2404 shifted left SHIFT bits before being used. */
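/* Worked examples (illustrative): with BITS = 12 and SHIFT = 0 the
   unsigned form accepts [0, 4095] and the signed form [-2048, 2047];
   with BITS = 14 and SHIFT = 2 the signed form accepts multiples of 4
   in [-32768, 32764].  */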
2405
2406 bool
2407 loongarch_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits,
2408 int shift = 0)
2409 {
2410 x += 1 << (bits + shift - 1);
2411 return loongarch_unsigned_immediate_p (x, bits, shift);
2412 }
2413
2414 /* Return the scale shift applied to an LSX LD/ST address offset. */
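/* For example (illustrative): the shift is 0 for V16QImode, 1 for
   V8HImode, 2 for V4SImode and 3 for V2DImode.  */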
2415
2416 int
2417 loongarch_ldst_scaled_shift (machine_mode mode)
2418 {
2419 int shift = exact_log2 (GET_MODE_UNIT_SIZE (mode));
2420
2421 if (shift < 0 || shift > 8)
2422 gcc_unreachable ();
2423
2424 return shift;
2425 }
2426
2427 /* Return true if X is a legitimate address with a 12-bit offset
2428 or addr.type is ADDRESS_LO_SUM.
2429 MODE is the mode of the value being accessed. */
2430
2431 bool
2432 loongarch_12bit_offset_address_p (rtx x, machine_mode mode)
2433 {
2434 struct loongarch_address_info addr;
2435
2436 return (loongarch_classify_address (&addr, x, mode, false)
2437 && ((addr.type == ADDRESS_REG
2438 && CONST_INT_P (addr.offset)
2439 && LARCH_12BIT_OFFSET_P (INTVAL (addr.offset)))
2440 || addr.type == ADDRESS_LO_SUM));
2441 }
2442
2443 /* Return true if X is a legitimate address with a 14-bit offset shifted 2.
2444 MODE is the mode of the value being accessed. */
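/* Illustrative: offsets such as -32768, 4 or 32764 qualify (multiples of
   4 within the signed 16-bit range), matching the LDPTR/STPTR-style
   addressing this predicate is meant for.  */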
2445
2446 bool
2447 loongarch_14bit_shifted_offset_address_p (rtx x, machine_mode mode)
2448 {
2449 struct loongarch_address_info addr;
2450
2451 return (loongarch_classify_address (&addr, x, mode, false)
2452 && addr.type == ADDRESS_REG
2453 && CONST_INT_P (addr.offset)
2454 && LARCH_16BIT_OFFSET_P (INTVAL (addr.offset))
2455 && LARCH_SHIFT_2_OFFSET_P (INTVAL (addr.offset)));
2456 }
2457
2458 /* Return true if X is a legitimate address with base and index.
2459 MODE is the mode of the value being accessed. */
2460
2461 bool
2462 loongarch_base_index_address_p (rtx x, machine_mode mode)
2463 {
2464 struct loongarch_address_info addr;
2465
2466 return (loongarch_classify_address (&addr, x, mode, false)
2467 && addr.type == ADDRESS_REG_REG
2468 && REG_P (addr.offset));
2469 }
2470
2471 /* Return the number of instructions needed to load constant X.
2472 Return 0 if X isn't a valid constant. */
2473
2474 int
2475 loongarch_const_insns (rtx x)
2476 {
2477 enum loongarch_symbol_type symbol_type;
2478 rtx offset;
2479
2480 switch (GET_CODE (x))
2481 {
2482 case HIGH:
2483 if (!loongarch_symbolic_constant_p (XEXP (x, 0), &symbol_type)
2484 || !loongarch_split_symbol_type (symbol_type))
2485 return 0;
2486
2487 /* This is simply a PCALAU12I. */
2488 return 1;
2489
2490 case CONST_INT:
2491 return loongarch_integer_cost (INTVAL (x));
2492
2493 case CONST_VECTOR:
2494 if ((LSX_SUPPORTED_MODE_P (GET_MODE (x))
2495 || LASX_SUPPORTED_MODE_P (GET_MODE (x)))
2496 && loongarch_const_vector_same_int_p (x, GET_MODE (x), -512, 511))
2497 return 1;
2498 /* Fall through. */
2499 case CONST_DOUBLE:
2500 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
2501
2502 case CONST:
2503 /* See if we can refer to X directly. */
2504 if (loongarch_symbolic_constant_p (x, &symbol_type))
2505 return loongarch_symbol_insns (symbol_type, MAX_MACHINE_MODE);
2506
2507 /* Otherwise try splitting the constant into a base and offset.
2508 If the offset is a 12-bit value, we can load the base address
2509 into a register and then use ADDI.{W/D} to add in the offset.
2510 If the offset is larger, we can load the base and offset
2511 into separate registers and add them together with ADD.{W/D}.
2512 However, the latter is only possible before reload; during
2513 and after reload, we must have the option of forcing the
2514 constant into the pool instead. */
2515 split_const (x, &x, &offset);
2516 if (offset != 0)
2517 {
2518 int n = loongarch_const_insns (x);
2519 if (n != 0)
2520 {
2521 if (IMM12_INT (offset))
2522 return n + 1;
2523 else if (!targetm.cannot_force_const_mem (GET_MODE (x), x))
2524 return n + 1 + loongarch_integer_cost (INTVAL (offset));
2525 }
2526 }
2527 return 0;
2528
2529 case SYMBOL_REF:
2530 case LABEL_REF:
2531 return loongarch_symbol_insns (
2532 loongarch_classify_symbol (x), MAX_MACHINE_MODE);
2533
2534 default:
2535 return 0;
2536 }
2537 }
2538
2539 /* X is a doubleword constant that can be handled by splitting it into
2540 two words and loading each word separately. Return the number of
2541 instructions required to do this. */
2542
2543 int
2544 loongarch_split_const_insns (rtx x)
2545 {
2546 unsigned int low, high;
2547
2548 low = loongarch_const_insns (loongarch_subword (x, false));
2549 high = loongarch_const_insns (loongarch_subword (x, true));
2550 gcc_assert (low > 0 && high > 0);
2551 return low + high;
2552 }
2553
2554 /* Return one word of 128-bit value OP, taking into account the fixed
2555 endianness of certain registers. BYTE selects from the byte address. */
2556
2557 rtx
2558 loongarch_subword_at_byte (rtx op, unsigned int byte)
2559 {
2560 machine_mode mode;
2561
2562 mode = GET_MODE (op);
2563 if (mode == VOIDmode)
2564 mode = TImode;
2565
2566 gcc_assert (!FP_REG_RTX_P (op));
2567
2568 if (MEM_P (op))
2569 return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte));
2570
2571 return simplify_gen_subreg (word_mode, op, mode, byte);
2572 }
2573
2574 /* Return the number of instructions needed to implement INSN,
2575 given that it loads from or stores to MEM. */
2576
2577 int
2578 loongarch_load_store_insns (rtx mem, rtx_insn *insn)
2579 {
2580 machine_mode mode;
2581 bool might_split_p;
2582 rtx set;
2583
2584 gcc_assert (MEM_P (mem));
2585 mode = GET_MODE (mem);
2586
2587 /* Try to prove that INSN does not need to be split. */
2588 might_split_p = GET_MODE_SIZE (mode) > UNITS_PER_WORD;
2589 if (might_split_p)
2590 {
2591 set = single_set (insn);
2592 if (set
2593 && !loongarch_split_move_p (SET_DEST (set), SET_SRC (set)))
2594 might_split_p = false;
2595 }
2596
2597 return loongarch_address_insns (XEXP (mem, 0), mode, might_split_p);
2598 }
2599
2600 /* Return true if we need to trap on division by zero. */
2601
2602 bool
2603 loongarch_check_zero_div_p (void)
2604 {
2605 /* If -m[no-]check-zero-division is given explicitly, respect it. */
2606 if (target_flags_explicit & MASK_CHECK_ZERO_DIV)
2607 return TARGET_CHECK_ZERO_DIV;
2608
2609 /* If not, don't trap in optimized code except at -Og. */
2610 return !optimize || optimize_debug;
2611 }
2612
2613 /* Return the number of instructions needed for an integer division. */
2614
2615 int
2616 loongarch_idiv_insns (machine_mode mode ATTRIBUTE_UNUSED)
2617 {
2618 int count;
2619
2620 count = 1;
2621 if (loongarch_check_zero_div_p ())
2622 count += 2;
2623
2624 return count;
2625 }
2626
2627 /* Emit an instruction of the form (set TARGET (CODE OP0 OP1)). */
2628
2629 void
2630 loongarch_emit_binary (enum rtx_code code, rtx target, rtx op0, rtx op1)
2631 {
2632 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (code, GET_MODE (target),
2633 op0, op1)));
2634 }
2635
2636 /* Compute (CODE OP0 OP1) and store the result in a new register
2637 of mode MODE. Return that new register. */
2638
2639 static rtx
2640 loongarch_force_binary (machine_mode mode, enum rtx_code code, rtx op0,
2641 rtx op1)
2642 {
2643 rtx reg;
2644
2645 reg = gen_reg_rtx (mode);
2646 loongarch_emit_binary (code, reg, op0, op1);
2647 return reg;
2648 }
2649
2650 /* Copy VALUE to a register and return that register. If new pseudos
2651 are allowed, copy it into a new register, otherwise use DEST. */
2652
2653 static rtx
2654 loongarch_force_temporary (rtx dest, rtx value)
2655 {
2656 if (can_create_pseudo_p ())
2657 return force_reg (Pmode, value);
2658 else
2659 {
2660 loongarch_emit_move (dest, value);
2661 return dest;
2662 }
2663 }
2664
2665 /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE,
2666 then add CONST_INT OFFSET to the result. */
2667
2668 static rtx
2669 loongarch_unspec_address_offset (rtx base, rtx offset,
2670 enum loongarch_symbol_type symbol_type)
2671 {
2672 base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
2673 UNSPEC_ADDRESS_FIRST + symbol_type);
2674 if (offset != const0_rtx)
2675 base = gen_rtx_PLUS (Pmode, base, offset);
2676 return gen_rtx_CONST (Pmode, base);
2677 }
2678
2679 /* Return an UNSPEC address with underlying address ADDRESS and symbol
2680 type SYMBOL_TYPE. */
2681
2682 rtx
2683 loongarch_unspec_address (rtx address, enum loongarch_symbol_type symbol_type)
2684 {
2685 rtx base, offset;
2686
2687 split_const (address, &base, &offset);
2688 return loongarch_unspec_address_offset (base, offset, symbol_type);
2689 }
2690
2691 /* Emit an instruction of the form (set TARGET SRC). */
2692
2693 static rtx
2694 loongarch_emit_set (rtx target, rtx src)
2695 {
2696 emit_insn (gen_rtx_SET (target, src));
2697 return target;
2698 }
2699
2700 /* If OP is an UNSPEC address, return the address to which it refers,
2701 otherwise return OP itself. */
2702
2703 rtx
2704 loongarch_strip_unspec_address (rtx op)
2705 {
2706 rtx base, offset;
2707
2708 split_const (op, &base, &offset);
2709 if (UNSPEC_ADDRESS_P (base))
2710 op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset));
2711 return op;
2712 }
2713
2714 /* Return a legitimate address for REG + OFFSET. TEMP is as for
2715 loongarch_force_temporary; it is only needed when OFFSET is not an
2716 IMM12_OPERAND. */
2717
2718 static rtx
2719 loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
2720 {
2721 if (!IMM12_OPERAND (offset))
2722 {
2723 rtx high;
2724
2725 /* Leave OFFSET as a 12-bit offset and put the excess in HIGH.
2726 The addition inside the macro CONST_HIGH_PART may cause an
2727 overflow, so we need to force a sign-extension check. */
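/* Illustrative example (assuming the usual 12-bit low-part split):
   for OFFSET 0x12345 the low part kept here is 0x345 and HIGH
   becomes 0x12000, which is loaded into a temporary first.  */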
2728 high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
2729 offset = CONST_LOW_PART (offset);
2730 high = loongarch_force_temporary (temp, high);
2731 reg = loongarch_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
2732 }
2733 return plus_constant (Pmode, reg, offset);
2734 }
2735
2736 /* The __tls_get_addr symbol. */
2737 static GTY (()) rtx loongarch_tls_symbol;
2738
2739 /* Load an entry for a TLS access. */
2740
2741 static rtx
2742 loongarch_load_tls (rtx dest, rtx sym)
2743 {
2744 return gen_load_tls (Pmode, dest, sym);
2745 }
2746
2747 /* Return an instruction sequence that calls __tls_get_addr. SYM is
2748 the TLS symbol we are referencing and TYPE is the symbol type to use
2749 (either global dynamic or local dynamic). V0 is an RTX for the
2750 return value location. */
2751
2752 static rtx_insn *
2753 loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
2754 {
2755 rtx loc, a0;
2756 rtx_insn *insn;
2757 rtx tmp = gen_reg_rtx (Pmode);
2758
2759 a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST);
2760
2761 if (!loongarch_tls_symbol)
2762 loongarch_tls_symbol = init_one_libfunc ("__tls_get_addr");
2763
2764 loc = loongarch_unspec_address (sym, type);
2765
2766 start_sequence ();
2767
2768 if (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
2769 {
2770 /* Split tls symbol to high and low. */
2771 rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
2772 high = loongarch_force_temporary (tmp, high);
2773
2774 if (TARGET_CMODEL_EXTREME)
2775 {
2776 gcc_assert (TARGET_EXPLICIT_RELOCS);
2777
2778 rtx tmp1 = gen_reg_rtx (Pmode);
2779 emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc));
2780 emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc));
2781 emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc));
2782 emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1));
2783 }
2784 else
2785 emit_insn (gen_tls_low (Pmode, a0, high, loc));
2786 }
2787 else
2788 emit_insn (loongarch_load_tls (a0, loc));
2789
2790 if (flag_plt)
2791 {
2792 switch (la_target.cmodel)
2793 {
2794 case CMODEL_NORMAL:
2795 insn = emit_call_insn (gen_call_value_internal (v0,
2796 loongarch_tls_symbol,
2797 const0_rtx));
2798 break;
2799
2800 case CMODEL_MEDIUM:
2801 {
2802 rtx reg = gen_reg_rtx (Pmode);
2803 if (TARGET_EXPLICIT_RELOCS)
2804 {
2805 emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol));
2806 rtx call = gen_call_value_internal_1 (Pmode, v0, reg,
2807 loongarch_tls_symbol,
2808 const0_rtx);
2809 insn = emit_call_insn (call);
2810 }
2811 else
2812 {
2813 emit_move_insn (reg, loongarch_tls_symbol);
2814 insn = emit_call_insn (gen_call_value_internal (v0,
2815 reg,
2816 const0_rtx));
2817 }
2818 break;
2819 }
2820
2821 /* The extreme code model does not support PLT. */
2822 case CMODEL_EXTREME:
2823 case CMODEL_LARGE:
2824 case CMODEL_TINY:
2825 case CMODEL_TINY_STATIC:
2826 default:
2827 gcc_unreachable ();
2828 }
2829 }
2830 else
2831 {
2832 rtx dest = gen_reg_rtx (Pmode);
2833
2834 switch (la_target.cmodel)
2835 {
2836 case CMODEL_NORMAL:
2837 case CMODEL_MEDIUM:
2838 {
2839 if (TARGET_EXPLICIT_RELOCS)
2840 {
2841 rtx high = gen_reg_rtx (Pmode);
2842 loongarch_emit_move (high,
2843 gen_rtx_HIGH (Pmode,
2844 loongarch_tls_symbol));
2845 emit_insn (gen_ld_from_got (Pmode, dest, high,
2846 loongarch_tls_symbol));
2847 }
2848 else
2849 loongarch_emit_move (dest, loongarch_tls_symbol);
2850 break;
2851 }
2852
2853 case CMODEL_EXTREME:
2854 {
2855 gcc_assert (TARGET_EXPLICIT_RELOCS);
2856
2857 rtx tmp1 = gen_reg_rtx (Pmode);
2858 rtx high = gen_reg_rtx (Pmode);
2859
2860 loongarch_emit_move (high,
2861 gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
2862 loongarch_emit_move (tmp1, gen_rtx_LO_SUM (Pmode,
2863 gen_rtx_REG (Pmode, 0),
2864 loongarch_tls_symbol));
2865 emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol));
2866 emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol));
2867 loongarch_emit_move (dest,
2868 gen_rtx_MEM (Pmode,
2869 gen_rtx_PLUS (Pmode,
2870 high, tmp1)));
2871 }
2872 break;
2873
2874 case CMODEL_LARGE:
2875 case CMODEL_TINY:
2876 case CMODEL_TINY_STATIC:
2877 default:
2878 gcc_unreachable ();
2879 }
2880
2881 insn = emit_call_insn (gen_call_value_internal (v0, dest, const0_rtx));
2882 }
2883
2884 RTL_CONST_CALL_P (insn) = 1;
2885 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
2886 insn = get_insns ();
2887
2888 end_sequence ();
2889
2890 return insn;
2891 }
2892
2893 /* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
2894 its address. The return value will be both a valid address and a valid
2895 SET_SRC (either a REG or a LO_SUM). */
2896
2897 static rtx
2898 loongarch_legitimize_tls_address (rtx loc)
2899 {
2900 rtx dest, tp, tmp, tmp1, tmp2, tmp3;
2901 enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);
2902 rtx_insn *insn;
2903
2904 switch (model)
2905 {
2906 case TLS_MODEL_LOCAL_DYNAMIC:
2907 tmp = gen_rtx_REG (Pmode, GP_RETURN);
2908 dest = gen_reg_rtx (Pmode);
2909 insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSLDM, tmp);
2910 emit_libcall_block (insn, dest, tmp, loc);
2911 break;
2912
2913 case TLS_MODEL_GLOBAL_DYNAMIC:
2914 tmp = gen_rtx_REG (Pmode, GP_RETURN);
2915 dest = gen_reg_rtx (Pmode);
2916 insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSGD, tmp);
2917 emit_libcall_block (insn, dest, tmp, loc);
2918 break;
2919
2920 case TLS_MODEL_INITIAL_EXEC:
2921 {
2922 /* la.tls.ie; tp-relative add. */
2923 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2924 tmp1 = gen_reg_rtx (Pmode);
2925 tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
2926 dest = gen_reg_rtx (Pmode);
2927 if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
2928 {
2929 tmp3 = gen_reg_rtx (Pmode);
2930 rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
2931 high = loongarch_force_temporary (tmp3, high);
2932
2933 if (TARGET_CMODEL_EXTREME)
2934 {
2935 gcc_assert (TARGET_EXPLICIT_RELOCS);
2936
2937 rtx tmp3 = gen_reg_rtx (Pmode);
2938 emit_insn (gen_tls_low (Pmode, tmp3,
2939 gen_rtx_REG (Pmode, 0), tmp2));
2940 emit_insn (gen_lui_h_lo20 (tmp3, tmp3, tmp2));
2941 emit_insn (gen_lui_h_hi12 (tmp3, tmp3, tmp2));
2942 emit_move_insn (tmp1,
2943 gen_rtx_MEM (Pmode,
2944 gen_rtx_PLUS (Pmode,
2945 high, tmp3)));
2946 }
2947 else
2948 emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
2949 }
2950 else
2951 emit_insn (loongarch_load_tls (tmp1, tmp2));
2952 emit_insn (gen_add3_insn (dest, tmp1, tp));
2953 }
2954 break;
2955
2956 case TLS_MODEL_LOCAL_EXEC:
2957 {
2958 /* la.tls.le; tp-relative add.
2959
2960 normal:
2961 lu12i.w $rd, %le_hi20(sym)
2962 ori $rd, $rd, %le_lo12(sym)
2963 add.{w/d} $rd, $rd, $tp
2964 (st.{w/d}/ld.{w/d} $rs, $rd, 0)
2965
2966 tls le relax:
2967 lu12i.w $rd, %le_hi20_r(sym)
2968 add.{w/d} $rd,$rd,$tp
2969 addi.{w/d} $rd,$rd,%le_lo12_r(sym)
2970 (st.{w/d}/ld.{w/d} $rs, $rd, 0)
2971
2972 extreme (When the code model is set to extreme, the TLS le Relax
2973 instruction sequence is not generated):
2974 lu12i.w $rd, %le_hi20(sym)
2975 ori $rd, $rd, %le_lo12(sym)
2976 lu32i.d $rd, %le64_lo20(sym)
2977 lu52i.d $rd, $rd, %le64_hi12(sym)
2978 add.d $rd, $rd, $tp
2979 (st.{w/d}/ld.{w/d} $rs, $rd, 0) */
2980
2981 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2982 tmp1 = gen_reg_rtx (Pmode);
2983 tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
2984 dest = gen_reg_rtx (Pmode);
2985
2986 if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
2987 {
2988 tmp3 = gen_reg_rtx (Pmode);
2989 rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
2990 high = loongarch_force_temporary (tmp3, high);
2991
2992 /* The assembler does not implement TLS LE relaxation for the
2993 extreme code model, so in that case the old symbol address
2994 acquisition method is still used. */
2995 if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME)
2996 {
2997 emit_insn (gen_add_tls_le_relax (Pmode, dest, high,
2998 tp, loc));
2999 loongarch_emit_move (dest,
3000 gen_rtx_LO_SUM (Pmode, dest, tmp2));
3001 return dest;
3002 }
3003 else
3004 emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2));
3005
3006 if (TARGET_CMODEL_EXTREME)
3007 {
3008 gcc_assert (TARGET_EXPLICIT_RELOCS);
3009
3010 emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2));
3011 emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2));
3012 }
3013 }
3014 else
3015 emit_insn (loongarch_load_tls (tmp1, tmp2));
3016 emit_insn (gen_add3_insn (dest, tmp1, tp));
3017 }
3018 break;
3019
3020 default:
3021 gcc_unreachable ();
3022 }
3023 return dest;
3024 }
3025
3026 rtx
3027 loongarch_legitimize_call_address (rtx addr)
3028 {
3029 if (!call_insn_operand (addr, VOIDmode))
3030 {
3031 rtx reg = gen_reg_rtx (Pmode);
3032 loongarch_emit_move (reg, addr);
3033 return reg;
3034 }
3035
3036 enum loongarch_symbol_type symbol_type = loongarch_classify_symbol (addr);
3037
3038 /* If the '-mcmodel=medium' option is given and the assembler does
3039 not support call36, the following sequence of instructions is
3040 used for the function call:
3041 pcalau12i $rd, %pc_hi20(sym)
3042 jr $rd, %pc_lo12(sym)
3043 */
3044
3045 if (TARGET_CMODEL_MEDIUM
3046 && !HAVE_AS_SUPPORT_CALL36
3047 && (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
3048 && (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
3049 && (symbol_type == SYMBOL_PCREL
3050 || (symbol_type == SYMBOL_GOT_DISP && flag_plt)))
3051 {
3052 rtx reg = gen_reg_rtx (Pmode);
3053 emit_insn (gen_pcalau12i (Pmode, reg, addr));
3054 return gen_rtx_LO_SUM (Pmode, reg, addr);
3055 }
3056
3057 return addr;
3058 }
3059
3060 /* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR
3061 and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. */
3062
3063 static void
3064 loongarch_split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr)
3065 {
3066 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
3067 {
3068 *base_ptr = XEXP (x, 0);
3069 *offset_ptr = INTVAL (XEXP (x, 1));
3070 }
3071 else
3072 {
3073 *base_ptr = x;
3074 *offset_ptr = 0;
3075 }
3076 }
3077
3078 /* If X is not a valid address for mode MODE, force it into a register. */
3079
3080 static rtx
3081 loongarch_force_address (rtx x, machine_mode mode)
3082 {
3083 if (!loongarch_legitimate_address_p (mode, x, false))
3084 x = force_reg (Pmode, x);
3085 return x;
3086 }
3087
3088 static bool
3089 loongarch_symbol_extreme_p (enum loongarch_symbol_type type)
3090 {
3091 switch (type)
3092 {
3093 case SYMBOL_PCREL:
3094 return false;
3095 case SYMBOL_PCREL64:
3096 return true;
3097 default:
3098 return TARGET_CMODEL_EXTREME;
3099 }
3100 }
3101
3102 /* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise
3103 it appears in a MEM of that mode. Return true if ADDR is a legitimate
3104 constant in that context and can be split into high and low parts.
3105 If so, and if LOW_OUT is nonnull, emit the high part and store the
3106 low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise.
3107
3108 Return false if compiling with '-mexplicit-relocs=none'.
3109
3110 TEMP is as for loongarch_force_temporary and is used to load the high
3111 part into a register.
3112
3113 When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be
3114 a legitimate SET_SRC for an .md pattern, otherwise the low part
3115 is guaranteed to be a legitimate address for mode MODE. */
3116
3117 bool
3118 loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
3119 {
3120 enum loongarch_symbol_type symbol_type;
3121
3122 if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
3123 || !loongarch_symbolic_constant_p (addr, &symbol_type)
3124 || !loongarch_explicit_relocs_p (symbol_type)
3125 || loongarch_symbol_insns (symbol_type, mode) == 0
3126 || !loongarch_split_symbol_type (symbol_type))
3127 return false;
3128
3129 rtx high, temp1 = NULL;
3130
3131 if (temp == NULL)
3132 temp = gen_reg_rtx (Pmode);
3133
3134 /* Get the 12-31 bits of the address. */
3135 high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
3136 high = loongarch_force_temporary (temp, high);
3137
3138 if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ())
3139 {
3140 gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE);
3141
3142 temp1 = gen_reg_rtx (Pmode);
3143 emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0),
3144 addr));
3145 emit_insn (gen_lui_h_lo20 (temp1, temp1, addr));
3146 emit_insn (gen_lui_h_hi12 (temp1, temp1, addr));
3147 }
3148
3149 if (low_out)
3150 switch (symbol_type)
3151 {
3152 case SYMBOL_PCREL64:
3153 if (can_create_pseudo_p ())
3154 {
3155 *low_out = gen_rtx_PLUS (Pmode, high, temp1);
3156 break;
3157 }
3158 /* fall through */
3159 case SYMBOL_PCREL:
3160 *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
3161 break;
3162
3163 case SYMBOL_GOT_DISP:
3164 /* SYMBOL_GOT_DISP symbols are loaded from the GOT. */
3165 {
3166 if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
3167 *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, temp1));
3168 else
3169 {
3170 rtx low = gen_rtx_LO_SUM (Pmode, high, addr);
3171 rtx mem = gen_rtx_MEM (Pmode, low);
3172 *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
3173 UNSPEC_LOAD_FROM_GOT);
3174
3175 /* Nonzero in a MEM if the memory is statically allocated and
3176 read-only. A common example of the latter is a shared library's
3177 global offset table. */
3178 MEM_READONLY_P (mem) = 1;
3179 }
3180
3181 break;
3182 }
3183
3184 default:
3185 gcc_unreachable ();
3186 }
3187
3188 return true;
3189 }
3190
3191 /* Helper for loongarch_legitimize_address. Given X, return true if it
3192 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
3193
3194 These respectively represent canonical shift-add rtxs or scaled
3195 memory addresses. */
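/* E.g. (ashift X 2) and (mult X 4) both qualify; each describes X scaled
   by 4 for a shift-add combination.  */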
3196 static bool
3197 mem_shadd_or_shadd_rtx_p (rtx x)
3198 {
3199 return ((GET_CODE (x) == ASHIFT
3200 || GET_CODE (x) == MULT)
3201 && CONST_INT_P (XEXP (x, 1))
3202 && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
3203 || (GET_CODE (x) == MULT
3204 && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3))));
3205 }
3206
3207 /* This function is used to implement LEGITIMIZE_ADDRESS. If X can
3208 be legitimized in a way that the generic machinery might not expect,
3209 return a new address, otherwise return NULL. MODE is the mode of
3210 the memory being accessed. */
3211
3212 static rtx
3213 loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3214 machine_mode mode)
3215 {
3216 rtx base, addr;
3217 HOST_WIDE_INT offset;
3218
3219 if (loongarch_tls_symbol_p (x))
3220 return loongarch_legitimize_tls_address (x);
3221
3222 /* See if the address can split into a high part and a LO_SUM. */
3223 if (loongarch_split_symbol (NULL, x, mode, &addr))
3224 return loongarch_force_address (addr, mode);
3225
3226 /* Handle BASE + OFFSET using loongarch_add_offset. */
3227 loongarch_split_plus (x, &base, &offset);
3228 if (offset != 0)
3229 {
3230 /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. */
3231 if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
3232 && IMM12_OPERAND (offset))
3233 {
3234 rtx index = XEXP (base, 0);
3235 rtx fp = XEXP (base, 1);
3236
3237 if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM)
3238 {
3239 /* If we were given a MULT, we must fix the constant
3240 as we're going to create the ASHIFT form. */
3241 int shift_val = INTVAL (XEXP (index, 1));
3242 if (GET_CODE (index) == MULT)
3243 shift_val = exact_log2 (shift_val);
3244
3245 rtx reg1 = gen_reg_rtx (Pmode);
3246 rtx reg3 = gen_reg_rtx (Pmode);
3247 loongarch_emit_binary (PLUS, reg1, fp, GEN_INT (offset));
3248 loongarch_emit_binary (PLUS, reg3,
3249 gen_rtx_ASHIFT (Pmode, XEXP (index, 0),
3250 GEN_INT (shift_val)),
3251 reg1);
3252
3253 return reg3;
3254 }
3255 }
3256
3257 if (!loongarch_valid_base_register_p (base, mode, false))
3258 base = copy_to_mode_reg (Pmode, base);
3259 addr = loongarch_add_offset (NULL, base, offset);
3260 return loongarch_force_address (addr, mode);
3261 }
3262
3263 return x;
3264 }
3265
3266 /* Load VALUE into DEST. TEMP is as for loongarch_force_temporary. */
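/* Illustrative: a full 64-bit constant such as 0x1234567890abcdef is
   typically synthesized as LU12I.W (bits 12..31), ORI (bits 0..11),
   LU32I.D (bits 32..51) and LU52I.D (bits 52..63); the exact sequence
   is chosen by loongarch_build_integer.  */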
3267
3268 void
3269 loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
3270 {
3271 struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS];
3272 machine_mode mode;
3273 unsigned int i, num_ops;
3274 rtx x;
3275
3276 mode = GET_MODE (dest);
3277 num_ops = loongarch_build_integer (codes, value);
3278
3279 /* Apply each binary operation to X. Invariant: X is a legitimate
3280 source operand for a SET pattern. */
3281 x = GEN_INT (codes[0].value);
3282 for (i = 1; i < num_ops; i++)
3283 {
3284 if (!can_create_pseudo_p ())
3285 {
3286 emit_insn (gen_rtx_SET (temp, x));
3287 x = temp;
3288 }
3289 else
3290 x = force_reg (mode, x);
3291
3292 set_unique_reg_note (get_last_insn (), REG_EQUAL,
3293 GEN_INT (codes[i-1].curr_value));
3294
3295 switch (codes[i].method)
3296 {
3297 case METHOD_NORMAL:
3298 x = gen_rtx_fmt_ee (codes[i].code, mode, x,
3299 GEN_INT (codes[i].value));
3300 break;
3301 case METHOD_LU32I:
3302 gcc_assert (mode == DImode);
3303 x = gen_rtx_IOR (DImode,
3304 gen_rtx_ZERO_EXTEND (DImode,
3305 gen_rtx_SUBREG (SImode, x, 0)),
3306 GEN_INT (codes[i].value));
3307 break;
3308 case METHOD_LU52I:
3309 gcc_assert (mode == DImode);
3310 x = gen_rtx_IOR (DImode,
3311 gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)),
3312 GEN_INT (codes[i].value));
3313 break;
3314 case METHOD_MIRROR:
3315 gcc_assert (mode == DImode);
3316 emit_insn (gen_insvdi (x, GEN_INT (32), GEN_INT (32), x));
3317 break;
3318 default:
3319 gcc_unreachable ();
3320 }
3321 }
3322
3323 emit_insn (gen_rtx_SET (dest, x));
3324 }
3325
3326 /* Subroutine of loongarch_legitimize_move. Move constant SRC into register
3327 DEST given that SRC satisfies immediate_operand but doesn't satisfy
3328 move_operand. */
3329
3330 static void
3331 loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
3332 {
3333 rtx base, offset;
3334
3335 /* Split moves of big integers into smaller pieces. */
3336 if (splittable_const_int_operand (src, mode))
3337 {
3338 loongarch_move_integer (dest, dest, INTVAL (src));
3339 return;
3340 }
3341
3342 /* Split moves of symbolic constants into high and low. */
3343 if (loongarch_split_symbol (dest, src, MAX_MACHINE_MODE, &src))
3344 {
3345 loongarch_emit_set (dest, src);
3346 return;
3347 }
3348
3349 /* Generate the appropriate access sequences for TLS symbols. */
3350 if (loongarch_tls_symbol_p (src))
3351 {
3352 loongarch_emit_move (dest, loongarch_legitimize_tls_address (src));
3353 return;
3354 }
3355
3356 /* If we have (const (plus symbol offset)), and that expression cannot
3357 be forced into memory, load the symbol first and add in the offset.
3358 Prefer to do this even if the constant _can_ be forced into memory,
3359 as it usually produces better code. */
3360 split_const (src, &base, &offset);
3361 if (offset != const0_rtx
3362 && (targetm.cannot_force_const_mem (mode, src)
3363 || (can_create_pseudo_p ())))
3364 {
3365 base = loongarch_force_temporary (dest, base);
3366 loongarch_emit_move (dest,
3367 loongarch_add_offset (NULL, base, INTVAL (offset)));
3368 return;
3369 }
3370
3371 src = force_const_mem (mode, src);
3372
3373 loongarch_emit_move (dest, src);
3374 }
3375
3376 /* If (set DEST SRC) is not a valid move instruction, emit an equivalent
3377 sequence that is valid. */
3378
3379 bool
3380 loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src)
3381 {
3382 if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
3383 {
3384 loongarch_emit_move (dest, force_reg (mode, src));
3385 return true;
3386 }
3387
3388 /* Both src and dest are non-registers; one special case is supported where
3389 the source is (const_int 0) and the store can source the zero register.
3390 LSX and LASX are never able to source the zero register directly in
3391 memory operations. */
3392 if (!register_operand (dest, mode) && !register_operand (src, mode)
3393 && (!const_0_operand (src, mode)
3394 || LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode)))
3395 {
3396 loongarch_emit_move (dest, force_reg (mode, src));
3397 return true;
3398 }
3399
3400 /* We need to deal with constants that would be legitimate
3401 immediate_operands but aren't legitimate move_operands. */
3402 if (CONSTANT_P (src) && !move_operand (src, mode))
3403 {
3404 loongarch_legitimize_const_move (mode, dest, src);
3405 set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
3406 return true;
3407 }
3408
3409 return false;
3410 }
3411
3412 /* Return true if OP refers to small data symbols directly. */
3413
3414 static int
3415 loongarch_small_data_pattern_1 (rtx x)
3416 {
3417 subrtx_var_iterator::array_type array;
3418 FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
3419 {
3420 rtx x = *iter;
3421
3422 /* We make no particular guarantee about which symbolic constants are
3423 acceptable as asm operands versus which must be forced into a GPR. */
3424 if (GET_CODE (x) == ASM_OPERANDS)
3425 iter.skip_subrtxes ();
3426 else if (MEM_P (x))
3427 {
3428 if (loongarch_small_data_pattern_1 (XEXP (x, 0)))
3429 return true;
3430 iter.skip_subrtxes ();
3431 }
3432 }
3433 return false;
3434 }
3435
3436 /* Return true if OP refers to small data symbols directly. */
3437
3438 bool
3439 loongarch_small_data_pattern_p (rtx op)
3440 {
3441 return loongarch_small_data_pattern_1 (op);
3442 }
3443
3444 /* Rewrite *LOC so that it refers to small data using explicit
3445 relocations. */
3446
3447 static void
3448 loongarch_rewrite_small_data_1 (rtx *loc)
3449 {
3450 subrtx_ptr_iterator::array_type array;
3451 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
3452 {
3453 rtx *loc = *iter;
3454 if (MEM_P (*loc))
3455 {
3456 loongarch_rewrite_small_data_1 (&XEXP (*loc, 0));
3457 iter.skip_subrtxes ();
3458 }
3459 }
3460 }
3461
3462 /* Rewrite instruction pattern PATTERN so that it refers to small data
3463 using explicit relocations. */
3464
3465 rtx
3466 loongarch_rewrite_small_data (rtx pattern)
3467 {
3468 pattern = copy_insn (pattern);
3469 loongarch_rewrite_small_data_1 (&pattern);
3470 return pattern;
3471 }
3472
3473 /* The cost of loading values from the constant pool. It should be
3474 larger than the cost of any constant we want to synthesize inline. */
3475 #define CONSTANT_POOL_COST COSTS_N_INSNS (8)
3476
3477 /* Return true if there is an instruction that implements CODE
3478 and if that instruction accepts X as an immediate operand. */
3479
3480 static int
3481 loongarch_immediate_operand_p (int code, HOST_WIDE_INT x)
3482 {
3483 switch (code)
3484 {
3485 case ASHIFT:
3486 case ASHIFTRT:
3487 case LSHIFTRT:
3488 /* All shift counts are truncated to a valid constant. */
3489 return true;
3490
3491 case ROTATE:
3492 case ROTATERT:
3493 return true;
3494
3495 case AND:
3496 case IOR:
3497 case XOR:
3498 /* These instructions take 12-bit unsigned immediates. */
3499 return IMM12_OPERAND_UNSIGNED (x);
3500
3501 case PLUS:
3502 case LT:
3503 case LTU:
3504 /* These instructions take 12-bit signed immediates. */
3505 return IMM12_OPERAND (x);
3506
3507 case EQ:
3508 case NE:
3509 case GT:
3510 case GTU:
3511 /* The "immediate" forms of these instructions are really
3512 implemented as comparisons with register 0. */
3513 return x == 0;
3514
3515 case GE:
3516 case GEU:
3517 /* Likewise, meaning that the only valid immediate operand is 1. */
3518 return x == 1;
3519
3520 case LE:
3521 /* We add 1 to the immediate and use SLT. */
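/* E.g. "x <= 5" is handled as "x < 6", i.e. an SLTI with immediate 6.  */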
3522 return IMM12_OPERAND (x + 1);
3523
3524 case LEU:
3525 /* Likewise SLTU, but reject the always-true case. */
3526 return IMM12_OPERAND (x + 1) && x + 1 != 0;
3527
3528 case SIGN_EXTRACT:
3529 case ZERO_EXTRACT:
3530 /* The bit position and size are immediate operands. */
3531 return 1;
3532
3533 default:
3534 /* By default assume that $0 can be used for 0. */
3535 return x == 0;
3536 }
3537 }
3538
3539 /* Return the cost of binary operation X, given that the instruction
3540 sequence for a word-sized or smaller operation has cost SINGLE_COST
3541 and that the sequence of a double-word operation has cost DOUBLE_COST.
3542 If SPEED is true, optimize for speed otherwise optimize for size. */
3543
3544 static int
3545 loongarch_binary_cost (rtx x, int single_cost, int double_cost, bool speed)
3546 {
3547 int cost;
3548
3549 if (GET_MODE_SIZE (GET_MODE (x)) == UNITS_PER_WORD * 2)
3550 cost = double_cost;
3551 else
3552 cost = single_cost;
3553 return (cost
3554 + set_src_cost (XEXP (x, 0), GET_MODE (x), speed)
3555 + rtx_cost (XEXP (x, 1), GET_MODE (x), GET_CODE (x), 1, speed));
3556 }
3557
3558 /* Return the cost of floating-point multiplications of mode MODE. */
3559
3560 static int
3561 loongarch_fp_mult_cost (machine_mode mode)
3562 {
3563 return mode == DFmode ? loongarch_cost->fp_mult_df
3564 : loongarch_cost->fp_mult_sf;
3565 }
3566
3567 /* Return the cost of floating-point divisions of mode MODE. */
3568
3569 static int
3570 loongarch_fp_div_cost (machine_mode mode)
3571 {
3572 return mode == DFmode ? loongarch_cost->fp_div_df
3573 : loongarch_cost->fp_div_sf;
3574 }
3575
3576 /* Return the cost of sign-extending OP to mode MODE, not including the
3577 cost of OP itself. */
3578
3579 static int
3580 loongarch_sign_extend_cost (rtx op)
3581 {
3582 if (MEM_P (op))
3583 /* Extended loads are as cheap as unextended ones. */
3584 return 0;
3585
3586 return COSTS_N_INSNS (1);
3587 }
3588
3589 /* Return the cost of zero-extending OP to mode MODE, not including the
3590 cost of OP itself. */
3591
3592 static int
3593 loongarch_zero_extend_cost (rtx op)
3594 {
3595 if (MEM_P (op))
3596 /* Extended loads are as cheap as unextended ones. */
3597 return 0;
3598
3599 /* We can use ANDI. */
3600 return COSTS_N_INSNS (1);
3601 }
3602
3603 /* Return the cost of moving between two registers of mode MODE,
3604 assuming that the move will be in pieces of at most UNITS bytes. */
3605
3606 static int
3607 loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units)
3608 {
3609 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
3610 }
3611
3612 /* Return the cost of moving between two registers of mode MODE. */
3613
3614 static int
3615 loongarch_set_reg_reg_cost (machine_mode mode)
3616 {
3617 switch (GET_MODE_CLASS (mode))
3618 {
3619 case MODE_CC:
3620 return loongarch_set_reg_reg_piece_cost (mode, GET_MODE_SIZE (CCmode));
3621
3622 case MODE_FLOAT:
3623 case MODE_COMPLEX_FLOAT:
3624 case MODE_VECTOR_FLOAT:
3625 if (TARGET_HARD_FLOAT)
3626 return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_HWFPVALUE);
3627 /* Fall through. */
3628
3629 default:
3630 return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_WORD);
3631 }
3632 }
3633
3634 /* Implement TARGET_RTX_COSTS. */
3635
3636 static bool
3637 loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
3638 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
3639 {
3640 int code = GET_CODE (x);
3641 bool float_mode_p = FLOAT_MODE_P (mode);
3642 int cost;
3643 rtx addr;
3644
3645 if (outer_code == COMPARE)
3646 {
3647 gcc_assert (CONSTANT_P (x));
3648 *total = 0;
3649 return true;
3650 }
3651
3652 switch (code)
3653 {
3654 case CONST_INT:
3655 if (TARGET_64BIT && outer_code == AND && UINTVAL (x) == 0xffffffff)
3656 {
3657 *total = 0;
3658 return true;
3659 }
3660
3661 /* When not optimizing for size, we care more about the cost
3662 of hot code, and hot code is often in a loop. If a constant
3663 operand needs to be forced into a register, we will often be
3664 able to hoist the constant load out of the loop, so the load
3665 should not contribute to the cost. */
3666 if (speed || loongarch_immediate_operand_p (outer_code, INTVAL (x)))
3667 {
3668 *total = 0;
3669 return true;
3670 }
3671 /* Fall through. */
3672
3673 case CONST:
3674 case SYMBOL_REF:
3675 case LABEL_REF:
3676 case CONST_DOUBLE:
3677 cost = loongarch_const_insns (x);
3678 if (cost > 0)
3679 {
3680 if (cost == 1 && outer_code == SET
3681 && !(float_mode_p && TARGET_HARD_FLOAT))
3682 cost = 0;
3683 else if ((outer_code == SET || GET_MODE (x) == VOIDmode))
3684 cost = 1;
3685 *total = COSTS_N_INSNS (cost);
3686 return true;
3687 }
3688 /* The value will need to be fetched from the constant pool. */
3689 *total = CONSTANT_POOL_COST;
3690 return true;
3691
3692 case MEM:
3693 /* If the address is legitimate, return the number of
3694 instructions it needs. */
3695 addr = XEXP (x, 0);
3696 /* Check for a scaled indexed address. */
3697 if (loongarch_index_address_p (addr, mode))
3698 {
3699 *total = COSTS_N_INSNS (2);
3700 return true;
3701 }
3702 cost = loongarch_address_insns (addr, mode, true);
3703 if (cost > 0)
3704 {
3705 *total = COSTS_N_INSNS (cost + 1);
3706 return true;
3707 }
3708 /* Otherwise use the default handling. */
3709 return false;
3710
3711 case FFS:
3712 *total = COSTS_N_INSNS (6);
3713 return false;
3714
3715 case NOT:
3716 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1);
3717 return false;
3718
3719 case AND:
3720 /* Check for a *clear_upper32 pattern and treat it like a zero
3721 extension. See the pattern's comment for details. */
3722 if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1))
3723 && UINTVAL (XEXP (x, 1)) == 0xffffffff)
3724 {
3725 *total = (loongarch_zero_extend_cost (XEXP (x, 0))
3726 + set_src_cost (XEXP (x, 0), mode, speed));
3727 return true;
3728 }
3729 /* (AND (NOT op0) (NOT op1)) is a NOR operation that can be done in
3730 a single instruction. */
3731 if (GET_CODE (XEXP (x, 0)) == NOT && GET_CODE (XEXP (x, 1)) == NOT)
3732 {
3733 cost = GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1;
3734 *total = (COSTS_N_INSNS (cost)
3735 + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed)
3736 + set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed));
3737 return true;
3738 }
3739
3740 /* Fall through. */
3741
3742 case IOR:
3743 case XOR:
3744 /* Double-word operations use two single-word operations. */
3745 *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2),
3746 speed);
3747 return true;
3748
3749 case ASHIFT:
3750 case ASHIFTRT:
3751 case LSHIFTRT:
3752 case ROTATE:
3753 case ROTATERT:
3754 if (CONSTANT_P (XEXP (x, 1)))
3755 *total = loongarch_binary_cost (x, COSTS_N_INSNS (1),
3756 COSTS_N_INSNS (4), speed);
3757 else
3758 *total = loongarch_binary_cost (x, COSTS_N_INSNS (1),
3759 COSTS_N_INSNS (12), speed);
3760 return true;
3761
3762 case ABS:
3763 if (float_mode_p)
3764 *total = loongarch_cost->fp_add;
3765 else
3766 *total = COSTS_N_INSNS (4);
3767 return false;
3768
3769 case LT:
3770 case LTU:
3771 case LE:
3772 case LEU:
3773 case GT:
3774 case GTU:
3775 case GE:
3776 case GEU:
3777 case EQ:
3778 case NE:
3779 case UNORDERED:
3780 case LTGT:
3781 case UNGE:
3782 case UNGT:
3783 case UNLE:
3784 case UNLT:
3785 /* Branch comparisons have VOIDmode, so use the first operand's
3786 mode instead. */
3787 mode = GET_MODE (XEXP (x, 0));
3788 if (FLOAT_MODE_P (mode))
3789 {
3790 *total = loongarch_cost->fp_add;
3791 return false;
3792 }
3793 *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4),
3794 speed);
3795 return true;
3796
3797 case MINUS:
3798 case PLUS:
3799 if (float_mode_p)
3800 {
3801 *total = loongarch_cost->fp_add;
3802 return false;
3803 }
3804
3805 /* An add of a mult (i.e. a shift left) whose immediate operand satisfies
3806 the const_immalsl_operand predicate costs a single (alsl) instruction. */
3807 if ((mode == SImode || (TARGET_64BIT && mode == DImode))
3808 && GET_CODE (XEXP (x, 0)) == MULT)
3809 {
3810 rtx op2 = XEXP (XEXP (x, 0), 1);
3811 if (const_immalsl_operand (op2, mode))
3812 {
3813 *total = (COSTS_N_INSNS (1)
3814 + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed)
3815 + set_src_cost (XEXP (x, 1), mode, speed));
3816 return true;
3817 }
3818 }
3819
3820 /* Double-word operations require three single-word operations and
3821 an SLTU. */
3822 *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4),
3823 speed);
3824 return true;
3825
3826 case NEG:
3827 if (float_mode_p)
3828 *total = loongarch_cost->fp_add;
3829 else
3830 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1);
3831 return false;
3832
3833 case FMA:
3834 *total = loongarch_fp_mult_cost (mode);
3835 return false;
3836
3837 case MULT:
3838 if (float_mode_p)
3839 *total = loongarch_fp_mult_cost (mode);
3840 else if (mode == DImode && !TARGET_64BIT)
3841 *total = (speed
3842 ? loongarch_cost->int_mult_si * 3 + 6
3843 : COSTS_N_INSNS (7));
3844 else if (mode == DImode)
3845 *total = loongarch_cost->int_mult_di;
3846 else
3847 *total = loongarch_cost->int_mult_si;
3848 return false;
3849
3850 case DIV:
3851 /* Check for a reciprocal. */
3852 if (float_mode_p
3853 && flag_unsafe_math_optimizations
3854 && XEXP (x, 0) == CONST1_RTX (mode))
3855 {
3856 if (outer_code == SQRT || GET_CODE (XEXP (x, 1)) == SQRT)
3857 /* An rsqrt<mode>a or rsqrt<mode>b pattern. Count the
3858 division as being free. */
3859 *total = set_src_cost (XEXP (x, 1), mode, speed);
3860 else
3861 *total = (loongarch_fp_div_cost (mode)
3862 + set_src_cost (XEXP (x, 1), mode, speed));
3863 return true;
3864 }
3865 /* Fall through. */
3866
3867 case SQRT:
3868 case MOD:
3869 if (float_mode_p)
3870 {
3871 *total = loongarch_fp_div_cost (mode);
3872 return false;
3873 }
3874 /* Fall through. */
3875
3876 case UDIV:
3877 case UMOD:
3878 if (mode == DImode)
3879 *total = loongarch_cost->int_div_di;
3880 else
3881 {
3882 *total = loongarch_cost->int_div_si;
3883 if (TARGET_64BIT && !ISA_HAS_DIV32)
3884 *total += COSTS_N_INSNS (2);
3885 }
3886
3887 if (TARGET_CHECK_ZERO_DIV)
3888 *total += COSTS_N_INSNS (2);
3889
3890 return false;
3891
3892 case SIGN_EXTEND:
3893 *total = loongarch_sign_extend_cost (XEXP (x, 0));
3894 return false;
3895
3896 case ZERO_EXTEND:
3897 *total = loongarch_zero_extend_cost (XEXP (x, 0));
3898 return false;
3899 case TRUNCATE:
3900 /* Costings for highpart multiplies. Matching patterns of the form:
3901
3902 (lshiftrt:DI (mult:DI (sign_extend:DI (...))
3903 (sign_extend:DI (...)))
3904 (const_int 32))
3905 */
3906 if ((GET_CODE (XEXP (x, 0)) == ASHIFTRT
3907 || GET_CODE (XEXP (x, 0)) == LSHIFTRT)
3908 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3909 && ((INTVAL (XEXP (XEXP (x, 0), 1)) == 32
3910 && GET_MODE (XEXP (x, 0)) == DImode)
3911 || (TARGET_64BIT
3912 && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
3913 && GET_MODE (XEXP (x, 0)) == TImode))
3914 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
3915 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
3916 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
3917 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
3918 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
3919 == ZERO_EXTEND))))
3920 {
3921 if (mode == DImode)
3922 *total = loongarch_cost->int_mult_di;
3923 else
3924 *total = loongarch_cost->int_mult_si;
3925
3926 /* Sign extension is free; zero extension has a cost for DImode operands
3927 on a 64-bit core, i.e. when DMUL is present. */
3928 for (int i = 0; i < 2; ++i)
3929 {
3930 rtx op = XEXP (XEXP (XEXP (x, 0), 0), i);
3931 if (TARGET_64BIT
3932 && GET_CODE (op) == ZERO_EXTEND
3933 && GET_MODE (op) == DImode)
3934 *total += rtx_cost (op, DImode, MULT, i, speed);
3935 else
3936 *total += rtx_cost (XEXP (op, 0), VOIDmode, GET_CODE (op), 0,
3937 speed);
3938 }
3939
3940 return true;
3941 }
3942 return false;
3943
3944 case FLOAT:
3945 case UNSIGNED_FLOAT:
3946 case FIX:
3947 case FLOAT_EXTEND:
3948 case FLOAT_TRUNCATE:
3949 *total = loongarch_cost->fp_add;
3950 return false;
3951
3952 case SET:
3953 if (register_operand (SET_DEST (x), VOIDmode)
3954 && reg_or_0_operand (SET_SRC (x), VOIDmode))
3955 {
3956 *total = loongarch_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
3957 return true;
3958 }
3959 return false;
3960
3961 default:
3962 return false;
3963 }
3964 }
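
/* Illustrative example (editorial sketch, not part of the upstream sources):
   for a shift-add expression such as

     (plus:DI (mult:DI (reg:DI a) (const_int 4)) (reg:DI b))

   the PLUS case above finds a MULT whose immediate satisfies
   const_immalsl_operand, so the whole expression is costed as one
   instruction (a single alsl.d) plus the costs of its register operands,
   rather than as a separate multiply and add.  */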
3965
3966 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3967
3968 static int
3969 loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3970 tree vectype,
3971 int misalign ATTRIBUTE_UNUSED)
3972 {
3973 unsigned elements;
3974 machine_mode mode = vectype != NULL ? TYPE_MODE (vectype) : DImode;
3975
3976 switch (type_of_cost)
3977 {
3978 case scalar_stmt:
3979 case scalar_load:
3980 case vector_stmt:
3981 case vec_to_scalar:
3982 case scalar_to_vec:
3983 case scalar_store:
3984 return 1;
3985
3986 case vec_promote_demote:
3987 case vec_perm:
3988 return LASX_SUPPORTED_MODE_P (mode)
3989 && !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1;
3990
3991 case vector_load:
3992 case vector_store:
3993 case unaligned_load:
3994 case unaligned_store:
3995 return 2;
3996
3997 case cond_branch_taken:
3998 return 4;
3999
4000 case cond_branch_not_taken:
4001 return 2;
4002
4003 case vec_construct:
4004 elements = TYPE_VECTOR_SUBPARTS (vectype);
4005 if (ISA_HAS_LASX)
4006 return elements + 1;
4007 else
4008 return elements;
4009
4010 default:
4011 gcc_unreachable ();
4012 }
4013 }
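
/* Worked example (editorial sketch, not part of the upstream sources):
   for vec_construct of a V4SI vector, TYPE_VECTOR_SUBPARTS is 4, so the
   hook returns 5 when ISA_HAS_LASX and 4 otherwise; an unaligned_load is
   always costed at 2 regardless of the vector mode.  */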
4014
4015 class loongarch_vector_costs : public vector_costs
4016 {
4017 public:
4018 using vector_costs::vector_costs;
4019
4020 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
4021 stmt_vec_info stmt_info, slp_tree, tree vectype,
4022 int misalign,
4023 vect_cost_model_location where) override;
4024 void finish_cost (const vector_costs *) override;
4025
4026 protected:
4027 void count_operations (vect_cost_for_stmt, stmt_vec_info,
4028 vect_cost_model_location, unsigned int);
4029 unsigned int determine_suggested_unroll_factor (loop_vec_info);
4030 /* The number of vectorized stmts in loop. */
4031 unsigned m_stmts = 0;
4032 /* The number of load and store operations in loop. */
4033 unsigned m_loads = 0;
4034 unsigned m_stores = 0;
4035 /* Reduction factor for suggesting unroll factor. */
4036 unsigned m_reduc_factor = 0;
4037 /* True if the loop contains an average operation. */
4038 bool m_has_avg = false;
4039 /* True if the loop uses an approximation instruction sequence. */
4041 };
4042
4043 /* Implement TARGET_VECTORIZE_CREATE_COSTS. */
4044 static vector_costs *
4045 loongarch_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
4046 {
4047 return new loongarch_vector_costs (vinfo, costing_for_scalar);
4048 }
4049
4050 void
4051 loongarch_vector_costs::count_operations (vect_cost_for_stmt kind,
4052 stmt_vec_info stmt_info,
4053 vect_cost_model_location where,
4054 unsigned int count)
4055 {
4056 if (!m_costing_for_scalar
4057 && is_a<loop_vec_info> (m_vinfo)
4058 && where == vect_body)
4059 {
4060 m_stmts += count;
4061
4062 if (kind == scalar_load
4063 || kind == vector_load
4064 || kind == unaligned_load)
4065 m_loads += count;
4066 else if (kind == scalar_store
4067 || kind == vector_store
4068 || kind == unaligned_store)
4069 m_stores += count;
4070 else if ((kind == scalar_stmt
4071 || kind == vector_stmt
4072 || kind == vec_to_scalar)
4073 && stmt_info && vect_is_reduction (stmt_info))
4074 {
4075 tree lhs = gimple_get_lhs (stmt_info->stmt);
4076 unsigned int base = FLOAT_TYPE_P (TREE_TYPE (lhs)) ? 2 : 1;
4077 m_reduc_factor = MAX (base * count, m_reduc_factor);
4078 }
4079 }
4080 }
4081
4082 unsigned int
4083 loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
4084 {
4085 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4086
4087 if (m_has_avg || m_has_recip)
4088 return 1;
4089
4090 /* Don't unroll if it's specified explicitly not to be unrolled. */
4091 if (loop->unroll == 1
4092 || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
4093 || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
4094 return 1;
4095
4096 unsigned int nstmts_nonldst = m_stmts - m_loads - m_stores;
4097 /* Don't unroll if there are no vector instructions apart from memory accesses. */
4098 if (nstmts_nonldst == 0)
4099 return 1;
4100
4101 /* Use a simple hardware resource model based on how many non-vld/vst
4102 vector instructions can be issued per cycle. */
4103 unsigned int issue_info = la_vect_issue_info;
4104 unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;
4105 unsigned int uf = CEIL (reduc_factor * issue_info, nstmts_nonldst);
4106 uf = MIN ((unsigned int) la_vect_unroll_limit, uf);
4107
4108 return 1 << ceil_log2 (uf);
4109 }
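
/* Worked example (editorial sketch, not part of the upstream sources):
   with m_stmts = 9, m_loads = 4 and m_stores = 2 there are 3 non-load/store
   vector statements.  If la_vect_issue_info is 2 and m_reduc_factor is 4,
   then uf = CEIL (4 * 2, 3) = 3; assuming la_vect_unroll_limit is at least
   3, the result is rounded up to the next power of two, giving a suggested
   unroll factor of 4.  */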
4110
4111 unsigned
4112 loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
4113 stmt_vec_info stmt_info, slp_tree,
4114 tree vectype, int misalign,
4115 vect_cost_model_location where)
4116 {
4117 unsigned retval = 0;
4118
4119 if (flag_vect_cost_model)
4120 {
4121 int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype,
4122 misalign);
4123 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
4124 m_costs[where] += retval;
4125
4126 count_operations (kind, stmt_info, where, count);
4127 }
4128
4129 if (stmt_info)
4130 {
4131 /* Detect the use of an averaging operation. */
4132 gimple *stmt = stmt_info->stmt;
4133 if (is_gimple_call (stmt)
4134 && gimple_call_internal_p (stmt))
4135 {
4136 switch (gimple_call_internal_fn (stmt))
4137 {
4138 case IFN_AVG_FLOOR:
4139 case IFN_AVG_CEIL:
4140 m_has_avg = true;
4141 default:
4142 break;
4143 }
4144 }
4145 }
4146
4147 combined_fn cfn;
4148 if (kind == vector_stmt
4149 && stmt_info
4150 && stmt_info->stmt)
4151 {
4152 /* Detect the use of approximate instruction sequence. */
4153 if ((TARGET_RECIP_VEC_SQRT || TARGET_RECIP_VEC_RSQRT)
4154 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
4155 switch (cfn)
4156 {
4157 case CFN_BUILT_IN_SQRTF:
4158 m_has_recip = true;
4159 default:
4160 break;
4161 }
4162 else if (TARGET_RECIP_VEC_DIV
4163 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
4164 {
4165 machine_mode mode = TYPE_MODE (vectype);
4166 switch (gimple_assign_rhs_code (stmt_info->stmt))
4167 {
4168 case RDIV_EXPR:
4169 if (GET_MODE_INNER (mode) == SFmode)
4170 m_has_recip = true;
4171 default:
4172 break;
4173 }
4174 }
4175 }
4176
4177 return retval;
4178 }
4179
4180 void
4181 loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
4182 {
4183 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
4184 if (loop_vinfo)
4185 {
4186 m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo);
4187 }
4188
4189 vector_costs::finish_cost (scalar_costs);
4190 }
4191
4192 /* Implement TARGET_ADDRESS_COST. */
4193
4194 static int
4195 loongarch_address_cost (rtx addr, machine_mode mode,
4196 addr_space_t as ATTRIBUTE_UNUSED,
4197 bool speed ATTRIBUTE_UNUSED)
4198 {
4199 return loongarch_address_insns (addr, mode, false);
4200 }
4201
4202 /* Return one word of double-word value OP, taking into account the fixed
4203 endianness of certain registers. HIGH_P is true to select the high part,
4204 false to select the low part. */
4205
4206 rtx
4207 loongarch_subword (rtx op, bool high_p)
4208 {
4209 unsigned int byte;
4210 machine_mode mode;
4211
4212 byte = high_p ? UNITS_PER_WORD : 0;
4213 mode = GET_MODE (op);
4214 if (mode == VOIDmode)
4215 mode = TARGET_64BIT ? TImode : DImode;
4216
4217 if (FP_REG_RTX_P (op))
4218 return gen_rtx_REG (word_mode, REGNO (op) + high_p);
4219
4220 if (MEM_P (op))
4221 return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte));
4222
4223 return simplify_gen_subreg (word_mode, op, mode, byte);
4224 }
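
/* Illustrative example (editorial sketch, not part of the upstream sources):
   on a 64-bit target, taking the high word of a TImode GPR value returns a
   word_mode subreg at byte offset UNITS_PER_WORD (8), while for an FPR
   operand the function simply returns the next-numbered hard register.  */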
4225
4226 /* Return true if a move from SRC to DEST should be split into two. */
4228
4229 bool
4230 loongarch_split_move_p (rtx dest, rtx src)
4231 {
4232 /* FPR-to-FPR moves can be done in a single instruction, if they're
4233 allowed at all. */
4234 unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
4235 if (size == 8 && FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
4236 return false;
4237
4238 /* Check for floating-point loads and stores. */
4239 if (size == 8)
4240 {
4241 if (FP_REG_RTX_P (dest) && MEM_P (src))
4242 return false;
4243 if (FP_REG_RTX_P (src) && MEM_P (dest))
4244 return false;
4245 }
4246
4247 /* Check if LSX moves need splitting. */
4248 if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
4249 return loongarch_split_128bit_move_p (dest, src);
4250
4251 /* Check if LASX moves need splitting. */
4252 if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
4253 return loongarch_split_256bit_move_p (dest, src);
4254
4255 /* Otherwise split all multiword moves. */
4256 return size > UNITS_PER_WORD;
4257 }
4258
4259 /* Split a move from SRC to DEST, given that loongarch_split_move_p holds. */
4261
4262 void
4263 loongarch_split_move (rtx dest, rtx src)
4264 {
4265 rtx low_dest;
4266
4267 gcc_checking_assert (loongarch_split_move_p (dest, src));
4268 if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
4269 loongarch_split_128bit_move (dest, src);
4270 else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
4271 loongarch_split_256bit_move (dest, src);
4272 else if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src))
4273 {
4274 if (!TARGET_64BIT && GET_MODE (dest) == DImode)
4275 emit_insn (gen_move_doubleword_fprdi (dest, src));
4276 else if (!TARGET_64BIT && GET_MODE (dest) == DFmode)
4277 emit_insn (gen_move_doubleword_fprdf (dest, src));
4278 else if (TARGET_64BIT && GET_MODE (dest) == TFmode)
4279 emit_insn (gen_move_doubleword_fprtf (dest, src));
4280 else
4281 gcc_unreachable ();
4282 }
4283 else
4284 {
4285 /* The operation can be split into two normal moves. Decide in
4286 which order to do them. */
4287 low_dest = loongarch_subword (dest, false);
4288 if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
4289 {
4290 loongarch_emit_move (loongarch_subword (dest, true),
4291 loongarch_subword (src, true));
4292 loongarch_emit_move (low_dest, loongarch_subword (src, false));
4293 }
4294 else
4295 {
4296 loongarch_emit_move (low_dest, loongarch_subword (src, false));
4297 loongarch_emit_move (loongarch_subword (dest, true),
4298 loongarch_subword (src, true));
4299 }
4300 }
4301 }
4302
4303 /* Check if adding an integer constant value for a specific mode can be
4304 performed with an addu16i.d instruction and an addi.{w/d}
4305 instruction. */
4306
4307 bool
4308 loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT value, machine_mode mode)
4309 {
4310 /* Not necessary, but avoid unnecessary calculation if !TARGET_64BIT. */
4311 if (!TARGET_64BIT)
4312 return false;
4313
4314 if ((value & 0xffff) == 0)
4315 return false;
4316
4317 if (IMM12_OPERAND (value))
4318 return false;
4319
4320 value = (value & ~HWIT_UC_0xFFF) + ((value & 0x800) << 1);
4321 return ADDU16I_OPERAND (trunc_int_for_mode (value, mode));
4322 }
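
/* Worked example (editorial sketch, not part of the upstream sources):
   for VALUE == 0x207ff the low 16 bits are nonzero and the value does not
   fit in a signed 12-bit immediate; rounding away the low 12 bits (with
   the sign adjustment for bit 11) leaves 0x20000, a multiple of 0x10000
   accepted by ADDU16I_OPERAND, so the addition can be performed as
   addu16i.d with 0x2 followed by addi.d with 0x7ff.  */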
4323
4324 /* Split the integer constant op[0] into two constants (op[1] and op[2]) for
4325 a constant-plus operation in MODE. Each split constant can be added
4326 onto a register with a single instruction (addi.{d/w} or addu16i.d). */
4327
4328 void
4329 loongarch_split_plus_constant (rtx *op, machine_mode mode)
4330 {
4331 HOST_WIDE_INT v = INTVAL (op[0]), a;
4332
4333 if (DUAL_IMM12_OPERAND (v))
4334 a = (v > 0 ? 2047 : -2048);
4335 else if (loongarch_addu16i_imm12_operand_p (v, mode))
4336 a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1);
4337 else if (mode == DImode && DUAL_ADDU16I_OPERAND (v))
4338 a = (v > 0 ? 0x7fff0000 : ~0x7fffffff);
4339 else
4340 gcc_unreachable ();
4341
4342 op[1] = gen_int_mode (a, mode);
4343 v = v - (unsigned HOST_WIDE_INT) a;
4344 op[2] = gen_int_mode (v, mode);
4345 }
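
/* Worked example (editorial sketch, not part of the upstream sources):
   for op[0] == 4000 in SImode, which satisfies DUAL_IMM12_OPERAND, the
   split picks op[1] = 2047 and op[2] = 1953, so the addition can be
   emitted as two addi.w instructions with in-range immediates.  */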
4346
4347 /* Implement TARGET_CONSTANT_ALIGNMENT. */
4348
4349 static HOST_WIDE_INT
4350 loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align)
4351 {
4352 if (TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR)
4353 return MAX (align, BITS_PER_WORD);
4354 return align;
4355 }
4356
4357 const char *
4358 loongarch_output_move_index (rtx x, machine_mode mode, bool ldr)
4359 {
4360 int index = exact_log2 (GET_MODE_SIZE (mode));
4361 if (!IN_RANGE (index, 0, 3))
4362 return NULL;
4363
4364 struct loongarch_address_info info;
4365 if ((loongarch_classify_address (&info, x, mode, false)
4366 && !(info.type == ADDRESS_REG_REG))
4367 || !loongarch_legitimate_address_p (mode, x, false))
4368 return NULL;
4369
4370 const char *const insn[][4] =
4371 {
4372 {
4373 "stx.b\t%z1,%0",
4374 "stx.h\t%z1,%0",
4375 "stx.w\t%z1,%0",
4376 "stx.d\t%z1,%0",
4377 },
4378 {
4379 "ldx.bu\t%0,%1",
4380 "ldx.hu\t%0,%1",
4381 "ldx.w\t%0,%1",
4382 "ldx.d\t%0,%1",
4383 }
4384 };
4385
4386 return insn[ldr][index];
4387 }
4388
4389 const char *
4390 loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr)
4391 {
4392 int index = exact_log2 (GET_MODE_SIZE (mode));
4393 if (!IN_RANGE (index, 2, 5))
4394 return NULL;
4395
4396 struct loongarch_address_info info;
4397 if ((loongarch_classify_address (&info, x, mode, false)
4398 && !(info.type == ADDRESS_REG_REG))
4399 || !loongarch_legitimate_address_p (mode, x, false))
4400 return NULL;
4401
4402 const char *const insn[][4] =
4403 {
4404 {
4405 "fstx.s\t%1,%0",
4406 "fstx.d\t%1,%0",
4407 "vstx\t%w1,%0",
4408 "xvstx\t%u1,%0"
4409 },
4410 {
4411 "fldx.s\t%0,%1",
4412 "fldx.d\t%0,%1",
4413 "vldx\t%w0,%1",
4414 "xvldx\t%u0,%1"
4415 }
4416 };
4417
4418 return insn[ldr][index-2];
4419 }
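
/* Illustrative example (editorial sketch, not part of the upstream sources):
   for a V16QImode load from a register-indexed address, exact_log2 (16) is
   4, so the table above selects "vldx\t%w0,%1"; an 8-byte FP store with the
   same kind of address selects "fstx.d\t%1,%0".  */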
4420 /* Return true if a 128-bit move from SRC to DEST should be split. */
4421
4422 bool
4423 loongarch_split_128bit_move_p (rtx dest, rtx src)
4424 {
4425 /* LSX-to-LSX moves can be done in a single instruction. */
4426 if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
4427 return false;
4428
4429 /* Check for LSX loads and stores. */
4430 if (FP_REG_RTX_P (dest) && MEM_P (src))
4431 return false;
4432 if (FP_REG_RTX_P (src) && MEM_P (dest))
4433 return false;
4434
4435 /* Check for LSX set to an immediate const vector with valid replicated
4436 element. */
4437 if (FP_REG_RTX_P (dest)
4438 && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
4439 return false;
4440
4441 /* Check for LSX load zero immediate. */
4442 if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
4443 return false;
4444
4445 return true;
4446 }
4447
4448 /* Return true if a 256-bit move from SRC to DEST should be split. */
4449
4450 bool
4451 loongarch_split_256bit_move_p (rtx dest, rtx src)
4452 {
4453 /* LASX-to-LASX moves can be done in a single instruction. */
4454 if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
4455 return false;
4456
4457 /* Check for LASX loads and stores. */
4458 if (FP_REG_RTX_P (dest) && MEM_P (src))
4459 return false;
4460 if (FP_REG_RTX_P (src) && MEM_P (dest))
4461 return false;
4462
4463 /* Check for LASX set to an immediate const vector with valid replicated
4464 element. */
4465 if (FP_REG_RTX_P (dest)
4466 && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
4467 return false;
4468
4469 /* Check for LASX load zero immediate. */
4470 if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
4471 return false;
4472
4473 return true;
4474 }
4475
4476 /* Split a 128-bit move from SRC to DEST. */
4477
4478 void
4479 loongarch_split_128bit_move (rtx dest, rtx src)
4480 {
4481 int byte, index;
4482 rtx low_dest, low_src, d, s;
4483
4484 if (FP_REG_RTX_P (dest))
4485 {
4486 gcc_assert (!MEM_P (src));
4487
4488 rtx new_dest = dest;
4489 if (!TARGET_64BIT)
4490 {
4491 if (GET_MODE (dest) != V4SImode)
4492 new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
4493 }
4494 else
4495 {
4496 if (GET_MODE (dest) != V2DImode)
4497 new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0);
4498 }
4499
4500 for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
4501 byte += UNITS_PER_WORD, index++)
4502 {
4503 s = loongarch_subword_at_byte (src, byte);
4504 if (!TARGET_64BIT)
4505 emit_insn (gen_lsx_vinsgr2vr_w (new_dest, s, new_dest,
4506 GEN_INT (1 << index)));
4507 else
4508 emit_insn (gen_lsx_vinsgr2vr_d (new_dest, s, new_dest,
4509 GEN_INT (1 << index)));
4510 }
4511 }
4512 else if (FP_REG_RTX_P (src))
4513 {
4514 gcc_assert (!MEM_P (dest));
4515
4516 rtx new_src = src;
4517 if (!TARGET_64BIT)
4518 {
4519 if (GET_MODE (src) != V4SImode)
4520 new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
4521 }
4522 else
4523 {
4524 if (GET_MODE (src) != V2DImode)
4525 new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0);
4526 }
4527
4528 for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
4529 byte += UNITS_PER_WORD, index++)
4530 {
4531 d = loongarch_subword_at_byte (dest, byte);
4532 if (!TARGET_64BIT)
4533 emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
4534 else
4535 emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
4536 }
4537 }
4538 else
4539 {
4540 low_dest = loongarch_subword_at_byte (dest, 0);
4541 low_src = loongarch_subword_at_byte (src, 0);
4542 gcc_assert (REG_P (low_dest) && REG_P (low_src));
4543 /* Make sure the source register is not written before reading. */
4544 if (REGNO (low_dest) <= REGNO (low_src))
4545 {
4546 for (byte = 0; byte < GET_MODE_SIZE (TImode);
4547 byte += UNITS_PER_WORD)
4548 {
4549 d = loongarch_subword_at_byte (dest, byte);
4550 s = loongarch_subword_at_byte (src, byte);
4551 loongarch_emit_move (d, s);
4552 }
4553 }
4554 else
4555 {
4556 for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
4557 byte -= UNITS_PER_WORD)
4558 {
4559 d = loongarch_subword_at_byte (dest, byte);
4560 s = loongarch_subword_at_byte (src, byte);
4561 loongarch_emit_move (d, s);
4562 }
4563 }
4564 }
4565 }
4566
4567 /* Split a 256-bit move from SRC to DEST. */
4568
4569 void
4570 loongarch_split_256bit_move (rtx dest, rtx src)
4571 {
4572 int byte, index;
4573 rtx low_dest, low_src, d, s;
4574
4575 if (FP_REG_RTX_P (dest))
4576 {
4577 gcc_assert (!MEM_P (src));
4578
4579 rtx new_dest = dest;
4580 if (!TARGET_64BIT)
4581 {
4582 if (GET_MODE (dest) != V8SImode)
4583 new_dest = simplify_gen_subreg (V8SImode, dest, GET_MODE (dest), 0);
4584 }
4585 else
4586 {
4587 if (GET_MODE (dest) != V4DImode)
4588 new_dest = simplify_gen_subreg (V4DImode, dest, GET_MODE (dest), 0);
4589 }
4590
4591 for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
4592 byte += UNITS_PER_WORD, index++)
4593 {
4594 s = loongarch_subword_at_byte (src, byte);
4595 if (!TARGET_64BIT)
4596 emit_insn (gen_lasx_xvinsgr2vr_w (new_dest, s, new_dest,
4597 GEN_INT (1 << index)));
4598 else
4599 emit_insn (gen_lasx_xvinsgr2vr_d (new_dest, s, new_dest,
4600 GEN_INT (1 << index)));
4601 }
4602 }
4603 else if (FP_REG_RTX_P (src))
4604 {
4605 gcc_assert (!MEM_P (dest));
4606
4607 rtx new_src = src;
4608 if (!TARGET_64BIT)
4609 {
4610 if (GET_MODE (src) != V8SImode)
4611 new_src = simplify_gen_subreg (V8SImode, src, GET_MODE (src), 0);
4612 }
4613 else
4614 {
4615 if (GET_MODE (src) != V4DImode)
4616 new_src = simplify_gen_subreg (V4DImode, src, GET_MODE (src), 0);
4617 }
4618
4619 for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (src));
4620 byte += UNITS_PER_WORD, index++)
4621 {
4622 d = loongarch_subword_at_byte (dest, byte);
4623 if (!TARGET_64BIT)
4624 emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
4625 else
4626 emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
4627 }
4628 }
4629 else
4630 {
4631 low_dest = loongarch_subword_at_byte (dest, 0);
4632 low_src = loongarch_subword_at_byte (src, 0);
4633 gcc_assert (REG_P (low_dest) && REG_P (low_src));
4634 /* Make sure the source register is not written before reading. */
4635 if (REGNO (low_dest) <= REGNO (low_src))
4636 {
4637 for (byte = 0; byte < GET_MODE_SIZE (TImode);
4638 byte += UNITS_PER_WORD)
4639 {
4640 d = loongarch_subword_at_byte (dest, byte);
4641 s = loongarch_subword_at_byte (src, byte);
4642 loongarch_emit_move (d, s);
4643 }
4644 }
4645 else
4646 {
4647 for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
4648 byte -= UNITS_PER_WORD)
4649 {
4650 d = loongarch_subword_at_byte (dest, byte);
4651 s = loongarch_subword_at_byte (src, byte);
4652 loongarch_emit_move (d, s);
4653 }
4654 }
4655 }
4656 }
4657
4658
4659 /* Split a COPY_S.D with operands DEST, SRC and INDEX. GEN_FN is the
4660 generator used to extract each 32-bit word of the result. */
4661
4662 void
4663 loongarch_split_lsx_copy_d (rtx dest, rtx src, rtx index,
4664 rtx (*gen_fn)(rtx, rtx, rtx))
4665 {
4666 gcc_assert ((GET_MODE (src) == V2DImode && GET_MODE (dest) == DImode)
4667 || (GET_MODE (src) == V2DFmode && GET_MODE (dest) == DFmode));
4668
4669 /* Note that low is always from the lower index, and high is always
4670 from the higher index. */
4671 rtx low = loongarch_subword (dest, false);
4672 rtx high = loongarch_subword (dest, true);
4673 rtx new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
4674
4675 emit_insn (gen_fn (low, new_src, GEN_INT (INTVAL (index) * 2)));
4676 emit_insn (gen_fn (high, new_src, GEN_INT (INTVAL (index) * 2 + 1)));
4677 }
4678
4679 /* Split an INSERT.D with operands DEST, SRC1, INDEX and SRC2. */
4680
4681 void
4682 loongarch_split_lsx_insert_d (rtx dest, rtx src1, rtx index, rtx src2)
4683 {
4684 int i;
4685 gcc_assert (GET_MODE (dest) == GET_MODE (src1));
4686 gcc_assert ((GET_MODE (dest) == V2DImode
4687 && (GET_MODE (src2) == DImode || src2 == const0_rtx))
4688 || (GET_MODE (dest) == V2DFmode && GET_MODE (src2) == DFmode));
4689
4690 /* Note that low is always from the lower index, and high is always
4691 from the higher index. */
4692 rtx low = loongarch_subword (src2, false);
4693 rtx high = loongarch_subword (src2, true);
4694 rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
4695 rtx new_src1 = simplify_gen_subreg (V4SImode, src1, GET_MODE (src1), 0);
4696 i = exact_log2 (INTVAL (index));
4697 gcc_assert (i != -1);
4698
4699 emit_insn (gen_lsx_vinsgr2vr_w (new_dest, low, new_src1,
4700 GEN_INT (1 << (i * 2))));
4701 emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest,
4702 GEN_INT (1 << (i * 2 + 1))));
4703 }
4704
4705 /* Split FILL.D. */
4706
4707 void
4708 loongarch_split_lsx_fill_d (rtx dest, rtx src)
4709 {
4710 gcc_assert ((GET_MODE (dest) == V2DImode
4711 && (GET_MODE (src) == DImode || src == const0_rtx))
4712 || (GET_MODE (dest) == V2DFmode && GET_MODE (src) == DFmode));
4713
4714 /* Note that low is always from the lower index, and high is always
4715 from the higher index. */
4716 rtx low, high;
4717 if (src == const0_rtx)
4718 {
4719 low = src;
4720 high = src;
4721 }
4722 else
4723 {
4724 low = loongarch_subword (src, false);
4725 high = loongarch_subword (src, true);
4726 }
4727 rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
4728 emit_insn (gen_lsx_vreplgr2vr_w (new_dest, low));
4729 emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 1)));
4730 emit_insn (gen_lsx_vinsgr2vr_w (new_dest, high, new_dest, GEN_INT (1 << 3)));
4731 }
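
/* Illustrative sequence (editorial sketch, not part of the upstream
   sources): when the 64-bit source lives in a pair of 32-bit GPRs
   {LOW, HIGH}, the split above emits roughly

     vreplgr2vr.w  vd, LOW      # broadcast LOW to all four words
     vinsgr2vr.w   vd, HIGH, 1  # overwrite word 1 with HIGH
     vinsgr2vr.w   vd, HIGH, 3  # overwrite word 3 with HIGH

   which leaves the LOW/HIGH pair replicated in both 64-bit lanes.  */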
4732
4733
4734 /* Return the appropriate instructions to move SRC into DEST. Assume
4735 that SRC is operand 1 and DEST is operand 0. */
4736
4737 const char *
4738 loongarch_output_move (rtx dest, rtx src)
4739 {
4740 enum rtx_code dest_code = GET_CODE (dest);
4741 enum rtx_code src_code = GET_CODE (src);
4742 machine_mode mode = GET_MODE (dest);
4743 bool dbl_p = (GET_MODE_SIZE (mode) == 8);
4744 bool lsx_p = LSX_SUPPORTED_MODE_P (mode);
4745 bool lasx_p = LASX_SUPPORTED_MODE_P (mode);
4746
4747 if (loongarch_split_move_p (dest, src))
4748 return "#";
4749
4750 if ((lsx_p || lasx_p)
4751 && dest_code == REG && FP_REG_P (REGNO (dest))
4752 && src_code == CONST_VECTOR
4753 && CONST_INT_P (CONST_VECTOR_ELT (src, 0)))
4754 {
4755 gcc_assert (loongarch_const_vector_same_int_p (src, mode, -512, 511));
4756 switch (GET_MODE_SIZE (mode))
4757 {
4758 case 16:
4759 return "vrepli.%v0\t%w0,%E1";
4760 case 32:
4761 return "xvrepli.%v0\t%u0,%E1";
4762 default: gcc_unreachable ();
4763 }
4764 }
4765
4766 if ((src_code == REG && GP_REG_P (REGNO (src)))
4767 || (src == CONST0_RTX (mode)))
4768 {
4769 if (dest_code == REG)
4770 {
4771 if (GP_REG_P (REGNO (dest)))
4772 return "or\t%0,%z1,$r0";
4773
4774 if (FP_REG_P (REGNO (dest)))
4775 {
4776 if (lsx_p || lasx_p)
4777 {
4778 gcc_assert (src == CONST0_RTX (GET_MODE (src)));
4779 switch (GET_MODE_SIZE (mode))
4780 {
4781 case 16:
4782 return "vrepli.b\t%w0,0";
4783 case 32:
4784 return "xvrepli.b\t%u0,0";
4785 default:
4786 gcc_unreachable ();
4787 }
4788 }
4789
4790 return dbl_p ? "movgr2fr.d\t%0,%z1" : "movgr2fr.w\t%0,%z1";
4791 }
4792 }
4793 if (dest_code == MEM)
4794 {
4795 const char *insn = NULL;
4796 insn = loongarch_output_move_index (XEXP (dest, 0), GET_MODE (dest),
4797 false);
4798 if (insn)
4799 return insn;
4800
4801 rtx offset = XEXP (dest, 0);
4802 if (GET_CODE (offset) == PLUS)
4803 offset = XEXP (offset, 1);
4804 switch (GET_MODE_SIZE (mode))
4805 {
4806 case 1:
4807 return "st.b\t%z1,%0";
4808 case 2:
4809 return "st.h\t%z1,%0";
4810 case 4:
4811 /* Match an address with a signed 12-bit offset or an
4812 ADDRESS_LO_SUM type. */
4813 if (const_arith_operand (offset, Pmode)
4814 || GET_CODE (offset) == LO_SUM)
4815 return "st.w\t%z1,%0";
4816 else
4817 return "stptr.w\t%z1,%0";
4818 case 8:
4819 if (const_arith_operand (offset, Pmode)
4820 || GET_CODE (offset) == LO_SUM)
4821 return "st.d\t%z1,%0";
4822 else
4823 return "stptr.d\t%z1,%0";
4824 default:
4825 gcc_unreachable ();
4826 }
4827 }
4828 }
4829 if (dest_code == REG && GP_REG_P (REGNO (dest)))
4830 {
4831 if (src_code == REG)
4832 if (FP_REG_P (REGNO (src)))
4833 {
4834 gcc_assert (!lsx_p);
4835 return dbl_p ? "movfr2gr.d\t%0,%1" : "movfr2gr.s\t%0,%1";
4836 }
4837
4838 if (src_code == MEM)
4839 {
4840 const char *insn = NULL;
4841 insn = loongarch_output_move_index (XEXP (src, 0), GET_MODE (src),
4842 true);
4843 if (insn)
4844 return insn;
4845
4846 rtx offset = XEXP (src, 0);
4847 if (GET_CODE (offset) == PLUS)
4848 offset = XEXP (offset, 1);
4849 switch (GET_MODE_SIZE (mode))
4850 {
4851 case 1:
4852 return "ld.bu\t%0,%1";
4853 case 2:
4854 return "ld.hu\t%0,%1";
4855 case 4:
4856 /* Match an address with a signed 12-bit offset or an
4857 ADDRESS_LO_SUM type. */
4858 if (const_arith_operand (offset, Pmode)
4859 || GET_CODE (offset) == LO_SUM)
4860 return "ld.w\t%0,%1";
4861 else
4862 return "ldptr.w\t%0,%1";
4863 case 8:
4864 if (const_arith_operand (offset, Pmode)
4865 || GET_CODE (offset) == LO_SUM)
4866 return "ld.d\t%0,%1";
4867 else
4868 return "ldptr.d\t%0,%1";
4869 default:
4870 gcc_unreachable ();
4871 }
4872 }
4873
4874 if (src_code == HIGH)
4875 {
4876 rtx offset, x;
4877 split_const (XEXP (src, 0), &x, &offset);
4878 enum loongarch_symbol_type type = SYMBOL_PCREL;
4879
4880 if (UNSPEC_ADDRESS_P (x))
4881 type = UNSPEC_ADDRESS_TYPE (x);
4882
4883 if (type == SYMBOL_TLS_LE)
4884 return "lu12i.w\t%0,%h1";
4885 else
4886 return "pcalau12i\t%0,%h1";
4887 }
4888
4889 if (src_code == CONST_INT)
4890 {
4891 if (LU12I_INT (src))
4892 return "lu12i.w\t%0,%1>>12\t\t\t# %X1";
4893 else if (IMM12_INT (src))
4894 return "addi.w\t%0,$r0,%1\t\t\t# %X1";
4895 else if (IMM12_INT_UNSIGNED (src))
4896 return "ori\t%0,$r0,%1\t\t\t# %X1";
4897 else if (LU52I_INT (src))
4898 return "lu52i.d\t%0,$r0,%X1>>52\t\t\t# %1";
4899 else
4900 gcc_unreachable ();
4901 }
4902 }
4903
4904 if (!loongarch_explicit_relocs_p (loongarch_classify_symbol (src))
4905 && dest_code == REG && symbolic_operand (src, VOIDmode))
4906 {
4907 if (loongarch_classify_symbol (src) == SYMBOL_PCREL)
4908 return "la.local\t%0,%1";
4909 else
4910 return "la.global\t%0,%1";
4911 }
4912
4913 if (src_code == REG && FP_REG_P (REGNO (src)))
4914 {
4915 if (dest_code == REG && FP_REG_P (REGNO (dest)))
4916 {
4917 if (lsx_p || lasx_p)
4918 {
4919 switch (GET_MODE_SIZE (mode))
4920 {
4921 case 16:
4922 return "vori.b\t%w0,%w1,0";
4923 case 32:
4924 return "xvori.b\t%u0,%u1,0";
4925 default:
4926 gcc_unreachable ();
4927 }
4928 }
4929
4930 return dbl_p ? "fmov.d\t%0,%1" : "fmov.s\t%0,%1";
4931 }
4932
4933 if (dest_code == MEM)
4934 {
4935 const char *insn = NULL;
4936 insn = loongarch_output_move_index_float (XEXP (dest, 0),
4937 GET_MODE (dest),
4938 false);
4939 if (insn)
4940 return insn;
4941
4942 if (lsx_p || lasx_p)
4943 {
4944 switch (GET_MODE_SIZE (mode))
4945 {
4946 case 16:
4947 return "vst\t%w1,%0";
4948 case 32:
4949 return "xvst\t%u1,%0";
4950 default:
4951 gcc_unreachable ();
4952 }
4953 }
4954
4955 return dbl_p ? "fst.d\t%1,%0" : "fst.s\t%1,%0";
4956 }
4957 }
4958
4959 if (dest_code == REG && FP_REG_P (REGNO (dest)))
4960 {
4961 if (src_code == MEM)
4962 {
4963 const char *insn = NULL;
4964 insn = loongarch_output_move_index_float (XEXP (src, 0),
4965 GET_MODE (src),
4966 true);
4967 if (insn)
4968 return insn;
4969
4970 if (lsx_p || lasx_p)
4971 {
4972 switch (GET_MODE_SIZE (mode))
4973 {
4974 case 16:
4975 return "vld\t%w0,%1";
4976 case 32:
4977 return "xvld\t%u0,%1";
4978 default:
4979 gcc_unreachable ();
4980 }
4981 }
4982 return dbl_p ? "fld.d\t%0,%1" : "fld.s\t%0,%1";
4983 }
4984 }
4985
4986 gcc_unreachable ();
4987 }
4988
4989 /* Return true if CMP1 is a suitable second operand for integer ordering
4990 test CODE. */
4991
4992 static bool
4993 loongarch_int_order_operand_ok_p (enum rtx_code code, rtx cmp1)
4994 {
4995 switch (code)
4996 {
4997 case GT:
4998 case GTU:
4999 return reg_or_0_operand (cmp1, VOIDmode);
5000
5001 case GE:
5002 case GEU:
5003 return cmp1 == const1_rtx;
5004
5005 case LT:
5006 case LTU:
5007 return arith_operand (cmp1, VOIDmode);
5008
5009 case LE:
5010 return sle_operand (cmp1, VOIDmode);
5011
5012 case LEU:
5013 return sleu_operand (cmp1, VOIDmode);
5014
5015 default:
5016 gcc_unreachable ();
5017 }
5018 }
5019
5020 /* Return true if *CMP1 (of mode MODE) is a valid second operand for
5021 integer ordering test *CODE, or if an equivalent combination can
5022 be formed by adjusting *CODE and *CMP1. When returning true, update
5023 *CODE and *CMP1 with the chosen code and operand, otherwise leave
5024 them alone. */
5025
5026 static bool
5027 loongarch_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
5028 machine_mode mode)
5029 {
5030 HOST_WIDE_INT plus_one;
5031
5032 if (loongarch_int_order_operand_ok_p (*code, *cmp1))
5033 return true;
5034
5035 if (CONST_INT_P (*cmp1))
5036 switch (*code)
5037 {
5038 case LE:
5039 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
5040 if (INTVAL (*cmp1) < plus_one)
5041 {
5042 *code = LT;
5043 *cmp1 = force_reg (mode, GEN_INT (plus_one));
5044 return true;
5045 }
5046 break;
5047
5048 case LEU:
5049 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
5050 if (plus_one != 0)
5051 {
5052 *code = LTU;
5053 *cmp1 = force_reg (mode, GEN_INT (plus_one));
5054 return true;
5055 }
5056 break;
5057
5058 default:
5059 break;
5060 }
5061 return false;
5062 }
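
/* Worked example (editorial sketch, not part of the upstream sources):
   a test "x <= 100" has no direct machine form, so the LE case above
   rewrites it as "x < 101" and forces 101 into a register for slt; an
   unsigned test "x <=u 0xffffffff" in SImode is left alone because the
   incremented constant would wrap to zero.  */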
5063
5064 /* Compare CMP0 and CMP1 using ordering test CODE and store the result
5065 in TARGET. CMP0 and TARGET are register_operands. If INVERT_PTR
5066 is nonnull, it's OK to set TARGET to the inverse of the result and
5067 flip *INVERT_PTR instead. */
5068
5069 static void
5070 loongarch_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
5071 rtx target, rtx cmp0, rtx cmp1)
5072 {
5073 machine_mode mode;
5074
5075 /* First see if there is a LoongArch instruction that can do this operation.
5076 If not, try doing the same for the inverse operation. If that also
5077 fails, force CMP1 into a register and try again. */
5078 mode = GET_MODE (cmp0);
5079 if (loongarch_canonicalize_int_order_test (&code, &cmp1, mode))
5080 loongarch_emit_binary (code, target, cmp0, cmp1);
5081 else
5082 {
5083 enum rtx_code inv_code = reverse_condition (code);
5084 if (!loongarch_canonicalize_int_order_test (&inv_code, &cmp1, mode))
5085 {
5086 cmp1 = force_reg (mode, cmp1);
5087 loongarch_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
5088 }
5089 else if (invert_ptr == 0)
5090 {
5091 rtx inv_target;
5092
5093 inv_target = loongarch_force_binary (GET_MODE (target),
5094 inv_code, cmp0, cmp1);
5095 loongarch_emit_binary (XOR, target, inv_target, const1_rtx);
5096 }
5097 else
5098 {
5099 *invert_ptr = !*invert_ptr;
5100 loongarch_emit_binary (inv_code, target, cmp0, cmp1);
5101 }
5102 }
5103 }
5104
5105 /* Return a register that is zero if CMP0 and CMP1 are equal.
5106 The register will have the same mode as CMP0. */
5107
5108 static rtx
5109 loongarch_zero_if_equal (rtx cmp0, rtx cmp1)
5110 {
5111 if (cmp1 == const0_rtx)
5112 return cmp0;
5113
5114 if (uns_arith_operand (cmp1, VOIDmode))
5115 return expand_binop (GET_MODE (cmp0), xor_optab, cmp0, cmp1, 0, 0,
5116 OPTAB_DIRECT);
5117
5118 return expand_binop (GET_MODE (cmp0), sub_optab, cmp0, cmp1, 0, 0,
5119 OPTAB_DIRECT);
5120 }
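
/* Illustrative example (editorial sketch, not part of the upstream
   sources): comparing a register against a small unsigned constant such
   as 5 uses the XOR form (a single xori), whose result is zero exactly
   when the operands are equal; for other operands a subtraction is used
   instead.  */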
5121
5122 /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */
5123
5124 static void
5125 loongarch_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
5126 {
5127 /* Comparisons consider all GRLEN bits, so extend sub-GRLEN values. */
5128 if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)))
5129 {
5130 /* It is more profitable to zero-extend QImode values. But not if the
5131 first operand has already been sign-extended, and the second one
5132 is a constant or has also already been sign-extended. */
5133 if (unsigned_condition (code) == code
5134 && (GET_MODE (*op0) == QImode
5135 && ! (GET_CODE (*op0) == SUBREG
5136 && SUBREG_PROMOTED_VAR_P (*op0)
5137 && SUBREG_PROMOTED_SIGNED_P (*op0)
5138 && (CONST_INT_P (*op1)
5139 || (GET_CODE (*op1) == SUBREG
5140 && SUBREG_PROMOTED_VAR_P (*op1)
5141 && SUBREG_PROMOTED_SIGNED_P (*op1))))))
5142 {
5143 *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
5144 if (CONST_INT_P (*op1))
5145 *op1 = GEN_INT ((uint8_t) INTVAL (*op1));
5146 else
5147 *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1);
5148 }
5149 else
5150 {
5151 *op0 = gen_rtx_SIGN_EXTEND (word_mode, *op0);
5152 if (*op1 != const0_rtx)
5153 *op1 = gen_rtx_SIGN_EXTEND (word_mode, *op1);
5154 }
5155 }
5156 }
5157
5158
5159 /* Convert a comparison into something that can be used in a branch. On
5160 entry, *OP0 and *OP1 are the values being compared and *CODE is the code
5161 used to compare them. Update them to describe the final comparison. */
5162
5163 static void
5164 loongarch_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1)
5165 {
5166 static const enum rtx_code
5167 mag_comparisons[][2] = {{LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}};
5168
5169 if (splittable_const_int_operand (*op1, VOIDmode))
5170 {
5171 HOST_WIDE_INT rhs = INTVAL (*op1);
5172
5173 if (*code == EQ || *code == NE)
5174 {
5175 /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0. */
5176 if (IMM12_OPERAND (-rhs))
5177 {
5178 *op0 = loongarch_force_binary (GET_MODE (*op0), PLUS, *op0,
5179 GEN_INT (-rhs));
5180 *op1 = const0_rtx;
5181 }
5182 }
5183 else
5184 {
5185 /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000). */
5186 for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++)
5187 {
5188 HOST_WIDE_INT new_rhs;
5189 bool increment = *code == mag_comparisons[i][0];
5190 bool decrement = *code == mag_comparisons[i][1];
5191 if (!increment && !decrement)
5192 continue;
5193
5194 if ((increment && rhs == HOST_WIDE_INT_MAX)
5195 || (decrement && rhs == HOST_WIDE_INT_MIN))
5196 break;
5197
5198 new_rhs = rhs + (increment ? 1 : -1);
5199 if (loongarch_integer_cost (new_rhs)
5200 < loongarch_integer_cost (rhs))
5201 {
5202 *op1 = GEN_INT (new_rhs);
5203 *code = mag_comparisons[i][increment];
5204 }
5205 break;
5206 }
5207 }
5208 }
5209
5210 loongarch_extend_comparands (*code, op0, op1);
5211
5212 *op0 = force_reg (word_mode, *op0);
5213 if (*op1 != const0_rtx)
5214 *op1 = force_reg (word_mode, *op1);
5215 }
5216
5217 /* Like loongarch_emit_int_compare, but for floating-point comparisons. */
5218
5219 static void
5220 loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1)
5221 {
5222 rtx cmp_op0 = *op0;
5223 rtx cmp_op1 = *op1;
5224
5225 /* Floating-point tests use a separate FCMP.cond.fmt
5226 comparison to set a register. The branch or conditional move will
5227 then compare that register against zero.
5228
5229 Set CMP_CODE to the code of the comparison instruction and
5230 *CODE to the code that the branch or move should use. */
5231 enum rtx_code cmp_code = *code;
5232 /* Three FP conditions cannot be implemented by reversing the
5233 operands for FCMP.cond.fmt, instead a reversed condition code is
5234 required and a test for false. */
5235 *code = NE;
5236 *op0 = gen_reg_rtx (FCCmode);
5237
5238 *op1 = const0_rtx;
5239 loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1);
5240 }
5241
5242 /* Try performing the comparison in OPERANDS[1], whose operands are
5243 OPERANDS[2] and OPERANDS[3]. Store the result in OPERANDS[0].
5244
5245 On 64-bit targets, the mode of the comparison and target will always be
5246 SImode, thus possibly narrower than that of the comparison's operands. */
5247
5248 void
5249 loongarch_expand_scc (rtx operands[])
5250 {
5251 rtx target = operands[0];
5252 enum rtx_code code = GET_CODE (operands[1]);
5253 rtx op0 = operands[2];
5254 rtx op1 = operands[3];
5255
5256 loongarch_extend_comparands (code, &op0, &op1);
5257 op0 = force_reg (word_mode, op0);
5258
5259 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT);
5260
5261 if (code == EQ || code == NE)
5262 {
5263 rtx zie = loongarch_zero_if_equal (op0, op1);
5264 loongarch_emit_binary (code, target, zie, const0_rtx);
5265 }
5266 else
5267 loongarch_emit_int_order_test (code, 0, target, op0, op1);
5268 }
5269
5270 /* Compare OPERANDS[1] with OPERANDS[2] using the comparison code
5271 in OPERANDS[0] and jump to OPERANDS[3] if the condition holds. */
5272
5273 void
5274 loongarch_expand_conditional_branch (rtx *operands)
5275 {
5276 enum rtx_code code = GET_CODE (operands[0]);
5277 rtx op0 = operands[1];
5278 rtx op1 = operands[2];
5279 rtx condition;
5280
5281 if (FLOAT_MODE_P (GET_MODE (op1)))
5282 loongarch_emit_float_compare (&code, &op0, &op1);
5283 else
5284 loongarch_emit_int_compare (&code, &op0, &op1);
5285
5286 condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
5287 emit_jump_insn (gen_condjump (condition, operands[3]));
5288 }
5289
5290 /* Perform the comparison in OPERANDS[1]. Move OPERANDS[2] into OPERANDS[0]
5291 if the condition holds, otherwise move OPERANDS[3] into OPERANDS[0]. */
5292
5293 void
5294 loongarch_expand_conditional_move (rtx *operands)
5295 {
5296 enum rtx_code code = GET_CODE (operands[1]);
5297 rtx op0 = XEXP (operands[1], 0);
5298 rtx op1 = XEXP (operands[1], 1);
5299 rtx op0_extend = op0;
5300 rtx op1_extend = op1;
5301
5302 /* Record whether operands[2] and operands[3] modes are promoted to word_mode. */
5303 bool promote_p = false;
5304 machine_mode mode = GET_MODE (operands[0]);
5305
5306 if (FLOAT_MODE_P (GET_MODE (op1)))
5307 loongarch_emit_float_compare (&code, &op0, &op1);
5308 else
5309 {
5310 if ((REGNO (op0) == REGNO (operands[2])
5311 || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
5312 && (GET_MODE_SIZE (GET_MODE (op0)) < GET_MODE_SIZE (word_mode)))
5313 {
5314 mode = word_mode;
5315 promote_p = true;
5316 }
5317
5318 loongarch_extend_comparands (code, &op0, &op1);
5319
5320 op0 = force_reg (word_mode, op0);
5321 op0_extend = op0;
5322 op1_extend = force_reg (word_mode, op1);
5323
5324 if (code == EQ || code == NE)
5325 {
5326 op0 = loongarch_zero_if_equal (op0, op1);
5327 op1 = const0_rtx;
5328 }
5329 else
5330 {
5331 /* The comparison needs a separate scc instruction. Store the
5332 result of the scc in *OP0 and compare it against zero. */
5333 bool invert = false;
5334 rtx target = gen_reg_rtx (GET_MODE (op0));
5335 loongarch_emit_int_order_test (code, &invert, target, op0, op1);
5336 code = invert ? EQ : NE;
5337 op0 = target;
5338 op1 = const0_rtx;
5339 }
5340 }
5341
5342 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5343 /* There is no direct support for general conditional GP move involving
5344 two registers using SEL. */
5345 if (INTEGRAL_MODE_P (GET_MODE (operands[2]))
5346 && register_operand (operands[2], VOIDmode)
5347 && register_operand (operands[3], VOIDmode))
5348 {
5349 rtx op2 = operands[2];
5350 rtx op3 = operands[3];
5351
5352 if (promote_p)
5353 {
5354 if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
5355 op2 = op0_extend;
5356 else
5357 {
5358 loongarch_extend_comparands (code, &op2, &const0_rtx);
5359 op2 = force_reg (mode, op2);
5360 }
5361
5362 if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
5363 op3 = op1_extend;
5364 else
5365 {
5366 loongarch_extend_comparands (code, &op3, &const0_rtx);
5367 op3 = force_reg (mode, op3);
5368 }
5369 }
5370
5371 rtx temp = gen_reg_rtx (mode);
5372 rtx temp2 = gen_reg_rtx (mode);
5373
5374 emit_insn (gen_rtx_SET (temp,
5375 gen_rtx_IF_THEN_ELSE (mode, cond,
5376 op2, const0_rtx)));
5377
5378 /* Flip the test for the second operand. */
5379 cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
5380
5381 emit_insn (gen_rtx_SET (temp2,
5382 gen_rtx_IF_THEN_ELSE (mode, cond,
5383 op3, const0_rtx)));
5384
5385 /* Merge the two results, at least one is guaranteed to be zero. */
5386 if (promote_p)
5387 {
5388 rtx temp3 = gen_reg_rtx (mode);
5389 emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
5390 temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
5391 /* Nonzero in a subreg if it was made when accessing an object that
5392 was promoted to a wider mode in accord with the PROMOTED_MODE
5393 machine description macro. */
5394 SUBREG_PROMOTED_VAR_P (temp3) = 1;
5395 /* Sets promoted mode for SUBREG_PROMOTED_VAR_P. */
5396 SUBREG_PROMOTED_SET (temp3, SRP_SIGNED);
5397 loongarch_emit_move (operands[0], temp3);
5398 }
5399 else
5400 emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
5401 }
5402 else
5403 emit_insn (gen_rtx_SET (operands[0],
5404 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]), cond,
5405 operands[2], operands[3])));
5406 }
5407
5408 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5409
5410 static void
5411 loongarch_va_start (tree valist, rtx nextarg)
5412 {
5413 nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size);
5414 std_expand_builtin_va_start (valist, nextarg);
5415 }
5416
5417 /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */
5418
5419 static bool
5420 loongarch_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
5421 tree exp ATTRIBUTE_UNUSED)
5422 {
5423 /* Always OK. */
5424 return true;
5425 }
5426
5427 static machine_mode
5428 loongarch_mode_for_move_size (HOST_WIDE_INT size)
5429 {
5430 switch (size)
5431 {
5432 case 32:
5433 return V32QImode;
5434 case 16:
5435 return V16QImode;
5436 }
5437
5438 return int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5439 }
5440
5441 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
5442 Assume that the areas do not overlap. */
5443
5444 static void
5445 loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
5446 HOST_WIDE_INT delta)
5447 {
5448 HOST_WIDE_INT offs, delta_cur;
5449 int i;
5450 machine_mode mode;
5451 rtx *regs;
5452
5453 /* Calculate how many registers we'll need for the block move.
5454 We'll emit length / delta move operations with delta as the size
5455 first. Then we may still have length % delta bytes not copied.
5456 We handle these remaining bytes by move operations with smaller
5457 (halved) sizes. For example, if length = 21 and delta = 8, we'll
5458 emit two ld.d/st.d pairs, one ld.w/st.w pair, and one ld.b/st.b
5459 pair. For each load/store pair we use a dedicated register to keep
5460 the pipeline as populated as possible. */
5461 gcc_assert (pow2p_hwi (delta));
5462 HOST_WIDE_INT num_reg = length / delta + popcount_hwi (length % delta);
5463
5464 /* Allocate a buffer for the temporary registers. */
5465 regs = XALLOCAVEC (rtx, num_reg);
5466
5467 for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
5468 {
5469 mode = loongarch_mode_for_move_size (delta_cur);
5470
5471 for (; offs + delta_cur <= length; offs += delta_cur, i++)
5472 {
5473 regs[i] = gen_reg_rtx (mode);
5474 loongarch_emit_move (regs[i], adjust_address (src, mode, offs));
5475 }
5476 }
5477
5478 for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
5479 {
5480 mode = loongarch_mode_for_move_size (delta_cur);
5481
5482 for (; offs + delta_cur <= length; offs += delta_cur, i++)
5483 loongarch_emit_move (adjust_address (dest, mode, offs), regs[i]);
5484 }
5485 }
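
/* Illustrative expansion (editorial sketch, not part of the upstream
   sources): with length = 21 and delta = 8 the loops above load every
   piece first and only then store, roughly

     ld.d  t0,src,0;  ld.d  t1,src,8;  ld.w  t2,src,16;  ld.b  t3,src,20
     st.d  t0,dst,0;  st.d  t1,dst,8;  st.w  t2,dst,16;  st.b  t3,dst,20

   where the register names are for illustration only; the expander
   allocates a fresh pseudo for each piece.  */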
5486
5487 /* Helper function for doing a loop-based block operation on memory
5488 reference MEM. Each iteration of the loop will operate on LENGTH
5489 bytes of MEM.
5490
5491 Create a new base register for use within the loop and point it to
5492 the start of MEM. Create a new memory reference that uses this
5493 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
5494
5495 static void
5496 loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
5497 rtx *loop_mem)
5498 {
5499 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
5500
5501 /* Although the new mem does not refer to a known location,
5502 it does keep up to LENGTH bytes of alignment. */
5503 *loop_mem = change_address (mem, BLKmode, *loop_reg);
5504 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
5505 }
5506
5507 /* Move LENGTH bytes from SRC to DEST using a loop that moves
5508 ALIGN * LARCH_MAX_MOVE_OPS_PER_LOOP_ITER bytes per iteration. LENGTH must
5509 be at least that large. Assume that the memory regions do not overlap. */
5510
5511 static void
5512 loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
5513 HOST_WIDE_INT align)
5514 {
5515 rtx_code_label *label;
5516 rtx src_reg, dest_reg, final_src, test;
5517 HOST_WIDE_INT bytes_per_iter = align * LARCH_MAX_MOVE_OPS_PER_LOOP_ITER;
5518 HOST_WIDE_INT leftover;
5519
5520 leftover = length % bytes_per_iter;
5521 length -= leftover;
5522
5523 /* Create registers and memory references for use within the loop. */
5524 loongarch_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
5525 loongarch_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
5526
5527 /* Calculate the value that SRC_REG should have after the last iteration
5528 of the loop. */
5529 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), 0,
5530 0, OPTAB_WIDEN);
5531
5532 /* Emit the start of the loop. */
5533 label = gen_label_rtx ();
5534 emit_label (label);
5535
5536 /* Emit the loop body. */
5537 loongarch_block_move_straight (dest, src, bytes_per_iter, align);
5538
5539 /* Move on to the next block. */
5540 loongarch_emit_move (src_reg,
5541 plus_constant (Pmode, src_reg, bytes_per_iter));
5542 loongarch_emit_move (dest_reg,
5543 plus_constant (Pmode, dest_reg, bytes_per_iter));
5544
5545 /* Emit the loop condition. */
5546 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
5547 if (Pmode == DImode)
5548 emit_jump_insn (gen_cbranchdi4 (test, src_reg, final_src, label));
5549 else
5550 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
5551
5552 /* Mop up any left-over bytes. */
5553 if (leftover)
5554 loongarch_block_move_straight (dest, src, leftover, align);
5555 else
5556 /* Temporary fix for PR79150. */
5557 emit_insn (gen_nop ());
5558 }
5559
5560 /* Expand a cpymemsi instruction, which copies LENGTH bytes from
5561 memory reference SRC to memory reference DEST. */
5562
5563 bool
5564 loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx r_align)
5565 {
5566 if (!CONST_INT_P (r_length))
5567 return false;
5568
5569 HOST_WIDE_INT length = INTVAL (r_length);
5570 if (length > la_max_inline_memcpy_size)
5571 return false;
5572
5573 HOST_WIDE_INT align = INTVAL (r_align);
5574
5575 if (!TARGET_STRICT_ALIGN || align > LARCH_MAX_MOVE_PER_INSN)
5576 align = LARCH_MAX_MOVE_PER_INSN;
5577
5578 if (length <= align * LARCH_MAX_MOVE_OPS_STRAIGHT)
5579 {
5580 loongarch_block_move_straight (dest, src, length, align);
5581 return true;
5582 }
5583
5584 if (optimize)
5585 {
5586 loongarch_block_move_loop (dest, src, length, align);
5587 return true;
5588 }
5589
5590 return false;
5591 }
5592
5593 /* Return true if loongarch_expand_block_move is the preferred
5594 implementation of the 'cpymemsi' template. */
5595
5596 bool
5597 loongarch_do_optimize_block_move_p (void)
5598 {
5599 /* If -m[no-]memcpy was given explicitly, respect it. */
5600 if (target_flags_explicit & MASK_MEMCPY)
5601 return !TARGET_MEMCPY;
5602
5603 /* Otherwise, don't optimize block moves under -Os. */
5604 return !optimize_size;
5605 }
5606
5607 /* Expand a QI or HI mode atomic memory operation.
5608
5609 GENERATOR contains a pointer to the gen_* function that generates
5610 the SI mode underlying atomic operation using masks that we
5611 calculate.
5612
5613 RESULT is the return register for the operation. Its value is NULL
5614 if unused.
5615
5616 MEM is the location of the atomic access.
5617
5618 OLDVAL is the first operand for the operation.
5619
5620 NEWVAL is the optional second operand for the operation. Its value
5621 is NULL if unused. */
5622
5623 void
5624 loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs generator,
5625 rtx result, rtx mem, rtx oldval, rtx newval,
5626 rtx model)
5627 {
5628 rtx orig_addr, memsi_addr, memsi, shift, shiftsi, unshifted_mask;
5629 rtx unshifted_mask_reg, mask, inverted_mask, si_op;
5630 rtx res = NULL;
5631 machine_mode mode;
5632
5633 mode = GET_MODE (mem);
5634
5635 /* Compute the address of the containing SImode value. */
5636 orig_addr = force_reg (Pmode, XEXP (mem, 0));
5637 memsi_addr = loongarch_force_binary (Pmode, AND, orig_addr,
5638 force_reg (Pmode, GEN_INT (-4)));
5639
5640 /* Create a memory reference for it. */
5641 memsi = gen_rtx_MEM (SImode, memsi_addr);
5642 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
5643 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
5644
5645 /* Work out the byte offset of the QImode or HImode value,
5646 counting from the least significant byte. */
5647 shift = loongarch_force_binary (Pmode, AND, orig_addr, GEN_INT (3));
5648 /* Multiply by eight to convert the shift value from bytes to bits. */
5649 loongarch_emit_binary (ASHIFT, shift, shift, GEN_INT (3));
5650
5651 /* Make the final shift an SImode value, so that it can be used in
5652 SImode operations. */
5653 shiftsi = force_reg (SImode, gen_lowpart (SImode, shift));
5654
5655 /* Set MASK to an inclusive mask of the QImode or HImode value. */
5656 unshifted_mask = GEN_INT (GET_MODE_MASK (mode));
5657 unshifted_mask_reg = force_reg (SImode, unshifted_mask);
5658 mask = loongarch_force_binary (SImode, ASHIFT, unshifted_mask_reg, shiftsi);
5659
5660 /* Compute the equivalent exclusive mask. */
5661 inverted_mask = gen_reg_rtx (SImode);
5662 emit_insn (gen_rtx_SET (inverted_mask, gen_rtx_NOT (SImode, mask)));
5663
5664 /* Shift the old value into place. */
5665 if (oldval != const0_rtx)
5666 {
5667 oldval = convert_modes (SImode, mode, oldval, true);
5668 oldval = force_reg (SImode, oldval);
5669 oldval = loongarch_force_binary (SImode, ASHIFT, oldval, shiftsi);
5670 }
5671
5672 /* Do the same for the new value. */
5673 if (newval && newval != const0_rtx)
5674 {
5675 newval = convert_modes (SImode, mode, newval, true);
5676 newval = force_reg (SImode, newval);
5677 newval = loongarch_force_binary (SImode, ASHIFT, newval, shiftsi);
5678 }
5679
5680 /* Do the SImode atomic access. */
5681 if (result)
5682 res = gen_reg_rtx (SImode);
5683
5684 if (newval)
5685 si_op = generator.fn_7 (res, memsi, mask, inverted_mask, oldval, newval,
5686 model);
5687 else if (result)
5688 si_op = generator.fn_6 (res, memsi, mask, inverted_mask, oldval, model);
5689 else
5690 si_op = generator.fn_5 (memsi, mask, inverted_mask, oldval, model);
5691
5692 emit_insn (si_op);
5693
5694 if (result)
5695 {
5696 /* Shift and convert the result. */
5697 loongarch_emit_binary (AND, res, res, mask);
5698 loongarch_emit_binary (LSHIFTRT, res, res, shiftsi);
5699 loongarch_emit_move (result, gen_lowpart (GET_MODE (result), res));
5700 }
5701 }
5702
5703 /* Return true if (zero_extract OP WIDTH BITPOS) can be used as the
5704 source of an "ext" instruction or the destination of an "ins"
5705 instruction. OP must be a register operand and the following
5706 conditions must hold:
5707
5708 0 <= BITPOS < GET_MODE_BITSIZE (GET_MODE (op))
5709 0 < WIDTH <= GET_MODE_BITSIZE (GET_MODE (op))
5710 0 < BITPOS + WIDTH <= GET_MODE_BITSIZE (GET_MODE (op))
5711
5712 Also reject lengths equal to a word as they are better handled
5713 by the move patterns. */
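/* For instance, with a 64-bit OP, WIDTH 16 at BITPOS 32 is accepted, while
   WIDTH 64 (a full-register copy) or any BITPOS + WIDTH > 64 is rejected.  */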
5714
5715 bool
5716 loongarch_use_ins_ext_p (rtx op, HOST_WIDE_INT width, HOST_WIDE_INT bitpos)
5717 {
5718 if (!register_operand (op, VOIDmode)
5719 || GET_MODE_BITSIZE (GET_MODE (op)) > BITS_PER_WORD)
5720 return false;
5721
5722 if (!IN_RANGE (width, 1, GET_MODE_BITSIZE (GET_MODE (op)) - 1))
5723 return false;
5724
5725 if (bitpos < 0 || bitpos + width > GET_MODE_BITSIZE (GET_MODE (op)))
5726 return false;
5727
5728 return true;
5729 }
5730
5731 /* Predicate for pre-reload splitters with associated instructions,
5732 which can match any time before the split1 pass (usually combine),
5733 then are unconditionally split in that pass and should not be
5734 matched again afterwards. */
5735
5736 bool loongarch_pre_reload_split (void)
5737 {
5738 return (can_create_pseudo_p ()
5739 && !(cfun->curr_properties & PROP_rtl_split_insns));
5740 }
5741
5742 /* Check if we can use bstrins.<d> for
5743 op0 = (op1 & op2) | (op3 & op4)
5744 where op0, op1, op3 are regs, and op2, op4 are integer constants. */
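/* For example (DImode), op2 = 0xffffffffffff0000 and op4 = 0xffff are
   complementary masks and op4 is a low bitmask, so the expression can be
   implemented by copying op1 and using bstrins.d to insert bits 0..15 of
   op3 into the result.  */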
5745 int
5746 loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
5747 {
5748 unsigned HOST_WIDE_INT mask1 = UINTVAL (op[2]);
5749 unsigned HOST_WIDE_INT mask2 = UINTVAL (op[4]);
5750
5751 if (mask1 != ~mask2 || !mask1 || !mask2)
5752 return 0;
5753
5754 /* Try to avoid a right-shift. */
5755 if (low_bitmask_len (mode, mask1) != -1)
5756 return -1;
5757
5758 if (low_bitmask_len (mode, mask2 >> (ffs_hwi (mask2) - 1)) != -1)
5759 return 1;
5760
5761 if (low_bitmask_len (mode, mask1 >> (ffs_hwi (mask1) - 1)) != -1)
5762 return -1;
5763
5764 return 0;
5765 }
5766
5767 /* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto
5768 -mcmodel={normal/medium}. */
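/* Roughly, a load of SYM then ends up as the pair

     pcalau12i  $rd, %pc_hi20(SYM)
     ld.w       $rd, $rd, %pc_lo12(SYM)

   where the pcalau12i high part is represented by the UNSPEC below and the
   low part by the LO_SUM.  */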
5769 rtx
5770 loongarch_rewrite_mem_for_simple_ldst (rtx mem)
5771 {
5772 rtx addr = XEXP (mem, 0);
5773 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
5774 UNSPEC_PCALAU12I_GR);
5775 rtx new_mem;
5776
5777 addr = gen_rtx_LO_SUM (Pmode, force_reg (Pmode, hi), addr);
5778 new_mem = gen_rtx_MEM (GET_MODE (mem), addr);
5779 MEM_COPY_ATTRIBUTES (new_mem, mem);
5780 return new_mem;
5781 }
5782
5783 /* Print the text for PRINT_OPERAND punctuation character CH to FILE.
5784 The punctuation characters are:
5785
5786 '.' Print the name of the register with a hard-wired zero (zero or $r0).
5787 '$' Print the name of the stack pointer register (sp or $r3).
5788
5789 See also loongarch_init_print_operand_punct. */
5790
5791 static void
5792 loongarch_print_operand_punctuation (FILE *file, int ch)
5793 {
5794 switch (ch)
5795 {
5796 case '.':
5797 fputs (reg_names[GP_REG_FIRST + 0], file);
5798 break;
5799
5800 case '$':
5801 fputs (reg_names[STACK_POINTER_REGNUM], file);
5802 break;
5803
5804 default:
5805 gcc_unreachable ();
5806 break;
5807 }
5808 }
5809
5810 /* Initialize loongarch_print_operand_punct. */
5811
5812 static void
5813 loongarch_init_print_operand_punct (void)
5814 {
5815 const char *p;
5816
5817 for (p = ".$"; *p; p++)
5818 loongarch_print_operand_punct[(unsigned char) *p] = true;
5819 }
5820
5821 /* PRINT_OPERAND prefix LETTER refers to the integer branch instruction
5822 associated with condition CODE. Print the condition part of the
5823 opcode to FILE. */
5824
5825 static void
5826 loongarch_print_int_branch_condition (FILE *file, enum rtx_code code,
5827 int letter)
5828 {
5829 switch (code)
5830 {
5831 case EQ:
5832 case NE:
5833 case GT:
5834 case GE:
5835 case LT:
5836 case LE:
5837 case GTU:
5838 case GEU:
5839 case LTU:
5840 case LEU:
5841 /* Conveniently, the LoongArch names for these conditions are the same
5842 as their RTL equivalents. */
5843 fputs (GET_RTX_NAME (code), file);
5844 break;
5845
5846 default:
5847 output_operand_lossage ("'%%%c' is not a valid operand prefix", letter);
5848 break;
5849 }
5850 }
5851
5852 /* Likewise floating-point branches. */
5853
5854 static void
5855 loongarch_print_float_branch_condition (FILE *file, enum rtx_code code,
5856 int letter)
5857 {
5858 switch (code)
5859 {
5860 case EQ:
5861 fputs ("ceqz", file);
5862 break;
5863
5864 case NE:
5865 fputs ("cnez", file);
5866 break;
5867
5868 default:
5869 output_operand_lossage ("'%%%c' is not a valid operand prefix", letter);
5870 break;
5871 }
5872 }
5873
5874 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
5875
5876 static bool
5877 loongarch_print_operand_punct_valid_p (unsigned char code)
5878 {
5879 return loongarch_print_operand_punct[code];
5880 }
5881
5882 /* Return true if a FENCE should be emitted before a memory access to
5883    implement the acquire or release portion of memory model MODEL.  */
5884
5885 static bool
5886 loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
5887 {
5888 switch (memmodel_base (model))
5889 {
5890 case MEMMODEL_ACQ_REL:
5891 case MEMMODEL_SEQ_CST:
5892 case MEMMODEL_RELEASE:
5893 case MEMMODEL_ACQUIRE:
5894 return true;
5895
5896 case MEMMODEL_RELAXED:
5897 return false;
5898
5899 default:
5900 gcc_unreachable ();
5901 }
5902 }
5903
5904 /* Return true if a FENCE should be emitted after a failed CAS to
5905 implement the acquire semantic of failure_memorder. */
5906
5907 static bool
5908 loongarch_cas_failure_memorder_needs_acquire (enum memmodel model)
5909 {
5910 switch (memmodel_base (model))
5911 {
5912 case MEMMODEL_ACQUIRE:
5913 case MEMMODEL_ACQ_REL:
5914 case MEMMODEL_SEQ_CST:
5915 return true;
5916
5917 case MEMMODEL_RELAXED:
5918 case MEMMODEL_RELEASE:
5919 return false;
5920
5921 /* MEMMODEL_CONSUME is deliberately not handled because it's always
5922        replaced by MEMMODEL_ACQUIRE for now.  If you see an ICE caused by
5923 MEMMODEL_CONSUME, read the change (re)introducing it carefully and
5924 decide what to do. See PR 59448 and get_memmodel in builtins.cc. */
5925 default:
5926 gcc_unreachable ();
5927 }
5928 }
5929
5930 /* Print symbolic operand OP, which is part of a HIGH or LO_SUM.  HI64_PART
5931    selects the bit 32-63 relocations of a 64-bit address; HI_RELOC indicates a high-part reloc.  */
5932
5933 static void
5934 loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
5935 bool hi_reloc)
5936 {
5937 const char *reloc;
5938 enum loongarch_symbol_type symbol_type =
5939 loongarch_classify_symbolic_expression (op);
5940
5941 if (loongarch_symbol_extreme_p (symbol_type))
5942 gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE);
5943
5944 switch (symbol_type)
5945 {
5946 case SYMBOL_PCREL64:
5947 if (hi64_part)
5948 {
5949 reloc = hi_reloc ? "%pc64_hi12" : "%pc64_lo20";
5950 break;
5951 }
5952 /* fall through */
5953 case SYMBOL_PCREL:
5954 reloc = hi_reloc ? "%pc_hi20" : "%pc_lo12";
5955 break;
5956
5957 case SYMBOL_GOT_DISP:
5958 if (hi64_part)
5959 {
5960 if (TARGET_CMODEL_EXTREME)
5961 reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
5962 else
5963 gcc_unreachable ();
5964 }
5965 else
5966 reloc = hi_reloc ? "%got_pc_hi20" : "%got_pc_lo12";
5967 break;
5968
5969 case SYMBOL_TLS_IE:
5970 if (hi64_part)
5971 {
5972 if (TARGET_CMODEL_EXTREME)
5973 reloc = hi_reloc ? "%ie64_pc_hi12" : "%ie64_pc_lo20";
5974 else
5975 gcc_unreachable ();
5976 }
5977 else
5978 reloc = hi_reloc ? "%ie_pc_hi20" : "%ie_pc_lo12";
5979 break;
5980
5981 case SYMBOL_TLS_LE:
5982 if (hi64_part)
5983 {
5984 if (TARGET_CMODEL_EXTREME)
5985 reloc = hi_reloc ? "%le64_hi12" : "%le64_lo20";
5986 else
5987 gcc_unreachable ();
5988 }
5989 else
5990 {
5991 if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME)
5992 reloc = hi_reloc ? "%le_hi20_r" : "%le_lo12_r";
5993 else
5994 reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
5995 }
5996 break;
5997
5998 case SYMBOL_TLSGD:
5999 if (hi64_part)
6000 {
6001 if (TARGET_CMODEL_EXTREME)
6002 reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
6003 else
6004 gcc_unreachable ();
6005 }
6006 else
6007 reloc = hi_reloc ? "%gd_pc_hi20" : "%got_pc_lo12";
6008 break;
6009
6010 case SYMBOL_TLSLDM:
6011 if (hi64_part)
6012 {
6013 if (TARGET_CMODEL_EXTREME)
6014 reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
6015 else
6016 gcc_unreachable ();
6017 }
6018 else
6019 reloc = hi_reloc ? "%ld_pc_hi20" : "%got_pc_lo12";
6020 break;
6021
6022 default:
6023 gcc_unreachable ();
6024 }
6025
6026 fprintf (file, "%s(", reloc);
6027 output_addr_const (file, loongarch_strip_unspec_address (op));
6028 fputc (')', file);
6029 }
6030
6031 /* Implement TARGET_PRINT_OPERAND. The LoongArch-specific operand codes are:
6032
6033    'A'  Print a _DB suffix if the memory model requires an acquire or release barrier.
6034 'b' Print the address of a memory operand, without offset.
6035 'B' Print CONST_INT OP element 0 of a replicated CONST_VECTOR
6036 as an unsigned byte [0..255].
6037 'c' Print an integer.
6038 'C' Print the integer branch condition for comparison OP.
6039 'd' Print CONST_INT OP in decimal.
6040 'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal.
6041 'F' Print the FPU branch condition for comparison OP.
6042 'G' Print a DBAR insn for CAS failure (with an acquire semantic if
6043 needed, otherwise a simple load-load barrier).
6044    'H'  Print address 52-63bit relocation associated with OP.
6045 'h' Print the high-part relocation associated with OP.
6046 'i' Print i if the operand is not a register.
6047 'L' Print the low-part relocation associated with OP.
6048 'm' Print one less than CONST_INT OP in decimal.
6049 'N' Print the inverse of the integer branch condition for comparison OP.
6050 'r' Print address 12-31bit relocation associated with OP.
6051 'R' Print address 32-51bit relocation associated with OP.
6052 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
6053 'z' for (eq:?I ...), 'n' for (ne:?I ...).
6054 't' Like 'T', but with the EQ/NE cases reversed
6057 'w' Print a LSX register.
6058 'u' Print a LASX register.
6065 'v' Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI,
6066         V4SI, V2DI, and w, d for vector modes V4SF, V2DF respectively.
6067 'V' Print exact log2 of CONST_INT OP element 0 of a replicated
6068 CONST_VECTOR in decimal.
6069 'W' Print the inverse of the FPU branch condition for comparison OP.
6070 'X' Print CONST_INT OP in hexadecimal format.
6071 'x' Print the low 16 bits of CONST_INT OP in hexadecimal format.
6072 'Y' Print loongarch_fp_conditions[INTVAL (OP)]
6073 'y' Print exact log2 of CONST_INT OP in decimal.
6074 'Z' Print OP and a comma for 8CC, otherwise print nothing.
6075 'z' Print $0 if OP is zero, otherwise print OP normally. */
6076
6077 static void
6078 loongarch_print_operand (FILE *file, rtx op, int letter)
6079 {
6080 enum rtx_code code;
6081
6082 if (loongarch_print_operand_punct_valid_p (letter))
6083 {
6084 loongarch_print_operand_punctuation (file, letter);
6085 return;
6086 }
6087
6088 gcc_assert (op);
6089 code = GET_CODE (op);
6090
6091 switch (letter)
6092 {
6093 case 'A':
6094 if (loongarch_memmodel_needs_rel_acq_fence ((enum memmodel) INTVAL (op)))
6095 fputs ("_db", file);
6096 break;
6097 case 'E':
6098 if (GET_CODE (op) == CONST_VECTOR)
6099 {
6100 gcc_assert (loongarch_const_vector_same_val_p (op, GET_MODE (op)));
6101 op = CONST_VECTOR_ELT (op, 0);
6102 gcc_assert (CONST_INT_P (op));
6103 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
6104 }
6105 else
6106 output_operand_lossage ("invalid use of '%%%c'", letter);
6107 break;
6108
6109
6110 case 'c':
6111 if (CONST_INT_P (op))
6112 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
6113 else
6114 output_operand_lossage ("unsupported operand for code '%c'", letter);
6115
6116 break;
6117
6118 case 'C':
6119 loongarch_print_int_branch_condition (file, code, letter);
6120 break;
6121
6122 case 'd':
6123 if (CONST_INT_P (op))
6124 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
6125 else
6126 output_operand_lossage ("invalid use of '%%%c'", letter);
6127 break;
6128
6129 case 'F':
6130 loongarch_print_float_branch_condition (file, code, letter);
6131 break;
6132
6133 case 'G':
6134 if (loongarch_cas_failure_memorder_needs_acquire (
6135 memmodel_from_int (INTVAL (op))))
6136 fputs ("dbar\t0b10100", file);
6137 else if (!ISA_HAS_LD_SEQ_SA)
6138 fputs ("dbar\t0x700", file);
6139 break;
6140
6141 case 'h':
6142 if (code == HIGH)
6143 op = XEXP (op, 0);
6144 loongarch_print_operand_reloc (file, op, false /* hi64_part */,
6145 true /* hi_reloc */);
6146 break;
6147
6148 case 'H':
6149 loongarch_print_operand_reloc (file, op, true /* hi64_part */,
6150 true /* hi_reloc */);
6151 break;
6152
6153 case 'i':
6154 if (code != REG)
6155 fputs ("i", file);
6156 break;
6157
6158 case 'L':
6159       loongarch_print_operand_reloc (file, op, false /* hi64_part */,
6160                                      false /* hi_reloc */);
6161 break;
6162 case 'B':
6163 if (GET_CODE (op) == CONST_VECTOR)
6164 {
6165 gcc_assert (loongarch_const_vector_same_val_p (op, GET_MODE (op)));
6166 op = CONST_VECTOR_ELT (op, 0);
6167 gcc_assert (CONST_INT_P (op));
6168 unsigned HOST_WIDE_INT val8 = UINTVAL (op) & GET_MODE_MASK (QImode);
6169 fprintf (file, HOST_WIDE_INT_PRINT_UNSIGNED, val8);
6170 }
6171 else
6172 output_operand_lossage ("invalid use of '%%%c'", letter);
6173 break;
6174
6175 case 'm':
6176 if (CONST_INT_P (op))
6177 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) - 1);
6178 else
6179 output_operand_lossage ("invalid use of '%%%c'", letter);
6180 break;
6181
6182 case 'N':
6183 loongarch_print_int_branch_condition (file, reverse_condition (code),
6184 letter);
6185 break;
6186
6187 case 'r':
6188 loongarch_print_operand_reloc (file, op, false /* hi64_part */,
6189                                      true /* hi_reloc */);
6190 break;
6191
6192 case 'R':
6193 loongarch_print_operand_reloc (file, op, true /* hi64_part */,
6194                                      false /* hi_reloc */);
6195 break;
6196
6197 case 't':
6198 case 'T':
6199 {
6200 int truth = (code == NE) == (letter == 'T');
6201 fputc ("zfnt"[truth * 2 + FCC_REG_P (REGNO (XEXP (op, 0)))], file);
6202 }
6203 break;
6204
6205 case 'V':
6206 if (CONST_VECTOR_P (op))
6207 {
6208 machine_mode mode = GET_MODE_INNER (GET_MODE (op));
6209 unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (op, 0));
6210 int vlog2 = exact_log2 (val & GET_MODE_MASK (mode));
6211 if (vlog2 != -1)
6212 fprintf (file, "%d", vlog2);
6213 else
6214 output_operand_lossage ("invalid use of '%%%c'", letter);
6215 }
6216 else
6217 output_operand_lossage ("invalid use of '%%%c'", letter);
6218 break;
6219
6220 case 'W':
6221 loongarch_print_float_branch_condition (file, reverse_condition (code),
6222 letter);
6223 break;
6224
6225 case 'x':
6226 if (CONST_INT_P (op))
6227 fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op) & 0xffff);
6228 else
6229 output_operand_lossage ("invalid use of '%%%c'", letter);
6230 break;
6231
6232 case 'X':
6233 if (CONST_INT_P (op))
6234 fprintf (file, HOST_WIDE_INT_PRINT_HEX, INTVAL (op));
6235 else
6236 output_operand_lossage ("invalid use of '%%%c'", letter);
6237 break;
6238
6239 case 'y':
6240 if (CONST_INT_P (op))
6241 {
6242 int val = exact_log2 (INTVAL (op));
6243 if (val != -1)
6244 fprintf (file, "%d", val);
6245 else
6246 output_operand_lossage ("invalid use of '%%%c'", letter);
6247 }
6248 else
6249 output_operand_lossage ("invalid use of '%%%c'", letter);
6250 break;
6251
6252 case 'Y':
6253 if (code == CONST_INT
6254 && UINTVAL (op) < ARRAY_SIZE (loongarch_fp_conditions))
6255 fputs (loongarch_fp_conditions[UINTVAL (op)], file);
6256 else
6257 output_operand_lossage ("'%%%c' is not a valid operand prefix",
6258 letter);
6259 break;
6260
6261 case 'Z':
6262 loongarch_print_operand (file, op, 0);
6263 fputc (',', file);
6264 break;
6265
6266 case 'w':
6267 if (code == REG && LSX_REG_P (REGNO (op)))
6268 fprintf (file, "$vr%s", &reg_names[REGNO (op)][2]);
6269 else
6270 output_operand_lossage ("invalid use of '%%%c'", letter);
6271 break;
6272
6273 case 'u':
6274 if (code == REG && LASX_REG_P (REGNO (op)))
6275 fprintf (file, "$xr%s", &reg_names[REGNO (op)][2]);
6276 else
6277 output_operand_lossage ("invalid use of '%%%c'", letter);
6278 break;
6279
6280 case 'v':
6281 switch (GET_MODE (op))
6282 {
6283 case E_V16QImode:
6284 case E_V32QImode:
6285 fprintf (file, "b");
6286 break;
6287 case E_V8HImode:
6288 case E_V16HImode:
6289 fprintf (file, "h");
6290 break;
6291 case E_V4SImode:
6292 case E_V4SFmode:
6293 case E_V8SImode:
6294 case E_V8SFmode:
6295 fprintf (file, "w");
6296 break;
6297 case E_V2DImode:
6298 case E_V2DFmode:
6299 case E_V4DImode:
6300 case E_V4DFmode:
6301 fprintf (file, "d");
6302 break;
6303 default:
6304 output_operand_lossage ("invalid use of '%%%c'", letter);
6305 }
6306 break;
6307
6308 default:
6309 switch (code)
6310 {
6311 case REG:
6312 {
6313 unsigned int regno = REGNO (op);
6314 if (letter && letter != 'z')
6315 output_operand_lossage ("invalid use of '%%%c'", letter);
6316 fprintf (file, "%s", reg_names[regno]);
6317 }
6318 break;
6319
6320 case MEM:
6321 if (letter == 'D')
6322 output_address (GET_MODE (op),
6323 plus_constant (Pmode, XEXP (op, 0), 4));
6324 else if (letter == 'b')
6325 {
6326 gcc_assert (REG_P (XEXP (op, 0)));
6327 loongarch_print_operand (file, XEXP (op, 0), 0);
6328 }
6329 else if (letter && letter != 'z')
6330 output_operand_lossage ("invalid use of '%%%c'", letter);
6331 else
6332 output_address (GET_MODE (op), XEXP (op, 0));
6333 break;
6334
6335 default:
6336 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
6337 fputs (reg_names[GP_REG_FIRST], file);
6338 else if (letter && letter != 'z')
6339 output_operand_lossage ("invalid use of '%%%c'", letter);
6340 else
6341 output_addr_const (file, loongarch_strip_unspec_address (op));
6342 break;
6343 }
6344 }
6345 }
6346
6347 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
6348
6349 static void
6350 loongarch_print_operand_address (FILE *file, machine_mode /* mode */, rtx x)
6351 {
6352 struct loongarch_address_info addr;
6353
6354 if (loongarch_classify_address (&addr, x, word_mode, true))
6355 switch (addr.type)
6356 {
6357 case ADDRESS_REG:
6358 fprintf (file, "%s,", reg_names[REGNO (addr.reg)]);
6359 loongarch_print_operand (file, addr.offset, 0);
6360 return;
6361
6362 case ADDRESS_REG_REG:
6363 fprintf (file, "%s,%s", reg_names[REGNO (addr.reg)],
6364 reg_names[REGNO (addr.offset)]);
6365 return;
6366
6367 case ADDRESS_LO_SUM:
6368 fprintf (file, "%s,", reg_names[REGNO (addr.reg)]);
6369 loongarch_print_operand_reloc (file, addr.offset, false /* hi64_part */,
6370 false /* hi_reloc */);
6371 return;
6372
6373 case ADDRESS_CONST_INT:
6374 fprintf (file, "%s,", reg_names[GP_REG_FIRST]);
6375 output_addr_const (file, x);
6376 return;
6377
6378 case ADDRESS_SYMBOLIC:
6379 output_addr_const (file, loongarch_strip_unspec_address (x));
6380 return;
6381 }
6382 if (CONST_INT_P (x))
6383 output_addr_const (file, x);
6384 else
6385 gcc_unreachable ();
6386 }
6387
6388 /* Implement TARGET_ASM_SELECT_RTX_SECTION. */
6389
6390 static section *
6391 loongarch_select_rtx_section (machine_mode mode, rtx x,
6392 unsigned HOST_WIDE_INT align)
6393 {
6394 /* ??? Consider using mergeable small data sections. */
6395 if (loongarch_rtx_constant_in_small_data_p (mode))
6396 return get_named_section (NULL, ".sdata", 0);
6397
6398 return default_elf_select_rtx_section (mode, x, align);
6399 }
6400
6401 /* Implement TARGET_ASM_FUNCTION_RODATA_SECTION.
6402
6403 The complication here is that jump tables will use absolute addresses,
6404 and should therefore not be included in the read-only part of a DSO.
6405 Handle such cases by selecting a normal data section instead of a
6406 read-only one. The logic apes that in default_function_rodata_section. */
6407
6408 static section *
6409 loongarch_function_rodata_section (tree decl, bool)
6410 {
6411 return default_function_rodata_section (decl, false);
6412 }
6413
6414 /* Implement TARGET_IN_SMALL_DATA_P. */
6415
6416 static bool
6417 loongarch_in_small_data_p (const_tree decl)
6418 {
6419 int size;
6420
6421 if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL)
6422 return false;
6423
6424 if (VAR_P (decl) && DECL_SECTION_NAME (decl) != 0)
6425 {
6426 const char *name;
6427
6428 /* Reject anything that isn't in a known small-data section. */
6429 name = DECL_SECTION_NAME (decl);
6430 if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0)
6431 return false;
6432
6433 /* If a symbol is defined externally, the assembler will use the
6434 usual -G rules when deciding how to implement macros. */
6435 if (!DECL_EXTERNAL (decl))
6436 return true;
6437 }
6438
6439 /* We have traditionally not treated zero-sized objects as small data,
6440 so this is now effectively part of the ABI. */
6441 size = int_size_in_bytes (TREE_TYPE (decl));
6442 return size > 0 && size <= g_switch_value;
6443 }
6444
6445 /* The LoongArch debug format wants all automatic variables and arguments
6446 to be in terms of the virtual frame pointer (stack pointer before
6447 any adjustment in the function), while the LoongArch linker wants
6448 the frame pointer to be the stack pointer after the initial
6449 adjustment. So, we do the adjustment here. The arg pointer (which
6450 is eliminated) points to the virtual frame pointer, while the frame
6451 pointer (which may be eliminated) points to the stack pointer after
6452 the initial adjustments. */
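/* For example, with frame.total_size of 32 bytes, an offset of 8 from the
   virtual frame pointer becomes 8 - 32 = -24 from the adjusted stack
   pointer, plus frame.hard_frame_pointer_offset when the address is based
   on the hard frame pointer.  */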
6453
6454 HOST_WIDE_INT
6455 loongarch_debugger_offset (rtx addr, HOST_WIDE_INT offset)
6456 {
6457 rtx offset2 = const0_rtx;
6458 rtx reg = eliminate_constant_term (addr, &offset2);
6459
6460 if (offset == 0)
6461 offset = INTVAL (offset2);
6462
6463 if (reg == stack_pointer_rtx
6464 || reg == frame_pointer_rtx
6465 || reg == hard_frame_pointer_rtx)
6466 {
6467 offset -= cfun->machine->frame.total_size;
6468 if (reg == hard_frame_pointer_rtx)
6469 offset += cfun->machine->frame.hard_frame_pointer_offset;
6470 }
6471
6472 return offset;
6473 }
6474
6475 /* Implement ASM_OUTPUT_EXTERNAL. */
6476
6477 void
6478 loongarch_output_external (FILE *file, tree decl, const char *name)
6479 {
6480 default_elf_asm_output_external (file, decl, name);
6481
6482 /* We output the name if and only if TREE_SYMBOL_REFERENCED is
6483 set in order to avoid putting out names that are never really
6484 used. */
6485 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
6486 {
6487 if (loongarch_in_small_data_p (decl))
6488 {
6489 /* When using assembler macros, emit .extern directives for
6490 all small-data externs so that the assembler knows how
6491 big they are.
6492
6493 In most cases it would be safe (though pointless) to emit
6494 .externs for other symbols too. One exception is when an
6495 object is within the -G limit but declared by the user to
6496 be in a section other than .sbss or .sdata. */
6497 fputs ("\t.extern\t", file);
6498 assemble_name (file, name);
6499 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC "\n",
6500 int_size_in_bytes (TREE_TYPE (decl)));
6501 }
6502 }
6503 }
6504
6505 /* Implement TARGET_ASM_OUTPUT_DWARF_DTPREL. */
6506
6507 static void ATTRIBUTE_UNUSED
6508 loongarch_output_dwarf_dtprel (FILE *file, int size, rtx x)
6509 {
6510 switch (size)
6511 {
6512 case 4:
6513 fputs ("\t.dtprelword\t", file);
6514 break;
6515
6516 case 8:
6517 fputs ("\t.dtpreldword\t", file);
6518 break;
6519
6520 default:
6521 gcc_unreachable ();
6522 }
6523 output_addr_const (file, x);
6524 fputs ("+0x8000", file);
6525 }
6526
6527 /* Implement ASM_OUTPUT_ASCII. */
6528
6529 void
6530 loongarch_output_ascii (FILE *stream, const char *string, size_t len)
6531 {
6532 size_t i;
6533 int cur_pos;
6534
6535 cur_pos = 17;
6536 fprintf (stream, "\t.ascii\t\"");
6537 for (i = 0; i < len; i++)
6538 {
6539 int c;
6540
6541 c = (unsigned char) string[i];
6542 if (ISPRINT (c))
6543 {
6544 if (c == '\\' || c == '\"')
6545 {
6546 putc ('\\', stream);
6547 cur_pos++;
6548 }
6549 putc (c, stream);
6550 cur_pos++;
6551 }
6552 else
6553 {
6554 fprintf (stream, "\\%03o", c);
6555 cur_pos += 4;
6556 }
6557
6558 if (cur_pos > 72 && i + 1 < len)
6559 {
6560 cur_pos = 17;
6561 fprintf (stream, "\"\n\t.ascii\t\"");
6562 }
6563 }
6564 fprintf (stream, "\"\n");
6565 }
6566
6567 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
6568
6569 static bool
6570 loongarch_frame_pointer_required (void)
6571 {
6572 /* If the function contains dynamic stack allocations, we need to
6573 use the frame pointer to access the static parts of the frame. */
6574 if (cfun->calls_alloca)
6575 return true;
6576
6577 return false;
6578 }
6579
6580 /* Implement TARGET_CAN_ELIMINATE. Make sure that we're not trying
6581 to eliminate to the wrong hard frame pointer. */
6582
6583 static bool
6584 loongarch_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
6585 {
6586 return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
6587 }
6588
6589 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
6590 previous frame. */
6591
6592 rtx
6593 loongarch_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
6594 {
6595 if (count != 0)
6596 return const0_rtx;
6597
6598 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
6599 }
6600
6601 /* Emit code to change the current function's return address to
6602 ADDRESS. SCRATCH is available as a scratch register, if needed.
6603 ADDRESS and SCRATCH are both word-mode GPRs. */
6604
6605 void
6606 loongarch_set_return_address (rtx address, rtx scratch)
6607 {
6608 rtx slot_address;
6609
6610 gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
6611
6612 if (frame_pointer_needed)
6613 slot_address = loongarch_add_offset (scratch, hard_frame_pointer_rtx,
6614 -UNITS_PER_WORD);
6615 else
6616 slot_address = loongarch_add_offset (scratch, stack_pointer_rtx,
6617 cfun->machine->frame.gp_sp_offset);
6618
6619 loongarch_emit_move (gen_frame_mem (GET_MODE (address), slot_address),
6620 address);
6621 }
6622
6623 /* Return true if register REGNO can store a value of mode MODE.
6624 The result of this function is cached in loongarch_hard_regno_mode_ok. */
6625
6626 static bool
6627 loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
6628 {
6629 unsigned int size;
6630 enum mode_class mclass;
6631
6632 if (mode == FCCmode)
6633 return FCC_REG_P (regno) || GP_REG_P (regno) || FP_REG_P (regno);
6634
6635 size = GET_MODE_SIZE (mode);
6636 mclass = GET_MODE_CLASS (mode);
6637
6638 if (GP_REG_P (regno) && !LSX_SUPPORTED_MODE_P (mode)
6639 && !LASX_SUPPORTED_MODE_P (mode))
6640 return ((regno - GP_REG_FIRST) & 1) == 0 || size <= UNITS_PER_WORD;
6641
6642   /* For LSX, allow TImode and 128-bit vector modes in all FPRs.  */
6643 if (FP_REG_P (regno) && LSX_SUPPORTED_MODE_P (mode))
6644 return true;
6645
6646   /* FIXME: For LASX, allow TImode and 256-bit vector modes in all FPRs.  */
6647 if (FP_REG_P (regno) && LASX_SUPPORTED_MODE_P (mode))
6648 return true;
6649
6650 if (FP_REG_P (regno))
6651 {
6652 if (mclass == MODE_FLOAT
6653 || mclass == MODE_COMPLEX_FLOAT
6654 || mclass == MODE_VECTOR_FLOAT)
6655 return size <= UNITS_PER_FPVALUE;
6656
6657 /* Allow integer modes that fit into a single register. We need
6658 to put integers into FPRs when using instructions like CVT
6659 and TRUNC. There's no point allowing sizes smaller than a word,
6660 because the FPU has no appropriate load/store instructions. */
6661 if (mclass == MODE_INT)
6662 return size >= MIN_UNITS_PER_WORD && size <= UNITS_PER_FPREG;
6663 }
6664
6665 return false;
6666 }
6667
6668 /* Implement TARGET_HARD_REGNO_MODE_OK. */
6669
6670 static bool
6671 loongarch_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
6672 {
6673 return loongarch_hard_regno_mode_ok_p[mode][regno];
6674 }
6675
6676
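/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  Only the lower 64 bits
   of an FPR are preserved across a call, so values wider than 64 bits held
   in FP registers are partly clobbered when LSX/LASX is available.  */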
6677 static bool
6678 loongarch_hard_regno_call_part_clobbered (unsigned int,
6679 unsigned int regno, machine_mode mode)
6680 {
6681 if (ISA_HAS_LSX && FP_REG_P (regno) && GET_MODE_SIZE (mode) > 8)
6682 return true;
6683
6684 return false;
6685 }
6686
6687 /* Implement TARGET_HARD_REGNO_NREGS. */
6688
6689 static unsigned int
6690 loongarch_hard_regno_nregs (unsigned int regno, machine_mode mode)
6691 {
6692 if (FCC_REG_P (regno))
6693 /* The size of FP status registers is always 4, because they only hold
6694 FCCmode values, and FCCmode is always considered to be 4 bytes wide. */
6695 return (GET_MODE_SIZE (mode) + 3) / 4;
6696
6697 if (FP_REG_P (regno))
6698 {
6699 if (LSX_SUPPORTED_MODE_P (mode))
6700 return 1;
6701
6702 if (LASX_SUPPORTED_MODE_P (mode))
6703 return 1;
6704
6705 return (GET_MODE_SIZE (mode) + UNITS_PER_FPREG - 1) / UNITS_PER_FPREG;
6706 }
6707
6708 /* All other registers are word-sized. */
6709 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6710 }
6711
6712 /* Implement CLASS_MAX_NREGS, taking the maximum of the cases
6713 in loongarch_hard_regno_nregs. */
6714
6715 int
6716 loongarch_class_max_nregs (enum reg_class rclass, machine_mode mode)
6717 {
6718 int size;
6719 HARD_REG_SET left;
6720
6721 size = 0x8000;
6722 left = reg_class_contents[rclass];
6723 if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FCC_REGS]))
6724 {
6725 if (loongarch_hard_regno_mode_ok (FCC_REG_FIRST, mode))
6726 size = MIN (size, 4);
6727
6728 left &= ~reg_class_contents[FCC_REGS];
6729 }
6730 if (hard_reg_set_intersect_p (left, reg_class_contents[(int) FP_REGS]))
6731 {
6732 if (loongarch_hard_regno_mode_ok (FP_REG_FIRST, mode))
6733 {
6734           /* FIXME.  */
6735 if (LASX_SUPPORTED_MODE_P (mode))
6736 size = MIN (size, UNITS_PER_LASX_REG);
6737 else if (LSX_SUPPORTED_MODE_P (mode))
6738 size = MIN (size, UNITS_PER_LSX_REG);
6739 else
6740 size = MIN (size, UNITS_PER_FPREG);
6741 }
6742 left &= ~reg_class_contents[FP_REGS];
6743 }
6744 if (!hard_reg_set_empty_p (left))
6745 size = MIN (size, UNITS_PER_WORD);
6746 return (GET_MODE_SIZE (mode) + size - 1) / size;
6747 }
6748
6749 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
6750
6751 static bool
6752 loongarch_can_change_mode_class (machine_mode from, machine_mode to,
6753 reg_class_t rclass)
6754 {
6755 /* Allow conversions between different LSX/LASX vector modes. */
6756 if (LASX_SUPPORTED_MODE_P (from) && LASX_SUPPORTED_MODE_P (to))
6757 return true;
6758
6759 /* Allow conversions between different LSX vector modes. */
6760 if (LSX_SUPPORTED_MODE_P (from) && LSX_SUPPORTED_MODE_P (to))
6761 return true;
6762
6763 /* Allow conversion between LSX vector mode and scalar fp mode. */
6764 if ((LSX_SUPPORTED_MODE_P (from) && SCALAR_FLOAT_MODE_P (to))
6765 || ((SCALAR_FLOAT_MODE_P (from) && LSX_SUPPORTED_MODE_P (to))))
6766 return true;
6767
6768 return !reg_classes_intersect_p (FP_REGS, rclass);
6769 }
6770
6771 /* Return true if moves in mode MODE can use the FPU's fmov.fmt instruction.  */
6773
6774 static bool
6775 loongarch_mode_ok_for_mov_fmt_p (machine_mode mode)
6776 {
6777 switch (mode)
6778 {
6779 case E_FCCmode:
6780 case E_SFmode:
6781 return TARGET_HARD_FLOAT;
6782
6783 case E_DFmode:
6784 return TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT;
6785
6786 default:
6787 return ISA_HAS_LASX ? LASX_SUPPORTED_MODE_P (mode)
6788 : LSX_SUPPORTED_MODE_P (mode);
6789 }
6790 }
6791
6792 /* Implement TARGET_MODES_TIEABLE_P. */
6793
6794 static bool
6795 loongarch_modes_tieable_p (machine_mode mode1, machine_mode mode2)
6796 {
6797 /* FPRs allow no mode punning, so it's not worth tying modes if we'd
6798 prefer to put one of them in FPRs. */
6799 return (mode1 == mode2
6800 || (!loongarch_mode_ok_for_mov_fmt_p (mode1)
6801 && !loongarch_mode_ok_for_mov_fmt_p (mode2)));
6802 }
6803
6804 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
6805
6806 static reg_class_t
6807 loongarch_preferred_reload_class (rtx x, reg_class_t rclass)
6808 {
6809 if (reg_class_subset_p (FP_REGS, rclass)
6810 && loongarch_mode_ok_for_mov_fmt_p (GET_MODE (x)))
6811 return FP_REGS;
6812
6813 if (reg_class_subset_p (GR_REGS, rclass))
6814 rclass = GR_REGS;
6815
6816 return rclass;
6817 }
6818
6819 /* RCLASS is a class involved in a REGISTER_MOVE_COST calculation.
6820 Return a "canonical" class to represent it in later calculations. */
6821
6822 static reg_class_t
6823 loongarch_canonicalize_move_class (reg_class_t rclass)
6824 {
6825 if (reg_class_subset_p (rclass, GENERAL_REGS))
6826 rclass = GENERAL_REGS;
6827
6828 return rclass;
6829 }
6830
6831 /* Return the cost of moving a value from a register of class FROM to a GPR.
6832 Return 0 for classes that are unions of other classes handled by this
6833 function. */
6834
6835 static int
6836 loongarch_move_to_gpr_cost (reg_class_t from)
6837 {
6838 switch (from)
6839 {
6840 case GENERAL_REGS:
6841 /* MOVE macro. */
6842 return 2;
6843
6844 case FP_REGS:
6845 /* MOVFR2GR, etc. */
6846 return 4;
6847
6848 case FCC_REGS:
6849 return loongarch_cost->movcf2gr;
6850
6851 default:
6852 return 0;
6853 }
6854 }
6855
6856 /* Return the cost of moving a value from a GPR to a register of class TO.
6857 Return 0 for classes that are unions of other classes handled by this
6858 function. */
6859
6860 static int
6861 loongarch_move_from_gpr_cost (reg_class_t to)
6862 {
6863 switch (to)
6864 {
6865 case GENERAL_REGS:
6866       /* MOVE macro.  */
6867 return 2;
6868
6869 case FP_REGS:
6870 /* MOVGR2FR, etc. */
6871 return 4;
6872
6873 case FCC_REGS:
6874 return loongarch_cost->movgr2cf;
6875
6876 default:
6877 return 0;
6878 }
6879 }
6880
6881 /* Implement TARGET_REGISTER_MOVE_COST. Return 0 for classes that are the
6882 maximum of the move costs for subclasses; regclass will work out
6883 the maximum for us. */
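/* For example, an FPR -> GPR move is costed at 4 (MOVFR2GR.*), a GPR -> FCC
   move at loongarch_cost->movgr2cf, and an FPR <-> FCC move as a single
   insn by the FCC special case below.  */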
6884
6885 static int
6886 loongarch_register_move_cost (machine_mode mode, reg_class_t from,
6887 reg_class_t to)
6888 {
6889 reg_class_t dregs;
6890 int cost1, cost2;
6891
6892 from = loongarch_canonicalize_move_class (from);
6893 to = loongarch_canonicalize_move_class (to);
6894
6895 /* Handle moves that can be done without using general-purpose registers. */
6896 if (from == FP_REGS)
6897 {
6898 if (to == FP_REGS && loongarch_mode_ok_for_mov_fmt_p (mode))
6899 /* FMOV.FMT. */
6900 return 4;
6901 }
6902
6903 /* Handle cases in which only one class deviates from the ideal. */
6904 dregs = GENERAL_REGS;
6905 if (from == dregs)
6906 return loongarch_move_from_gpr_cost (to);
6907 if (to == dregs)
6908 return loongarch_move_to_gpr_cost (from);
6909
6910 /* fcc -> fcc, fcc -> fpr, or fpr -> fcc. */
6911 if (from == FCC_REGS || to == FCC_REGS)
6912 return COSTS_N_INSNS (from == to ? 2 : 1);
6913
6914 /* Handles cases that require a GPR temporary. */
6915 cost1 = loongarch_move_to_gpr_cost (from);
6916 if (cost1 != 0)
6917 {
6918 cost2 = loongarch_move_from_gpr_cost (to);
6919 if (cost2 != 0)
6920 return cost1 + cost2;
6921 }
6922
6923 return 0;
6924 }
6925
6926 /* Implement TARGET_MEMORY_MOVE_COST. */
6927
6928 static int
6929 loongarch_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
6930 {
6931 return (loongarch_cost->memory_latency
6932 + memory_move_secondary_cost (mode, rclass, in));
6933 }
6934
6935 /* Return the register class required for a secondary register when
6936 copying between one of the registers in RCLASS and value X, which
6937 has mode MODE. X is the source of the move if IN_P, otherwise it
6938 is the destination. Return NO_REGS if no secondary register is
6939 needed. */
6940
6941 static reg_class_t
6942 loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
6943 reg_class_t rclass, machine_mode mode,
6944 secondary_reload_info *sri ATTRIBUTE_UNUSED)
6945 {
6946 int regno;
6947
6948 regno = true_regnum (x);
6949
6950 if (mode == FCCmode)
6951 {
6952 if (reg_class_subset_p (rclass, FCC_REGS) && !FP_REG_P (regno))
6953 {
6954 if (FCC_REG_P (regno))
6955 return FP_REGS;
6956
6957 auto fn = in_p ? loongarch_move_from_gpr_cost
6958 : loongarch_move_to_gpr_cost;
6959
6960 if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1))
6961 return FP_REGS;
6962
6963 return GP_REG_P (regno) ? NO_REGS : GR_REGS;
6964 }
6965
6966 if (reg_class_subset_p (rclass, GR_REGS) && FCC_REG_P (regno))
6967 {
6968 auto fn = in_p ? loongarch_move_to_gpr_cost
6969 : loongarch_move_from_gpr_cost;
6970
6971 if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1))
6972 return FP_REGS;
6973
6974 return NO_REGS;
6975 }
6976
6977 if (reg_class_subset_p (rclass, FP_REGS)
6978 && (regno == -1 || MEM_P (x)))
6979 return GR_REGS;
6980
6981 return NO_REGS;
6982 }
6983
6984 if (reg_class_subset_p (rclass, FP_REGS))
6985 {
6986 if (regno < 0
6987 || (MEM_P (x)
6988 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)))
6989         /* In this case we can use fld.s, fst.s, fld.d or fst.d.  We'll use
6990            pairs of 32-bit accesses if 64-bit FP loads and stores are not supported.  */
6991 return NO_REGS;
6992
6993 if (MEM_P (x) && LSX_SUPPORTED_MODE_P (mode))
6994 /* In this case we can use LSX LD.* and ST.*. */
6995 return NO_REGS;
6996
6997 if (GP_REG_P (regno) || x == CONST0_RTX (mode))
6998 /* In this case we can use movgr2fr.s, movfr2gr.s, movgr2fr.d or
6999 * movfr2gr.d. */
7000 return NO_REGS;
7001
7002 if (CONSTANT_P (x) && !targetm.cannot_force_const_mem (mode, x))
7003         /* We can force the constant to memory and use fld.s
7004            and fld.d.  As above, we will use pairs of 32-bit loads if
7005            64-bit FP loads are not supported.  */
7006 return NO_REGS;
7007
7008 if (FP_REG_P (regno) && loongarch_mode_ok_for_mov_fmt_p (mode))
7009 /* In this case we can use fmov.{s/d}. */
7010 return NO_REGS;
7011
7012 /* Otherwise, we need to reload through an integer register. */
7013 return GR_REGS;
7014 }
7015 if (FP_REG_P (regno))
7016 return reg_class_subset_p (rclass, GR_REGS) ? NO_REGS : GR_REGS;
7017
7018 return NO_REGS;
7019 }
7020
7021 /* Implement TARGET_VALID_POINTER_MODE. */
7022
7023 static bool
7024 loongarch_valid_pointer_mode (scalar_int_mode mode)
7025 {
7026 return mode == SImode || (TARGET_64BIT && mode == DImode);
7027 }
7028
7029 /* Implement TARGET_VECTOR_MODE_SUPPORTED_P. */
7030
7031 static bool
7032 loongarch_vector_mode_supported_p (machine_mode mode)
7033 {
7034 return ISA_HAS_LASX ? LASX_SUPPORTED_MODE_P (mode)
7035 : LSX_SUPPORTED_MODE_P (mode);
7036 }
7037
7038 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
7039
7040 static bool
7041 loongarch_scalar_mode_supported_p (scalar_mode mode)
7042 {
7043 if (ALL_FIXED_POINT_MODE_P (mode)
7044 && GET_MODE_PRECISION (mode) <= 2 * BITS_PER_WORD)
7045 return true;
7046
7047 return default_scalar_mode_supported_p (mode);
7048 }
7049
7050 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
7051
7052 static machine_mode
7053 loongarch_preferred_simd_mode (scalar_mode mode)
7054 {
7055 if (!ISA_HAS_LSX)
7056 return word_mode;
7057
7058 switch (mode)
7059 {
7060 case E_QImode:
7061 return ISA_HAS_LASX ? E_V32QImode : E_V16QImode;
7062 case E_HImode:
7063 return ISA_HAS_LASX ? E_V16HImode : E_V8HImode;
7064 case E_SImode:
7065 return ISA_HAS_LASX ? E_V8SImode : E_V4SImode;
7066 case E_DImode:
7067 return ISA_HAS_LASX ? E_V4DImode : E_V2DImode;
7068
7069 case E_SFmode:
7070 return ISA_HAS_LASX ? E_V8SFmode : E_V4SFmode;
7071
7072 case E_DFmode:
7073 return ISA_HAS_LASX ? E_V4DFmode : E_V2DFmode;
7074
7075 default:
7076 break;
7077 }
7078 return word_mode;
7079 }
7080
7081 static unsigned int
7082 loongarch_autovectorize_vector_modes (vector_modes *modes, bool)
7083 {
7084 if (ISA_HAS_LASX)
7085 {
7086 modes->safe_push (V32QImode);
7087 modes->safe_push (V16QImode);
7088 }
7089 else if (ISA_HAS_LSX)
7090 {
7091 modes->safe_push (V16QImode);
7092 }
7093
7094 return 0;
7095 }
7096
7097 /* Return the assembly code for INSN, which has the operands given by
7098 OPERANDS, and which branches to OPERANDS[0] if some condition is true.
7099 BRANCH_IF_TRUE is the asm template that should be used if OPERANDS[0]
7100 is in range of a direct branch. BRANCH_IF_FALSE is an inverted
7101 version of BRANCH_IF_TRUE. */
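/* When OPERANDS[0] is out of range of a direct branch, the emitted sequence
   is roughly

       <BRANCH_IF_FALSE>  .Lnot_taken
       b                  OPERANDS[0]
     .Lnot_taken:

   i.e. an inverted short branch around an unconditional jump.  */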
7102
7103 const char *
7104 loongarch_output_conditional_branch (rtx_insn *insn, rtx *operands,
7105 const char *branch_if_true,
7106 const char *branch_if_false)
7107 {
7108 unsigned int length;
7109 rtx taken;
7110
7111 gcc_assert (LABEL_P (operands[0]));
7112
7113 length = get_attr_length (insn);
7114 if (length <= 4)
7115 {
7116 return branch_if_true;
7117 }
7118
7119 /* Generate a reversed branch around a direct jump. */
7120 rtx_code_label *not_taken = gen_label_rtx ();
7121 taken = operands[0];
7122
7123 /* Generate the reversed branch to NOT_TAKEN. */
7124 operands[0] = not_taken;
7125 output_asm_insn (branch_if_false, operands);
7126
7127 output_asm_insn ("b\t%0", &taken);
7128
7129 /* Output NOT_TAKEN. */
7130 targetm.asm_out.internal_label (asm_out_file, "L",
7131 CODE_LABEL_NUMBER (not_taken));
7132 return "";
7133 }
7134
7135 /* Return the assembly code for INSN, which branches to OPERANDS[0]
7136 if some equality condition is true. The condition is given by
7137 OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of
7138 OPERANDS[1]. OPERANDS[2] is the comparison's first operand;
7139 OPERANDS[3] is the second operand and may be zero or a register. */
7140
7141 const char *
7142 loongarch_output_equal_conditional_branch (rtx_insn *insn, rtx *operands,
7143 bool inverted_p)
7144 {
7145 const char *branch[2];
7146 if (operands[3] == const0_rtx)
7147 {
7148 branch[!inverted_p] = LARCH_BRANCH ("b%C1z", "%2,%0");
7149 branch[inverted_p] = LARCH_BRANCH ("b%N1z", "%2,%0");
7150 }
7151 else
7152 {
7153 branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%z3,%0");
7154 branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%z3,%0");
7155 }
7156
7157 return loongarch_output_conditional_branch (insn, operands, branch[1],
7158 branch[0]);
7159 }
7160
7161 /* Return the assembly code for INSN, which branches to OPERANDS[0]
7162 if some ordering condition is true. The condition is given by
7163 OPERANDS[1] if !INVERTED_P, otherwise it is the inverse of
7164 OPERANDS[1]. OPERANDS[2] is the comparison's first operand;
7165 OPERANDS[3] is the second operand and may be zero or a register. */
7166
7167 const char *
7168 loongarch_output_order_conditional_branch (rtx_insn *insn, rtx *operands,
7169 bool inverted_p)
7170 {
7171 const char *branch[2];
7172
7173 /* Make BRANCH[1] branch to OPERANDS[0] when the condition is true.
7174 Make BRANCH[0] branch on the inverse condition. */
7175 if (operands[3] != const0_rtx)
7176 {
7177 /* Handle degenerate cases that should not, but do, occur. */
7178 if (REGNO (operands[2]) == REGNO (operands[3]))
7179 {
7180 switch (GET_CODE (operands[1]))
7181 {
7182 case LT:
7183 case LTU:
7184 case GT:
7185 case GTU:
7186 inverted_p = !inverted_p;
7187 /* Fall through. */
7188 case LE:
7189 case LEU:
7190 case GE:
7191 case GEU:
7192 branch[!inverted_p] = LARCH_BRANCH ("b", "%0");
7193 branch[inverted_p] = "\t# branch never";
7194 break;
7195 default:
7196 gcc_unreachable ();
7197 }
7198 }
7199 else
7200 {
7201 switch (GET_CODE (operands[1]))
7202 {
7203 case LE:
7204 case LEU:
7205 case GT:
7206 case GTU:
7207 case LT:
7208 case LTU:
7209 case GE:
7210 case GEU:
7211 branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,%3,%0");
7212 branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,%3,%0");
7213 break;
7214 default:
7215 gcc_unreachable ();
7216 }
7217 }
7218 }
7219 else
7220 {
7221 switch (GET_CODE (operands[1]))
7222 {
7223 /* These cases are equivalent to comparisons against zero. */
7224 case LEU:
7225 case GTU:
7226 case LTU:
7227 case GEU:
7228 case LE:
7229 case GT:
7230 case LT:
7231 case GE:
7232 branch[!inverted_p] = LARCH_BRANCH ("b%C1", "%2,$r0,%0");
7233 branch[inverted_p] = LARCH_BRANCH ("b%N1", "%2,$r0,%0");
7234 break;
7235 default:
7236 gcc_unreachable ();
7237 }
7238 }
7239 return loongarch_output_conditional_branch (insn, operands, branch[1],
7240 branch[0]);
7241 }
7242
7243 /* Return the assembly code for DIV.{W/D} instruction DIVISION, which has
7244 the operands given by OPERANDS. Add in a divide-by-zero check if needed.
7245 */
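/* When division-by-zero checking is enabled, the emitted sequence is
   roughly

     <DIVISION>
     bne    %2,$zero,1f
     break  7
   1:

   i.e. the division is issued first and the trap is skipped when the
   divisor is nonzero.  */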
7246
7247 const char *
7248 loongarch_output_division (const char *division, rtx *operands)
7249 {
7250 const char *s;
7251
7252 s = division;
7253 if (loongarch_check_zero_div_p ())
7254 {
7255 output_asm_insn (s, operands);
7256 s = "bne\t%2,%.,1f\n\tbreak\t7\n1:";
7257 }
7258 return s;
7259 }
7260
7261 /* Return the assembly code for an LSX/LASX vector division or modulo
7262    instruction DIVISION, which has the operands given by OPERANDS.  Add in
7263    a divide-by-zero check if needed.  */
7264
7265 const char *
7266 loongarch_lsx_output_division (const char *division, rtx *operands)
7267 {
7268 const char *s;
7269 machine_mode mode = GET_MODE (*operands);
7270
7271 s = division;
7272 if (TARGET_CHECK_ZERO_DIV)
7273 {
7274 if (ISA_HAS_LASX && GET_MODE_SIZE (mode) == 32)
7275 {
7276 output_asm_insn ("xvsetallnez.%v0\t$fcc7,%u2",operands);
7277 output_asm_insn (s, operands);
7278 output_asm_insn ("bcnez\t$fcc7,1f", operands);
7279 }
7280 else if (ISA_HAS_LSX)
7281 {
7282 output_asm_insn ("vsetallnez.%v0\t$fcc7,%w2",operands);
7283 output_asm_insn (s, operands);
7284 output_asm_insn ("bcnez\t$fcc7,1f", operands);
7285 }
7286 s = "break\t7\n1:";
7287 }
7288 return s;
7289 }
7290
7291 /* Implement TARGET_SCHED_ADJUST_COST.  We assume that anti dependencies
7292    have no cost, while true and output dependencies keep their cost.  */
7293
7294 static int
7295 loongarch_adjust_cost (rtx_insn *, int dep_type, rtx_insn *, int cost,
7296 unsigned int)
7297 {
7298 if (dep_type != 0 && (dep_type != REG_DEP_OUTPUT))
7299 return 0;
7300 return cost;
7301 }
7302
7303 /* Return the number of instructions that can be issued per cycle. */
7304
7305 static int
7306 loongarch_issue_rate (void)
7307 {
7308 if ((unsigned long) la_target.cpu_tune < N_TUNE_TYPES)
7309 return loongarch_cpu_issue_rate[la_target.cpu_tune];
7310 else
7311 return 1;
7312 }
7313
7314 /* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD. This should
7315 be as wide as the scheduling freedom in the DFA. */
7316
7317 static int
7318 loongarch_multipass_dfa_lookahead (void)
7319 {
7320   if ((unsigned long) la_target.cpu_tune < N_TUNE_TYPES)
7321 return loongarch_cpu_multipass_dfa_lookahead[la_target.cpu_tune];
7322 else
7323 return 0;
7324 }
7325
7326 /* Implement TARGET_SCHED_REORDER. */
7327
7328 static int
7329 loongarch_sched_reorder (FILE *file ATTRIBUTE_UNUSED,
7330 int verbose ATTRIBUTE_UNUSED,
7331 rtx_insn **ready ATTRIBUTE_UNUSED,
7332 int *nreadyp ATTRIBUTE_UNUSED,
7333 int cycle ATTRIBUTE_UNUSED)
7334 {
7335 return loongarch_issue_rate ();
7336 }
7337
7338 /* Implement TARGET_SCHED_REORDER2. */
7339
7340 static int
7341 loongarch_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED,
7342 int verbose ATTRIBUTE_UNUSED,
7343 rtx_insn **ready ATTRIBUTE_UNUSED,
7344 int *nreadyp ATTRIBUTE_UNUSED,
7345 int cycle ATTRIBUTE_UNUSED)
7346 {
7347 return cached_can_issue_more;
7348 }
7349
7350 /* Implement TARGET_SCHED_INIT. */
7351
7352 static void
7353 loongarch_sched_init (FILE *file ATTRIBUTE_UNUSED,
7354 int verbose ATTRIBUTE_UNUSED,
7355 int max_ready ATTRIBUTE_UNUSED)
7356 {}
7357
7358 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
7359
7360 static int
7361 loongarch_variable_issue (FILE *file ATTRIBUTE_UNUSED,
7362 int verbose ATTRIBUTE_UNUSED, rtx_insn *insn,
7363 int more)
7364 {
7365 /* Ignore USEs and CLOBBERs; don't count them against the issue rate. */
7366 if (USEFUL_INSN_P (insn))
7367 {
7368 if (get_attr_type (insn) != TYPE_GHOST)
7369 more--;
7370 }
7371
7372 /* Instructions of type 'multi' should all be split before
7373 the second scheduling pass. */
7374 gcc_assert (!reload_completed
7375 || recog_memoized (insn) < 0
7376 || get_attr_type (insn) != TYPE_MULTI);
7377
7378 cached_can_issue_more = more;
7379 return more;
7380 }
7381
7382 /* Given that we have an rtx of the form (prefetch ... WRITE LOCALITY),
7383    return the first operand of the associated prefetch insn.  */
7384
7385 rtx
7386 loongarch_prefetch_cookie (rtx write, rtx locality)
7387 {
7388 /* store_streamed / load_streamed. */
7389 if (INTVAL (locality) <= 0)
7390 return GEN_INT (INTVAL (write) + 4);
7391
7392 /* store / load. */
7393 if (INTVAL (locality) <= 2)
7394 return write;
7395
7396 /* store_retained / load_retained. */
7397 return GEN_INT (INTVAL (write) + 6);
7398 }
7399
7400 /* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text
7401 in order to avoid duplicating too much logic from elsewhere. */
7402
7403 static void
7404 loongarch_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
7405 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7406 tree function)
7407 {
7408 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
7409 rtx this_rtx, temp1, temp2, fnaddr;
7410 rtx_insn *insn;
7411 bool use_sibcall_p;
7412
7413 /* Pretend to be a post-reload pass while generating rtl. */
7414 reload_completed = 1;
7415
7416 /* Mark the end of the (empty) prologue. */
7417 emit_note (NOTE_INSN_PROLOGUE_END);
7418
7419 /* Determine if we can use a sibcall to call FUNCTION directly. */
7420 fnaddr = XEXP (DECL_RTL (function), 0);
7421 use_sibcall_p = const_call_insn_operand (fnaddr, Pmode);
7422
7423 /* We need two temporary registers in some cases. */
7424 temp1 = gen_rtx_REG (Pmode, 12);
7425 temp2 = gen_rtx_REG (Pmode, 13);
7426
7427 /* Find out which register contains the "this" pointer. */
7428 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7429 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
7430 else
7431 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);
7432
7433 /* Add DELTA to THIS_RTX. */
7434 if (delta != 0)
7435 {
7436 rtx offset = GEN_INT (delta);
7437 if (!IMM12_OPERAND (delta))
7438 {
7439 loongarch_emit_move (temp1, offset);
7440 offset = temp1;
7441 }
7442 emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
7443 }
7444
7445 /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
7446 if (vcall_offset != 0)
7447 {
7448 rtx addr;
7449
7450 /* Set TEMP1 to *THIS_RTX. */
7451 loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));
7452
7453 /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */
7454 addr = loongarch_add_offset (temp2, temp1, vcall_offset);
7455
7456 /* Load the offset and add it to THIS_RTX. */
7457 loongarch_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
7458 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
7459 }
7460
7461 /* Jump to the target function. Use a sibcall if direct jumps are
7462 allowed, otherwise load the address into a register first. */
7463 if (use_sibcall_p)
7464 {
7465 insn = emit_call_insn (gen_sibcall_internal (fnaddr, const0_rtx));
7466 SIBLING_CALL_P (insn) = 1;
7467 }
7468 else
7469 {
7470 loongarch_emit_move (temp1, fnaddr);
7471 emit_jump_insn (gen_indirect_jump (temp1));
7472 }
7473
7474 /* Run just enough of rest_of_compilation. This sequence was
7475 "borrowed" from alpha.c. */
7476 insn = get_insns ();
7477 split_all_insns_noflow ();
7478 shorten_branches (insn);
7479 assemble_start_function (thunk_fndecl, fnname);
7480 final_start_function (insn, file, 1);
7481 final (insn, file, 1);
7482 final_end_function ();
7483 assemble_end_function (thunk_fndecl, fnname);
7484
7485 /* Stop pretending to be a post-reload pass. */
7486 reload_completed = 0;
7487 }
7488
7489 /* Allocate a chunk of memory for per-function machine-dependent data. */
7490
7491 static struct machine_function *
7492 loongarch_init_machine_status (void)
7493 {
7494 return ggc_cleared_alloc<machine_function> ();
7495 }
7496
7497 static void
7498 loongarch_cpu_option_override (struct loongarch_target *target,
7499 struct gcc_options *opts,
7500 struct gcc_options *opts_set)
7501 {
7502 /* alignments */
7503 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
7504 opts->x_str_align_functions
7505 = loongarch_cpu_align[target->cpu_tune].function;
7506
7507 if (opts->x_flag_align_labels && !opts->x_str_align_labels)
7508 opts->x_str_align_labels = loongarch_cpu_align[target->cpu_tune].label;
7509
7510 /* Set up parameters to be used in prefetching algorithm. */
7511 int simultaneous_prefetches
7512 = loongarch_cpu_cache[target->cpu_tune].simultaneous_prefetches;
7513
7514 SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches,
7515 simultaneous_prefetches);
7516
7517 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size,
7518 loongarch_cpu_cache[target->cpu_tune].l1d_line_size);
7519
7520 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size,
7521 loongarch_cpu_cache[target->cpu_tune].l1d_size);
7522
7523 SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size,
7524 loongarch_cpu_cache[target->cpu_tune].l2d_size);
7525 }
7526
7527 static void
7528 loongarch_option_override_internal (struct gcc_options *opts,
7529 struct gcc_options *opts_set)
7530 {
7531 int i, regno, mode;
7532
7533 if (flag_pic)
7534 g_switch_value = 0;
7535
7536 loongarch_init_target (&la_target,
7537 la_opt_cpu_arch, la_opt_cpu_tune, la_opt_fpu,
7538 la_opt_simd, la_opt_abi_base, la_opt_abi_ext,
7539 la_opt_cmodel, opts->x_la_isa_evolution,
7540 opts_set->x_la_isa_evolution);
7541
7542 /* Handle target-specific options: compute defaults/conflicts etc. */
7543 loongarch_config_target (&la_target, NULL, 0);
7544
7545 loongarch_update_gcc_opt_status (&la_target, opts, opts_set);
7546 loongarch_cpu_option_override (&la_target, opts, opts_set);
7547
7548 if (TARGET_ABI_LP64)
7549 flag_pcc_struct_return = 0;
7550
7551 /* Decide which rtx_costs structure to use. */
7552 if (optimize_size)
7553 loongarch_cost = &loongarch_rtx_cost_optimize_size;
7554 else
7555 loongarch_cost = &loongarch_cpu_rtx_cost_data[la_target.cpu_tune];
7556
7557 /* If the user hasn't specified a branch cost, use the processor's
7558 default. */
7559 if (la_branch_cost == 0)
7560 la_branch_cost = loongarch_cost->branch_cost;
7561
7562 /* Enable sw prefetching at -O3 and higher. */
7563 if (opts->x_flag_prefetch_loop_arrays < 0
7564 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
7565 && !opts->x_optimize_size)
7566 opts->x_flag_prefetch_loop_arrays = 1;
7567
7568 if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
7569 error ("%qs cannot be used for compiling a shared library",
7570 "-mdirect-extern-access");
7571
7572 switch (la_target.cmodel)
7573 {
7574 case CMODEL_EXTREME:
7575 if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)
7576 error ("code model %qs is not compatible with %s",
7577 "extreme", "-mexplicit-relocs=none");
7578
7579 if (opts->x_flag_plt)
7580 {
7581 if (global_options_set.x_flag_plt)
7582 error ("code model %qs is not compatible with %s",
7583 "extreme", "-fplt");
7584 opts->x_flag_plt = 0;
7585 }
7586 break;
7587
7588 case CMODEL_TINY_STATIC:
7589 case CMODEL_MEDIUM:
7590 case CMODEL_NORMAL:
7591 case CMODEL_TINY:
7592 case CMODEL_LARGE:
7593 break;
7594
7595 default:
7596 gcc_unreachable ();
7597 }
7598
7599 /* Validate the guard size. */
7600 int guard_size = param_stack_clash_protection_guard_size;
7601
7602   /* Enforce that the probe interval is the same as the guard size so the
7603      mid-end does the right thing.  */
7604 SET_OPTION_IF_UNSET (opts, &global_options_set,
7605 param_stack_clash_protection_probe_interval,
7606 guard_size);
7607
7608 loongarch_init_print_operand_punct ();
7609
7610 /* Set up array to map GCC register number to debug register number.
7611 Ignore the special purpose register numbers. */
7612
7613 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7614 {
7615 if (GP_REG_P (i) || FP_REG_P (i))
7616 loongarch_dwarf_regno[i] = i;
7617 else
7618 loongarch_dwarf_regno[i] = INVALID_REGNUM;
7619 }
7620
7621 /* Set up loongarch_hard_regno_mode_ok. */
7622 for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
7623 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7624 loongarch_hard_regno_mode_ok_p[mode][regno]
7625 = loongarch_hard_regno_mode_ok_uncached (regno, (machine_mode) mode);
7626
7627 /* Function to allocate machine-dependent function status. */
7628 init_machine_status = &loongarch_init_machine_status;
7629
7630 /* -mrecip options. */
7631 static struct
7632 {
7633 const char *string; /* option name. */
7634 unsigned int mask; /* mask bits to set. */
7635 }
7636 const recip_options[] = {
7637 { "all", RECIP_MASK_ALL },
7638 { "none", RECIP_MASK_NONE },
7639 { "div", RECIP_MASK_DIV },
7640 { "sqrt", RECIP_MASK_SQRT },
7641 { "rsqrt", RECIP_MASK_RSQRT },
7642 { "vec-div", RECIP_MASK_VEC_DIV },
7643 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
7644 { "vec-rsqrt", RECIP_MASK_VEC_RSQRT },
7645 };
7646
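/* Parse the comma-separated -mrecip= list below.  Each name selects one of
   the mask bits from recip_options above, "default"/"all" select every bit,
   and a leading '!' clears the named bits instead of setting them; for
   instance "-mrecip=all,!sqrt" (an illustrative value, not taken from this
   file) would enable every approximation except the scalar square root.  */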
7647 if (la_recip_name)
7648 {
7649 char *p = ASTRDUP (la_recip_name);
7650 char *q;
7651 unsigned int mask, i;
7652 bool invert;
7653
7654 while ((q = strtok (p, ",")) != NULL)
7655 {
7656 p = NULL;
7657 if (*q == '!')
7658 {
7659 invert = true;
7660 q++;
7661 }
7662 else
7663 invert = false;
7664
7665 if (!strcmp (q, "default"))
7666 mask = RECIP_MASK_ALL;
7667 else
7668 {
7669 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
7670 if (!strcmp (q, recip_options[i].string))
7671 {
7672 mask = recip_options[i].mask;
7673 break;
7674 }
7675
7676 if (i == ARRAY_SIZE (recip_options))
7677 {
7678 error ("unknown option for %<-mrecip=%s%>", q);
7679 invert = false;
7680 mask = RECIP_MASK_NONE;
7681 }
7682 }
7683
7684 if (invert)
7685 recip_mask &= ~mask;
7686 else
7687 recip_mask |= mask;
7688 }
7689 }
7690 if (la_recip)
7691 recip_mask |= RECIP_MASK_ALL;
7692 if (!ISA_HAS_FRECIPE)
7693 recip_mask = RECIP_MASK_NONE;
7694
7695 #define INIT_TARGET_FLAG(NAME, INIT) \
7696 { \
7697 if (!(target_flags_explicit & MASK_##NAME)) \
7698 { \
7699 if (INIT) \
7700 target_flags |= MASK_##NAME; \
7701 else \
7702 target_flags &= ~MASK_##NAME; \
7703 } \
7704 }
7705
7706 /* Enable conditional moves for int and float by default. */
7707 INIT_TARGET_FLAG (COND_MOVE_INT, 1)
7708 INIT_TARGET_FLAG (COND_MOVE_FLOAT, 1)
7709
7710 /* Set mrelax default. */
7711 INIT_TARGET_FLAG (LINKER_RELAXATION,
7712 HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION)
7713
7714 #undef INIT_TARGET_FLAG
7715
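/* If the user did not choose an -mexplicit-relocs= mode, pick a default from
   the assembler capabilities probed at configure time: prefer "auto" when
   linker relaxation is enabled, "always" otherwise, and fall back to "none"
   if the assembler lacks explicit reloc support.  */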
7716 if (la_opt_explicit_relocs == M_OPT_UNSET)
7717 la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS
7718 ? (TARGET_LINKER_RELAXATION
7719 ? EXPLICIT_RELOCS_AUTO
7720 : EXPLICIT_RELOCS_ALWAYS)
7721 : EXPLICIT_RELOCS_NONE);
7722 }
7723
7724
7725 /* Implement TARGET_OPTION_OVERRIDE. */
7726
7727 static void
7728 loongarch_option_override (void)
7729 {
7730 loongarch_option_override_internal (&global_options, &global_options_set);
7731 }
7732
7733 /* Implement TARGET_OPTION_SAVE. */
7734 static void
7735 loongarch_option_save (struct cl_target_option *,
7736 struct gcc_options *opts,
7737 struct gcc_options *opts_set)
7738 {
7739 loongarch_update_gcc_opt_status (&la_target, opts, opts_set);
7740 }
7741
7742 /* Implement TARGET_OPTION_RESTORE. */
7743 static void
7744 loongarch_option_restore (struct gcc_options *,
7745 struct gcc_options *,
7746 struct cl_target_option *ptr)
7747 {
7748 la_target.cpu_arch = ptr->x_la_opt_cpu_arch;
7749 la_target.cpu_tune = ptr->x_la_opt_cpu_tune;
7750
7751 la_target.isa.fpu = ptr->x_la_opt_fpu;
7752 la_target.isa.simd = ptr->x_la_opt_simd;
7753 la_target.isa.evolution = ptr->x_la_isa_evolution;
7754
7755 la_target.cmodel = ptr->x_la_opt_cmodel;
7756 }
7757
7758 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
7759
7760 static void
7761 loongarch_conditional_register_usage (void)
7762 {
7763 if (!TARGET_HARD_FLOAT)
7764 accessible_reg_set &= ~(reg_class_contents[FP_REGS]
7765 | reg_class_contents[FCC_REGS]);
7766 }
7767
7768 /* Implement EH_USES. */
7769
7770 bool
7771 loongarch_eh_uses (unsigned int regno ATTRIBUTE_UNUSED)
7772 {
7773 return false;
7774 }
7775
7776 /* Implement EPILOGUE_USES. */
7777
7778 bool
7779 loongarch_epilogue_uses (unsigned int regno)
7780 {
7781 /* Say that the epilogue uses the return address register. Note that
7782 in the case of sibcalls, the values "used by the epilogue" are
7783 considered live at the start of the called function. */
7784 if (regno == RETURN_ADDR_REGNUM)
7785 return true;
7786
7787 return false;
7788 }
7789
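/* Return true if the two loads (or stores) described by OPERANDS can be
   bonded into a paired access: both addresses must be valid single-insn
   addresses off the same base register, the offsets must differ by exactly
   the mode size, the two data registers must be of compatible register
   classes, and for loads neither destination may clobber the base.  */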
7790 bool
7791 loongarch_load_store_bonding_p (rtx *operands, machine_mode mode, bool load_p)
7792 {
7793 rtx reg1, reg2, mem1, mem2, base1, base2;
7794 enum reg_class rc1, rc2;
7795 HOST_WIDE_INT offset1, offset2;
7796
7797 if (load_p)
7798 {
7799 reg1 = operands[0];
7800 reg2 = operands[2];
7801 mem1 = operands[1];
7802 mem2 = operands[3];
7803 }
7804 else
7805 {
7806 reg1 = operands[1];
7807 reg2 = operands[3];
7808 mem1 = operands[0];
7809 mem2 = operands[2];
7810 }
7811
7812 if (loongarch_address_insns (XEXP (mem1, 0), mode, false) == 0
7813 || loongarch_address_insns (XEXP (mem2, 0), mode, false) == 0)
7814 return false;
7815
7816 loongarch_split_plus (XEXP (mem1, 0), &base1, &offset1);
7817 loongarch_split_plus (XEXP (mem2, 0), &base2, &offset2);
7818
7819 /* Base regs do not match. */
7820 if (!REG_P (base1) || !rtx_equal_p (base1, base2))
7821 return false;
7822
7823 /* One of the loads clobbers the base register. It would be legitimate to
7824 bond the loads if the second load clobbers the base register, but the
7825 hardware does not support such bonding. */
7826 if (load_p
7827 && (REGNO (reg1) == REGNO (base1) || (REGNO (reg2) == REGNO (base1))))
7828 return false;
7829
7830 /* Loads into the same register. */
7831 if (load_p && REGNO (reg1) == REGNO (reg2))
7832 return false;
7833
7834 /* The loads/stores are not of the same type. */
7835 rc1 = REGNO_REG_CLASS (REGNO (reg1));
7836 rc2 = REGNO_REG_CLASS (REGNO (reg2));
7837 if (rc1 != rc2 && !reg_class_subset_p (rc1, rc2)
7838 && !reg_class_subset_p (rc2, rc1))
7839 return false;
7840
7841 if (abs (offset1 - offset2) != GET_MODE_SIZE (mode))
7842 return false;
7843
7844 return true;
7845 }
7846
7847 /* Implement TARGET_TRAMPOLINE_INIT. */
7848
7849 static void
7850 loongarch_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
7851 {
7852 rtx addr, end_addr, mem;
7853 rtx trampoline[8];
7854 unsigned int i, j;
7855 HOST_WIDE_INT end_addr_offset, static_chain_offset, target_function_offset;
7856
7857 /* Work out the offsets of the pointers from the start of the
7858 trampoline code. */
7859 end_addr_offset = TRAMPOLINE_CODE_SIZE;
7860 static_chain_offset = end_addr_offset;
7861 target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);
7862
7863 /* Get pointers to the beginning and end of the code block. */
7864 addr = force_reg (Pmode, XEXP (m_tramp, 0));
7865 end_addr
7866 = loongarch_force_binary (Pmode, PLUS, addr, GEN_INT (end_addr_offset));
7867
7868 #define OP(X) gen_int_mode (X, SImode)
7869
7870 /* Build up the code in TRAMPOLINE. */
7871 i = 0;
7872 /* pcaddi $static_chain,0
7873 ld.[dw] $tmp,$static_chain,target_function_offset
7874 ld.[dw] $static_chain,$static_chain,static_chain_offset
7875 jirl $r0,$tmp,0 */
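/* The load words below are assembled as OPCODE | rd | (rj << 5) | (si12 << 10),
   matching the shifts used in this function: 0x28c00000/0x28800000 are the
   ld.d/ld.w opcodes with a 12-bit offset, 0x18000000 is the pcaddi opcode with
   the static chain register as rd, and 0x4c000000 is jirl with $t7 (register
   19) as rj.  */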
7876 trampoline[i++] = OP (0x18000000 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST));
7877 trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000)
7878 | 19 /* $t7 */
7879 | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5)
7880 | ((target_function_offset & 0xfff) << 10));
7881 trampoline[i++] = OP ((ptr_mode == DImode ? 0x28c00000 : 0x28800000)
7882 | (STATIC_CHAIN_REGNUM - GP_REG_FIRST)
7883 | ((STATIC_CHAIN_REGNUM - GP_REG_FIRST) << 5)
7884 | ((static_chain_offset & 0xfff) << 10));
7885 trampoline[i++] = OP (0x4c000000 | (19 << 5));
7886 #undef OP
7887
7888 for (j = 0; j < i; j++)
7889 {
7890 mem = adjust_address (m_tramp, SImode, j * GET_MODE_SIZE (SImode));
7891 loongarch_emit_move (mem, trampoline[j]);
7892 }
7893
7894 /* Set up the static chain pointer field. */
7895 mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
7896 loongarch_emit_move (mem, chain_value);
7897
7898 /* Set up the target function field. */
7899 mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
7900 loongarch_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
7901
7902 /* Flush the code part of the trampoline. */
7903 emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
7904 emit_insn (gen_clear_cache (addr, end_addr));
7905 }
7906
7907 /* Generate or test for an insn that supports a constant permutation. */
7908
7909 #define MAX_VECT_LEN 32
7910
7911 struct expand_vec_perm_d
7912 {
7913 rtx target, op0, op1;
7914 unsigned char perm[MAX_VECT_LEN];
7915 machine_mode vmode;
7916 unsigned char nelt;
7917 bool one_vector_p;
7918 bool testing_p;
7919 };
7920
7921 /* Construct (set target (vec_select op0 (parallel perm))) and
7922 return true if that's a valid instruction in the active ISA. */
7923
7924 static bool
7925 loongarch_expand_vselect (rtx target, rtx op0,
7926 const unsigned char *perm, unsigned nelt)
7927 {
7928 rtx rperm[MAX_VECT_LEN], x;
7929 rtx_insn *insn;
7930 unsigned i;
7931
7932 for (i = 0; i < nelt; ++i)
7933 rperm[i] = GEN_INT (perm[i]);
7934
7935 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
7936 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
7937 x = gen_rtx_SET (target, x);
7938
7939 insn = emit_insn (x);
7940 if (recog_memoized (insn) < 0)
7941 {
7942 remove_insn (insn);
7943 return false;
7944 }
7945 return true;
7946 }
7947
7948 /* Similar, but generate a vec_concat from op0 and op1 as well. */
7949
7950 static bool
7951 loongarch_expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
7952 const unsigned char *perm, unsigned nelt)
7953 {
7954 machine_mode v2mode;
7955 rtx x;
7956
7957 if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
7958 return false;
7959 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
7960 return loongarch_expand_vselect (target, x, perm, nelt);
7961 }
7962
7963 static tree
7964 loongarch_handle_model_attribute (tree *node, tree name, tree arg, int,
7965 bool *no_add_attrs)
7966 {
7967 tree decl = *node;
7968 if (VAR_P (decl))
7969 {
7970 if (DECL_THREAD_LOCAL_P (decl))
7971 {
7972 error_at (DECL_SOURCE_LOCATION (decl),
7973 "%qE attribute cannot be specified for thread-local "
7974 "variables", name);
7975 *no_add_attrs = true;
7976 return NULL_TREE;
7977 }
7978 if (DECL_CONTEXT (decl)
7979 && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
7980 && !TREE_STATIC (decl))
7981 {
7982 error_at (DECL_SOURCE_LOCATION (decl),
7983 "%qE attribute cannot be specified for local "
7984 "variables", name);
7985 *no_add_attrs = true;
7986 return NULL_TREE;
7987 }
7988 if (DECL_REGISTER (decl))
7989 {
7990 error_at (DECL_SOURCE_LOCATION (decl),
7991 "%qE attribute cannot be specified for register "
7992 "variables", name);
7993 *no_add_attrs = true;
7994 return NULL_TREE;
7995 }
7996 if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)
7997 {
7998 error_at (DECL_SOURCE_LOCATION (decl),
7999 "%qE attribute is not compatible with %s", name,
8000 "-mexplicit-relocs=none");
8001 *no_add_attrs = true;
8002 return NULL_TREE;
8003 }
8004
8005 arg = TREE_VALUE (arg);
8006 if (TREE_CODE (arg) != STRING_CST)
8007 {
8008 error_at (DECL_SOURCE_LOCATION (decl),
8009 "invalid argument of %qE attribute", name);
8010 *no_add_attrs = true;
8011 return NULL_TREE;
8012 }
8013
8014 const char *model = TREE_STRING_POINTER (arg);
8015 if (strcmp (model, "normal") != 0
8016 && strcmp (model, "extreme") != 0)
8017 {
8018 error_at (DECL_SOURCE_LOCATION (decl),
8019 "invalid argument of %qE attribute", name);
8020 *no_add_attrs = true;
8021 return NULL_TREE;
8022 }
8023
8024 if (lookup_attribute ("model", DECL_ATTRIBUTES (decl)))
8025 {
8026 error_at (DECL_SOURCE_LOCATION (decl),
8027 "multiple %qE attribute", name);
8028 *no_add_attrs = true;
8029 return NULL_TREE;
8030 }
8031 }
8032 else
8033 {
8034 warning (OPT_Wattributes, "%qE attribute ignored", name);
8035 *no_add_attrs = true;
8036 }
8037 return NULL_TREE;
8038 }
8039
8040 TARGET_GNU_ATTRIBUTES (loongarch_attribute_table,
8041 {
8042 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
8043 affects_type_identity, handler, exclude } */
8044 { "model", 1, 1, true, false, false, false,
8045 loongarch_handle_model_attribute, NULL }
8046 });
8047
8048 bool
8049 loongarch_use_anchors_for_symbol_p (const_rtx symbol)
8050 {
8051 tree decl = SYMBOL_REF_DECL (symbol);
8052
8053 /* The section anchor optimization may break custom address model. */
8054 if (decl && lookup_attribute ("model", DECL_ATTRIBUTES (decl)))
8055 return false;
8056
8057 return default_use_anchors_for_symbol_p (symbol);
8058 }
8059
8060 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
8061
8062 static unsigned HOST_WIDE_INT
8063 loongarch_asan_shadow_offset (void)
8064 {
8065 /* We only have libsanitizer support for LOONGARCH64 at present.
8066 This value is taken from the file libsanitizer/asan/asan_mapping.h. */
8067 return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
8068 }
8069
8070 static sbitmap
8071 loongarch_get_separate_components (void)
8072 {
8073 HOST_WIDE_INT offset;
8074 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
8075 bitmap_clear (components);
8076 offset = cfun->machine->frame.gp_sp_offset;
8077
8078 /* The stack should be aligned to a 16-byte boundary, so we can make use
8079 of the ldptr instructions. */
8080 gcc_assert (offset % UNITS_PER_WORD == 0);
8081
8082 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8083 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
8084 {
8085 /* We can wrap general registers saved at [sp, sp + 32768) using the
8086 ldptr/stptr instructions. For larger offsets a pseudo register
8087 might be needed, which cannot be created during the shrink
8088 wrapping pass.
8089
8090 TODO: This may need revisiting when we add LA32, as ldptr.w is not
8091 guaranteed to be available by the manual. */
8092 if (offset < 32768)
8093 bitmap_set_bit (components, regno);
8094
8095 offset -= UNITS_PER_WORD;
8096 }
8097
8098 offset = cfun->machine->frame.fp_sp_offset;
8099 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8100 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
8101 {
8102 /* We can only wrap FP registers with imm12 offsets. For large
8103 offsets a pseudo register might be needed which cannot be
8104 created during the shrink wrapping pass. */
8105 if (IMM12_OPERAND (offset))
8106 bitmap_set_bit (components, regno);
8107
8108 offset -= UNITS_PER_FPREG;
8109 }
8110
8111 /* Don't mess with the hard frame pointer. */
8112 if (frame_pointer_needed)
8113 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
8114
8115 bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
8116
8117 return components;
8118 }
8119
8120 static sbitmap
8121 loongarch_components_for_bb (basic_block bb)
8122 {
8123 /* Registers are used in a bb if they are in the IN, GEN, or KILL sets. */
8124 auto_bitmap used;
8125 bitmap_copy (used, DF_LIVE_IN (bb));
8126 bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->gen);
8127 bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->kill);
8128
8129 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
8130 bitmap_clear (components);
8131
8132 function_abi_aggregator callee_abis;
8133 rtx_insn *insn;
8134 FOR_BB_INSNS (bb, insn)
8135 if (CALL_P (insn))
8136 callee_abis.note_callee_abi (insn_callee_abi (insn));
8137
8138 HARD_REG_SET extra_caller_saves =
8139 callee_abis.caller_save_regs (*crtl->abi);
8140
8141 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8142 if (!fixed_regs[regno]
8143 && !crtl->abi->clobbers_full_reg_p (regno)
8144 && (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
8145 bitmap_bit_p (used, regno)))
8146 bitmap_set_bit (components, regno);
8147
8148 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8149 if (!fixed_regs[regno]
8150 && !crtl->abi->clobbers_full_reg_p (regno)
8151 && (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
8152 bitmap_bit_p (used, regno)))
8153 bitmap_set_bit (components, regno);
8154
8155 return components;
8156 }
8157
8158 static void
8159 loongarch_disqualify_components (sbitmap, edge, sbitmap, bool)
8160 {
8161 /* Do nothing. */
8162 }
8163
8164 static void
8165 loongarch_process_components (sbitmap components, loongarch_save_restore_fn fn)
8166 {
8167 HOST_WIDE_INT offset = cfun->machine->frame.gp_sp_offset;
8168
8169 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8170 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
8171 {
8172 if (bitmap_bit_p (components, regno))
8173 loongarch_save_restore_reg (word_mode, regno, offset, fn);
8174
8175 offset -= UNITS_PER_WORD;
8176 }
8177
8178 offset = cfun->machine->frame.fp_sp_offset;
8179 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
8180
8181 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8182 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
8183 {
8184 if (bitmap_bit_p (components, regno))
8185 loongarch_save_restore_reg (mode, regno, offset, fn);
8186
8187 offset -= UNITS_PER_FPREG;
8188 }
8189 }
8190
8191 static void
8192 loongarch_emit_prologue_components (sbitmap components)
8193 {
8194 loongarch_process_components (components, loongarch_save_reg);
8195 }
8196
8197 static void
8198 loongarch_emit_epilogue_components (sbitmap components)
8199 {
8200 loongarch_process_components (components, loongarch_restore_reg);
8201 }
8202
8203 static void
8204 loongarch_set_handled_components (sbitmap components)
8205 {
8206 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8207 if (bitmap_bit_p (components, regno))
8208 cfun->machine->reg_is_wrapped_separately[regno] = true;
8209
8210 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
8211 if (bitmap_bit_p (components, regno))
8212 cfun->machine->reg_is_wrapped_separately[regno] = true;
8213 }
8214
8215 /* Initialize the GCC target structure. */
8216 #undef TARGET_ASM_ALIGNED_HI_OP
8217 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
8218 #undef TARGET_ASM_ALIGNED_SI_OP
8219 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8220 #undef TARGET_ASM_ALIGNED_DI_OP
8221 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
8222 /* Construct (set target (vec_select op0 (parallel selector))) and
8223 return true if that's a valid instruction in the active ISA. */
8224
8225 static bool
8226 loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
8227 {
8228 rtx x, elts[MAX_VECT_LEN];
8229 rtvec v;
8230 rtx_insn *insn;
8231 unsigned i;
8232
8233 if (!ISA_HAS_LSX && !ISA_HAS_LASX)
8234 return false;
8235
8236 for (i = 0; i < d->nelt; i++)
8237 elts[i] = GEN_INT (d->perm[i]);
8238
8239 v = gen_rtvec_v (d->nelt, elts);
8240 x = gen_rtx_PARALLEL (VOIDmode, v);
8241
8242 if (!loongarch_const_vector_shuffle_set_p (x, d->vmode))
8243 return false;
8244
8245 x = gen_rtx_VEC_SELECT (d->vmode, d->op0, x);
8246 x = gen_rtx_SET (d->target, x);
8247
8248 insn = emit_insn (x);
8249 if (recog_memoized (insn) < 0)
8250 {
8251 remove_insn (insn);
8252 return false;
8253 }
8254 return true;
8255 }
8256
8257 /* Try to simplify a two vector permutation using 2 intra-lane interleave
8258 insns and cross-lane shuffle for 32-byte vectors. */
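
/* For example, with V8SImode (nelt == 8) the selectors recognized below are
   { 0, 8, 1, 9, 2, 10, 3, 11 } (interleaving the low halves of the two
   operands) and { 4, 12, 5, 13, 6, 14, 7, 15 } (interleaving the high
   halves).  */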
8259
8260 static bool
8261 loongarch_expand_vec_perm_interleave (struct expand_vec_perm_d *d)
8262 {
8263 unsigned i, nelt;
8264 rtx t1, t2, t3;
8265 rtx (*gen_high) (rtx, rtx, rtx);
8266 rtx (*gen_low) (rtx, rtx, rtx);
8267 machine_mode mode = GET_MODE (d->target);
8268
8269 if (d->one_vector_p)
8270 return false;
8271 if (ISA_HAS_LASX && GET_MODE_SIZE (d->vmode) == 32)
8272 ;
8273 else
8274 return false;
8275
8276 nelt = d->nelt;
8277 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
8278 return false;
8279 for (i = 0; i < nelt; i += 2)
8280 if (d->perm[i] != d->perm[0] + i / 2
8281 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
8282 return false;
8283
8284 if (d->testing_p)
8285 return true;
8286
8287 switch (d->vmode)
8288 {
8289 case E_V32QImode:
8290 gen_high = gen_lasx_xvilvh_b;
8291 gen_low = gen_lasx_xvilvl_b;
8292 break;
8293 case E_V16HImode:
8294 gen_high = gen_lasx_xvilvh_h;
8295 gen_low = gen_lasx_xvilvl_h;
8296 break;
8297 case E_V8SImode:
8298 gen_high = gen_lasx_xvilvh_w;
8299 gen_low = gen_lasx_xvilvl_w;
8300 break;
8301 case E_V4DImode:
8302 gen_high = gen_lasx_xvilvh_d;
8303 gen_low = gen_lasx_xvilvl_d;
8304 break;
8305 case E_V8SFmode:
8306 gen_high = gen_lasx_xvilvh_w_f;
8307 gen_low = gen_lasx_xvilvl_w_f;
8308 break;
8309 case E_V4DFmode:
8310 gen_high = gen_lasx_xvilvh_d_f;
8311 gen_low = gen_lasx_xvilvl_d_f;
8312 break;
8313 default:
8314 gcc_unreachable ();
8315 }
8316
8317 t1 = gen_reg_rtx (mode);
8318 t2 = gen_reg_rtx (mode);
8319 emit_insn (gen_high (t1, d->op0, d->op1));
8320 emit_insn (gen_low (t2, d->op0, d->op1));
8321 if (mode == V4DFmode || mode == V8SFmode)
8322 {
8323 t3 = gen_reg_rtx (V4DFmode);
8324 if (d->perm[0])
8325 emit_insn (gen_lasx_xvpermi_q_v4df (t3, gen_lowpart (V4DFmode, t1),
8326 gen_lowpart (V4DFmode, t2),
8327 GEN_INT (0x31)));
8328 else
8329 emit_insn (gen_lasx_xvpermi_q_v4df (t3, gen_lowpart (V4DFmode, t1),
8330 gen_lowpart (V4DFmode, t2),
8331 GEN_INT (0x20)));
8332 }
8333 else
8334 {
8335 t3 = gen_reg_rtx (V4DImode);
8336 if (d->perm[0])
8337 emit_insn (gen_lasx_xvpermi_q_v4di (t3, gen_lowpart (V4DImode, t1),
8338 gen_lowpart (V4DImode, t2),
8339 GEN_INT (0x31)));
8340 else
8341 emit_insn (gen_lasx_xvpermi_q_v4di (t3, gen_lowpart (V4DImode, t1),
8342 gen_lowpart (V4DImode, t2),
8343 GEN_INT (0x20)));
8344 }
8345 emit_move_insn (d->target, gen_lowpart (mode, t3));
8346 return true;
8347 }
8348
8349 /* Implement extract-even and extract-odd permutations. */
8350
8351 static bool
8352 loongarch_expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
8353 {
8354 rtx t1;
8355 machine_mode mode = GET_MODE (d->target);
8356
8357 if (d->testing_p)
8358 return true;
8359
8360 t1 = gen_reg_rtx (mode);
8361
8362 switch (d->vmode)
8363 {
8364 case E_V4DFmode:
8365 /* Shuffle the lanes around into { 0 4 2 6 } and { 1 5 3 7 }. */
8366 if (odd)
8367 emit_insn (gen_lasx_xvilvh_d_f (t1, d->op0, d->op1));
8368 else
8369 emit_insn (gen_lasx_xvilvl_d_f (t1, d->op0, d->op1));
8370
8371 /* Shuffle within the 256-bit lanes to produce the result required.
8372 { 0 2 4 6 } | { 1 3 5 7 }. */
8373 emit_insn (gen_lasx_xvpermi_d_v4df (d->target, t1, GEN_INT (0xd8)));
8374 break;
8375
8376 case E_V4DImode:
8377 if (odd)
8378 emit_insn (gen_lasx_xvilvh_d (t1, d->op0, d->op1));
8379 else
8380 emit_insn (gen_lasx_xvilvl_d (t1, d->op0, d->op1));
8381
8382 emit_insn (gen_lasx_xvpermi_d_v4di (d->target, t1, GEN_INT (0xd8)));
8383 break;
8384
8385 case E_V8SFmode:
8386 /* Shuffle the lanes around into:
8387 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
8388 if (odd)
8389 emit_insn (gen_lasx_xvpickod_w_f (t1, d->op0, d->op1));
8390 else
8391 emit_insn (gen_lasx_xvpickev_w_f (t1, d->op0, d->op1));
8392
8393 /* Shuffle within the 256-bit lanes to produce the result required.
8394 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
8395 emit_insn (gen_lasx_xvpermi_d_v8sf (d->target, t1, GEN_INT (0xd8)));
8396 break;
8397
8398 case E_V8SImode:
8399 if (odd)
8400 emit_insn (gen_lasx_xvpickod_w (t1, d->op0, d->op1));
8401 else
8402 emit_insn (gen_lasx_xvpickev_w (t1, d->op0, d->op1));
8403
8404 emit_insn (gen_lasx_xvpermi_d_v8si (d->target, t1, GEN_INT (0xd8)));
8405 break;
8406
8407 case E_V16HImode:
8408 if (odd)
8409 emit_insn (gen_lasx_xvpickod_h (t1, d->op0, d->op1));
8410 else
8411 emit_insn (gen_lasx_xvpickev_h (t1, d->op0, d->op1));
8412
8413 emit_insn (gen_lasx_xvpermi_d_v16hi (d->target, t1, GEN_INT (0xd8)));
8414 break;
8415
8416 case E_V32QImode:
8417 if (odd)
8418 emit_insn (gen_lasx_xvpickod_b (t1, d->op0, d->op1));
8419 else
8420 emit_insn (gen_lasx_xvpickev_b (t1, d->op0, d->op1));
8421
8422 emit_insn (gen_lasx_xvpermi_d_v32qi (d->target, t1, GEN_INT (0xd8)));
8423 break;
8424
8425 default:
8426 gcc_unreachable ();
8427 }
8428
8429 return true;
8430 }
8431
8432 /* Pattern match extract-even and extract-odd permutations. */
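/* For example, with V8SImode an even-extraction selector is
   { 0, 2, 4, 6, 8, 10, 12, 14 } and an odd-extraction selector is
   { 1, 3, 5, 7, 9, 11, 13, 15 }.  */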
8433
8434 static bool
8435 loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
8436 {
8437 unsigned i, odd, nelt = d->nelt;
8438 if (!ISA_HAS_LASX)
8439 return false;
8440
8441 odd = d->perm[0];
8442 if (odd != 0 && odd != 1)
8443 return false;
8444
8445 for (i = 1; i < nelt; ++i)
8446 if (d->perm[i] != 2 * i + odd)
8447 return false;
8448
8449 return loongarch_expand_vec_perm_even_odd_1 (d, odd);
8450 }
8451
8452 static void
8453 loongarch_expand_vec_interleave (rtx target, rtx op0, rtx op1, bool high_p)
8454 {
8455 struct expand_vec_perm_d d;
8456 unsigned i, nelt, base;
8457 bool ok;
8458
8459 d.target = target;
8460 d.op0 = op0;
8461 d.op1 = op1;
8462 d.vmode = GET_MODE (target);
8463 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8464 d.one_vector_p = false;
8465 d.testing_p = false;
8466
8467 base = high_p ? nelt / 2 : 0;
8468 for (i = 0; i < nelt / 2; ++i)
8469 {
8470 d.perm[i * 2] = i + base;
8471 d.perm[i * 2 + 1] = i + base + nelt;
8472 }
8473
8474 ok = loongarch_expand_vec_perm_interleave (&d);
8475 gcc_assert (ok);
8476 }
8477
8478 /* The LoongArch LASX instructions xvmulwev and xvmulwod operate on the even-
8479 or odd-indexed source elements and place the double-sized results in the
8480 corresponding elements of the target register. That's NOT what the
8481 vec_widen_umult_lo/hi patterns are expected to do. We emulate the widening
8482 lo/hi multiplies with the even/odd versions followed by a vector merge. */
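
/* For instance, a signed vec_widen_mult_hi on two V16HImode inputs is emitted
   below as xvmulwev.w.h into t1 (even-indexed products) and xvmulwod.w.h into
   t2 (odd-indexed products), followed by an interleave of the high halves of
   t1 and t2 to form the V8SImode result.  */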
8483
8484 void
8485 loongarch_expand_vec_widen_hilo (rtx dest, rtx op1, rtx op2,
8486 bool uns_p, bool high_p, const char *optab)
8487 {
8488 machine_mode wmode = GET_MODE (dest);
8489 machine_mode mode = GET_MODE (op1);
8490 rtx t1, t2, t3;
8491
8492 t1 = gen_reg_rtx (wmode);
8493 t2 = gen_reg_rtx (wmode);
8494 t3 = gen_reg_rtx (wmode);
8495 switch (mode)
8496 {
8497 case V16HImode:
8498 if (!strcmp (optab, "add"))
8499 {
8500 if (!uns_p)
8501 {
8502 emit_insn (gen_lasx_xvaddwev_w_h (t1, op1, op2));
8503 emit_insn (gen_lasx_xvaddwod_w_h (t2, op1, op2));
8504 }
8505 else
8506 {
8507 emit_insn (gen_lasx_xvaddwev_w_hu (t1, op1, op2));
8508 emit_insn (gen_lasx_xvaddwod_w_hu (t2, op1, op2));
8509 }
8510 }
8511 else if (!strcmp (optab, "mult"))
8512 {
8513 if (!uns_p)
8514 {
8515 emit_insn (gen_lasx_xvmulwev_w_h (t1, op1, op2));
8516 emit_insn (gen_lasx_xvmulwod_w_h (t2, op1, op2));
8517 }
8518 else
8519 {
8520 emit_insn (gen_lasx_xvmulwev_w_hu (t1, op1, op2));
8521 emit_insn (gen_lasx_xvmulwod_w_hu (t2, op1, op2));
8522 }
8523 }
8524 else if (!strcmp (optab, "sub"))
8525 {
8526 if (!uns_p)
8527 {
8528 emit_insn (gen_lasx_xvsubwev_w_h (t1, op1, op2));
8529 emit_insn (gen_lasx_xvsubwod_w_h (t2, op1, op2));
8530 }
8531 else
8532 {
8533 emit_insn (gen_lasx_xvsubwev_w_hu (t1, op1, op2));
8534 emit_insn (gen_lasx_xvsubwod_w_hu (t2, op1, op2));
8535 }
8536 }
8537 break;
8538
8539 case V32QImode:
8540 if (!strcmp (optab, "add"))
8541 {
8542 if (!uns_p)
8543 {
8544 emit_insn (gen_lasx_xvaddwev_h_b (t1, op1, op2));
8545 emit_insn (gen_lasx_xvaddwod_h_b (t2, op1, op2));
8546 }
8547 else
8548 {
8549 emit_insn (gen_lasx_xvaddwev_h_bu (t1, op1, op2));
8550 emit_insn (gen_lasx_xvaddwod_h_bu (t2, op1, op2));
8551 }
8552 }
8553 else if (!strcmp (optab, "mult"))
8554 {
8555 if (!uns_p)
8556 {
8557 emit_insn (gen_lasx_xvmulwev_h_b (t1, op1, op2));
8558 emit_insn (gen_lasx_xvmulwod_h_b (t2, op1, op2));
8559 }
8560 else
8561 {
8562 emit_insn (gen_lasx_xvmulwev_h_bu (t1, op1, op2));
8563 emit_insn (gen_lasx_xvmulwod_h_bu (t2, op1, op2));
8564 }
8565 }
8566 else if (!strcmp (optab, "sub"))
8567 {
8568 if (!uns_p)
8569 {
8570 emit_insn (gen_lasx_xvsubwev_h_b (t1, op1, op2));
8571 emit_insn (gen_lasx_xvsubwod_h_b (t2, op1, op2));
8572 }
8573 else
8574 {
8575 emit_insn (gen_lasx_xvsubwev_h_bu (t1, op1, op2));
8576 emit_insn (gen_lasx_xvsubwod_h_bu (t2, op1, op2));
8577 }
8578 }
8579 break;
8580
8581 default:
8582 gcc_unreachable ();
8583 }
8584
8585 loongarch_expand_vec_interleave (t3, t1, t2, high_p);
8586 emit_move_insn (dest, gen_lowpart (wmode, t3));
8587 }
8588
8589 /* Expand a variable vector permutation for LASX. */
8590
8591 void
8592 loongarch_expand_vec_perm_1 (rtx operands[])
8593 {
8594 rtx target = operands[0];
8595 rtx op0 = operands[1];
8596 rtx op1 = operands[2];
8597 rtx mask = operands[3];
8598
8599 bool one_operand_shuffle = rtx_equal_p (op0, op1);
8600 rtx t1 = NULL;
8601 rtx t2 = NULL;
8602 rtx t3, t4, t5, t6, vt = NULL;
8603 rtx vec[32] = {NULL};
8604 machine_mode mode = GET_MODE (op0);
8605 machine_mode maskmode = GET_MODE (mask);
8606 int w, i;
8607
8608 /* Number of elements in the vector. */
8609 w = GET_MODE_NUNITS (mode);
8610
8611 rtx round_data[MAX_VECT_LEN];
8612 rtx round_reg, round_data_rtx;
8613
8614 if (mode != E_V32QImode)
8615 {
8616 for (int i = 0; i < w; i += 1)
8617 {
8618 round_data[i] = GEN_INT (0x1f);
8619 }
8620
8621 if (mode == E_V4DFmode)
8622 {
8623 round_data_rtx = gen_rtx_CONST_VECTOR (E_V4DImode,
8624 gen_rtvec_v (w, round_data));
8625 round_reg = gen_reg_rtx (E_V4DImode);
8626 }
8627 else if (mode == E_V8SFmode)
8628 {
8629
8630 round_data_rtx = gen_rtx_CONST_VECTOR (E_V8SImode,
8631 gen_rtvec_v (w, round_data));
8632 round_reg = gen_reg_rtx (E_V8SImode);
8633 }
8634 else
8635 {
8636 round_data_rtx = gen_rtx_CONST_VECTOR (mode,
8637 gen_rtvec_v (w, round_data));
8638 round_reg = gen_reg_rtx (mode);
8639 }
8640
8641 emit_move_insn (round_reg, round_data_rtx);
8642 switch (mode)
8643 {
8644 case E_V32QImode:
8645 emit_insn (gen_andv32qi3 (mask, mask, round_reg));
8646 break;
8647 case E_V16HImode:
8648 emit_insn (gen_andv16hi3 (mask, mask, round_reg));
8649 break;
8650 case E_V8SImode:
8651 case E_V8SFmode:
8652 emit_insn (gen_andv8si3 (mask, mask, round_reg));
8653 break;
8654 case E_V4DImode:
8655 case E_V4DFmode:
8656 emit_insn (gen_andv4di3 (mask, mask, round_reg));
8657 break;
8658 default:
8659 gcc_unreachable ();
8660 break;
8661 }
8662 }
8663
8664 if (mode == V4DImode || mode == V4DFmode)
8665 {
8666 maskmode = mode = V8SImode;
8667 w = 8;
8668 t1 = gen_reg_rtx (maskmode);
8669
8670 /* Replicate the low bits of the V4DImode mask into V8SImode:
8671 mask = { A B C D }
8672 t1 = { A A B B C C D D }. */
8673 for (i = 0; i < w / 2; ++i)
8674 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
8675 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
8676 vt = force_reg (maskmode, vt);
8677 mask = gen_lowpart (maskmode, mask);
8678 emit_insn (gen_lasx_xvperm_w (t1, mask, vt));
8679
8680 /* Multiply the shuffle indices by two. */
8681 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
8682 OPTAB_DIRECT);
8683
8684 /* Add one to the odd shuffle indices:
8685 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
8686 for (i = 0; i < w / 2; ++i)
8687 {
8688 vec[i * 2] = const0_rtx;
8689 vec[i * 2 + 1] = const1_rtx;
8690 }
8691 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
8692 vt = validize_mem (force_const_mem (maskmode, vt));
8693 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
8694 OPTAB_DIRECT);
8695
8696 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
8697 operands[3] = mask = t1;
8698 target = gen_reg_rtx (mode);
8699 op0 = gen_lowpart (mode, op0);
8700 op1 = gen_lowpart (mode, op1);
8701 }
8702
8703 switch (mode)
8704 {
8705 case E_V8SImode:
8706 if (one_operand_shuffle)
8707 {
8708 emit_insn (gen_lasx_xvperm_w (target, op0, mask));
8709 if (target != operands[0])
8710 emit_move_insn (operands[0],
8711 gen_lowpart (GET_MODE (operands[0]), target));
8712 }
8713 else
8714 {
8715 t1 = gen_reg_rtx (V8SImode);
8716 t2 = gen_reg_rtx (V8SImode);
8717 emit_insn (gen_lasx_xvperm_w (t1, op0, mask));
8718 emit_insn (gen_lasx_xvperm_w (t2, op1, mask));
8719 goto merge_two;
8720 }
8721 return;
8722
8723 case E_V8SFmode:
8724 mask = gen_lowpart (V8SImode, mask);
8725 if (one_operand_shuffle)
8726 emit_insn (gen_lasx_xvperm_w_f (target, op0, mask));
8727 else
8728 {
8729 t1 = gen_reg_rtx (V8SFmode);
8730 t2 = gen_reg_rtx (V8SFmode);
8731 emit_insn (gen_lasx_xvperm_w_f (t1, op0, mask));
8732 emit_insn (gen_lasx_xvperm_w_f (t2, op1, mask));
8733 goto merge_two;
8734 }
8735 return;
8736
8737 case E_V16HImode:
8738 if (one_operand_shuffle)
8739 {
8740 t1 = gen_reg_rtx (V16HImode);
8741 t2 = gen_reg_rtx (V16HImode);
8742 emit_insn (gen_lasx_xvpermi_d_v16hi (t1, op0, GEN_INT (0x44)));
8743 emit_insn (gen_lasx_xvpermi_d_v16hi (t2, op0, GEN_INT (0xee)));
8744 emit_insn (gen_lasx_xvshuf_h (target, mask, t2, t1));
8745 }
8746 else
8747 {
8748 t1 = gen_reg_rtx (V16HImode);
8749 t2 = gen_reg_rtx (V16HImode);
8750 t3 = gen_reg_rtx (V16HImode);
8751 t4 = gen_reg_rtx (V16HImode);
8752 t5 = gen_reg_rtx (V16HImode);
8753 t6 = gen_reg_rtx (V16HImode);
8754 emit_insn (gen_lasx_xvpermi_d_v16hi (t3, op0, GEN_INT (0x44)));
8755 emit_insn (gen_lasx_xvpermi_d_v16hi (t4, op0, GEN_INT (0xee)));
8756 emit_insn (gen_lasx_xvshuf_h (t1, mask, t4, t3));
8757 emit_insn (gen_lasx_xvpermi_d_v16hi (t5, op1, GEN_INT (0x44)));
8758 emit_insn (gen_lasx_xvpermi_d_v16hi (t6, op1, GEN_INT (0xee)));
8759 emit_insn (gen_lasx_xvshuf_h (t2, mask, t6, t5));
8760 goto merge_two;
8761 }
8762 return;
8763
8764 case E_V32QImode:
8765 if (one_operand_shuffle)
8766 {
8767 t1 = gen_reg_rtx (V32QImode);
8768 t2 = gen_reg_rtx (V32QImode);
8769 emit_insn (gen_lasx_xvpermi_d_v32qi (t1, op0, GEN_INT (0x44)));
8770 emit_insn (gen_lasx_xvpermi_d_v32qi (t2, op0, GEN_INT (0xee)));
8771 emit_insn (gen_lasx_xvshuf_b (target, t2, t1, mask));
8772 }
8773 else
8774 {
8775 t1 = gen_reg_rtx (V32QImode);
8776 t2 = gen_reg_rtx (V32QImode);
8777 t3 = gen_reg_rtx (V32QImode);
8778 t4 = gen_reg_rtx (V32QImode);
8779 t5 = gen_reg_rtx (V32QImode);
8780 t6 = gen_reg_rtx (V32QImode);
8781 emit_insn (gen_lasx_xvpermi_d_v32qi (t3, op0, GEN_INT (0x44)));
8782 emit_insn (gen_lasx_xvpermi_d_v32qi (t4, op0, GEN_INT (0xee)));
8783 emit_insn (gen_lasx_xvshuf_b (t1, t4, t3, mask));
8784 emit_insn (gen_lasx_xvpermi_d_v32qi (t5, op1, GEN_INT (0x44)));
8785 emit_insn (gen_lasx_xvpermi_d_v32qi (t6, op1, GEN_INT (0xee)));
8786 emit_insn (gen_lasx_xvshuf_b (t2, t6, t5, mask));
8787 goto merge_two;
8788 }
8789 return;
8790
8791 default:
8792 gcc_assert (GET_MODE_SIZE (mode) == 32);
8793 break;
8794 }
8795
8796 merge_two:
8797 /* Then merge them together. The key is whether any given control
8798 element contained a bit set that indicates the second word. */
8799 rtx xops[6];
8800 mask = operands[3];
8801 vt = GEN_INT (w);
8802 vt = gen_const_vec_duplicate (maskmode, vt);
8803 vt = force_reg (maskmode, vt);
8804 mask = expand_simple_binop (maskmode, AND, mask, vt,
8805 NULL_RTX, 0, OPTAB_DIRECT);
8806 if (GET_MODE (target) != mode)
8807 target = gen_reg_rtx (mode);
8808 xops[0] = target;
8809 xops[1] = gen_lowpart (mode, t2);
8810 xops[2] = gen_lowpart (mode, t1);
8811 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
8812 xops[4] = mask;
8813 xops[5] = vt;
8814
8815 loongarch_expand_vec_cond_expr (mode, maskmode, xops);
8816 if (target != operands[0])
8817 emit_move_insn (operands[0],
8818 gen_lowpart (GET_MODE (operands[0]), target));
8819 }
8820
8821 void
8822 loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8823 {
8824 machine_mode vmode = GET_MODE (target);
8825 machine_mode vimode = GET_MODE (sel);
8826 auto nelt = GET_MODE_NUNITS (vmode);
8827 auto round_reg = gen_reg_rtx (vimode);
8828 rtx round_data[MAX_VECT_LEN];
8829
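  /* Fill ROUND_DATA with 0x1f for every element; the per-mode AND emitted
     below masks each selector element with it, so out-of-range indices are
     reduced before the vshuf instructions see them (the same rounding idea
     as in loongarch_vectorize_vec_perm_const).  */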
8830 for (int i = 0; i < nelt; i += 1)
8831 {
8832 round_data[i] = GEN_INT (0x1f);
8833 }
8834
8835 rtx round_data_rtx = gen_rtx_CONST_VECTOR (vimode, gen_rtvec_v (nelt, round_data));
8836 emit_move_insn (round_reg, round_data_rtx);
8837
8838 if (vmode != vimode)
8839 {
8840 target = lowpart_subreg (vimode, target, vmode);
8841 op0 = lowpart_subreg (vimode, op0, vmode);
8842 op1 = lowpart_subreg (vimode, op1, vmode);
8843 }
8844
8845 switch (vmode)
8846 {
8847 case E_V16QImode:
8848 emit_insn (gen_andv16qi3 (sel, sel, round_reg));
8849 emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel));
8850 break;
8851 case E_V2DFmode:
8852 case E_V2DImode:
8853 emit_insn (gen_andv2di3 (sel, sel, round_reg));
8854 emit_insn (gen_lsx_vshuf_d (target, sel, op1, op0));
8855 break;
8856 case E_V4SFmode:
8857 case E_V4SImode:
8858 emit_insn (gen_andv4si3 (sel, sel, round_reg));
8859 emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0));
8860 break;
8861 case E_V8HImode:
8862 emit_insn (gen_andv8hi3 (sel, sel, round_reg));
8863 emit_insn (gen_lsx_vshuf_h (target, sel, op1, op0));
8864 break;
8865 default:
8866 break;
8867 }
8868 }
8869
8870 /* The following are assist functions for const vector permutation support. */
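
/* Return true if the first half of the selector repeats a single index LHS
   (LHS < nelt / 2) and the second half repeats LHS + nelt / 2, e.g.
   { 0, 0, 0, 0, 4, 4, 4, 4 } for V8SImode.  */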
8871 static bool
8872 loongarch_is_quad_duplicate (struct expand_vec_perm_d *d)
8873 {
8874 if (d->perm[0] >= d->nelt / 2)
8875 return false;
8876
8877 bool result = true;
8878 unsigned char lhs = d->perm[0];
8879 unsigned char rhs = d->perm[d->nelt / 2];
8880
8881 if ((rhs - lhs) != d->nelt / 2)
8882 return false;
8883
8884 for (int i = 1; i < d->nelt; i += 1)
8885 {
8886 if ((i < d->nelt / 2) && (d->perm[i] != lhs))
8887 {
8888 result = false;
8889 break;
8890 }
8891 if ((i > d->nelt / 2) && (d->perm[i] != rhs))
8892 {
8893 result = false;
8894 break;
8895 }
8896 }
8897
8898 return result;
8899 }
8900
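/* Return true if the selector extracts the odd-numbered elements of the
   concatenated operands: { 1, 3, 5, ... }.  */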
8901 static bool
8902 loongarch_is_odd_extraction (struct expand_vec_perm_d *d)
8903 {
8904 bool result = true;
8905 unsigned char buf = 1;
8906
8907 for (int i = 0; i < d->nelt; i += 1)
8908 {
8909 if (buf != d->perm[i])
8910 {
8911 result = false;
8912 break;
8913 }
8914 buf += 2;
8915 }
8916
8917 return result;
8918 }
8919
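/* Return true if the selector extracts the even-numbered elements of the
   concatenated operands: { 0, 2, 4, ... }.  */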
8920 static bool
8921 loongarch_is_even_extraction (struct expand_vec_perm_d *d)
8922 {
8923 bool result = true;
8924 unsigned char buf = 0;
8925
8926 for (int i = 0; i < d->nelt; i += 1)
8927 {
8928 if (buf != d->perm[i])
8929 {
8930 result = false;
8931 break;
8932 }
8933 buf += 2;
8934 }
8935
8936 return result;
8937 }
8938
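/* Return true if the selector is a run of consecutive indices starting at 0
   or at nelt, i.e. it extracts one whole operand:
   { 0, 1, ..., nelt-1 } or { nelt, nelt+1, ..., 2*nelt-1 }.  */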
8939 static bool
8940 loongarch_is_extraction_permutation (struct expand_vec_perm_d *d)
8941 {
8942 bool result = true;
8943 unsigned char buf = d->perm[0];
8944
8945 if (buf != 0 && buf != d->nelt)
8946 return false;
8947
8948 for (int i = 0; i < d->nelt; i += 1)
8949 {
8950 if (buf != d->perm[i])
8951 {
8952 result = false;
8953 break;
8954 }
8955 buf += 1;
8956 }
8957
8958 return result;
8959 }
8960
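/* Return true if the selector interleaves the low halves of the two operands:
   { 0, nelt, 1, nelt + 1, ... }, e.g. { 0, 8, 1, 9, 2, 10, 3, 11 } for
   V8SImode.  */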
8961 static bool
8962 loongarch_is_lasx_lowpart_interleave (struct expand_vec_perm_d *d)
8963 {
8964 bool result = true;
8965 unsigned char buf = 0;
8966
8967 for (int i = 0; i < d->nelt; i += 2)
8968 {
8969 if (buf != d->perm[i])
8970 {
8971 result = false;
8972 break;
8973 }
8974 buf += 1;
8975 }
8976
8977 if (result)
8978 {
8979 buf = d->nelt;
8980 for (int i = 1; i < d->nelt; i += 2)
8981 {
8982 if (buf != d->perm[i])
8983 {
8984 result = false;
8985 break;
8986 }
8987 buf += 1;
8988 }
8989 }
8990
8991 return result;
8992 }
8993
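/* V32QImode-only variant of the low-part interleave check; the recognized
   selector is { 0..7, 32..39, 8..15, 40..47 }.  */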
8994 static bool
8995 loongarch_is_lasx_lowpart_interleave_2 (struct expand_vec_perm_d *d)
8996 {
8997 if (d->vmode != E_V32QImode)
8998 return false;
8999 bool result = true;
9000 unsigned char buf = 0;
9001
9002 #define COMPARE_SELECTOR(INIT, BEGIN, END) \
9003 buf = INIT; \
9004 for (int i = BEGIN; i < END && result; i += 1) \
9005 { \
9006 if (buf != d->perm[i]) \
9007 { \
9008 result = false; \
9009 break; \
9010 } \
9011 buf += 1; \
9012 }
9013
9014 COMPARE_SELECTOR (0, 0, 8);
9015 COMPARE_SELECTOR (32, 8, 16);
9016 COMPARE_SELECTOR (8, 16, 24);
9017 COMPARE_SELECTOR (40, 24, 32);
9018
9019 #undef COMPARE_SELECTOR
9020 return result;
9021 }
9022
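/* Return true if the selector interleaves the high halves of the two operands:
   { nelt/2, nelt + nelt/2, nelt/2 + 1, ... }, e.g.
   { 4, 12, 5, 13, 6, 14, 7, 15 } for V8SImode.  */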
9023 static bool
9024 loongarch_is_lasx_highpart_interleave (struct expand_vec_perm_d *d)
9025 {
9026 bool result = true;
9027 unsigned char buf = d->nelt / 2;
9028
9029 for (int i = 0; i < d->nelt; i += 2)
9030 {
9031 if (buf != d->perm[i])
9032 {
9033 result = false;
9034 break;
9035 }
9036 buf += 1;
9037 }
9038
9039 if (result)
9040 {
9041 buf = d->nelt + d->nelt / 2;
9042 for (int i = 1; i < d->nelt; i += 2)
9043 {
9044 if (buf != d->perm[i])
9045 {
9046 result = false;
9047 break;
9048 }
9049 buf += 1;
9050 }
9051 }
9052
9053 return result;
9054 }
9055
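/* V32QImode-only variant of the high-part interleave check; the recognized
   selector is { 16..23, 48..55, 24..31, 56..63 }.  */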
9056 static bool
9057 loongarch_is_lasx_highpart_interleave_2 (struct expand_vec_perm_d *d)
9058 {
9059 if (d->vmode != E_V32QImode)
9060 return false;
9061
9062 bool result = true;
9063 unsigned char buf = 0;
9064
9065 #define COMPARE_SELECTOR(INIT, BEGIN, END) \
9066 buf = INIT; \
9067 for (int i = BEGIN; i < END && result; i += 1) \
9068 { \
9069 if (buf != d->perm[i]) \
9070 { \
9071 result = false; \
9072 break; \
9073 } \
9074 buf += 1; \
9075 }
9076
9077 COMPARE_SELECTOR (16, 0, 8);
9078 COMPARE_SELECTOR (48, 8, 16);
9079 COMPARE_SELECTOR (24, 16, 24);
9080 COMPARE_SELECTOR (56, 24, 32);
9081
9082 #undef COMPARE_SELECTOR
9083 return result;
9084 }
9085
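/* Return true if every element of the selector is the same index, i.e. a
   single element is broadcast to the whole target vector.  */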
9086 static bool
9087 loongarch_is_elem_duplicate (struct expand_vec_perm_d *d)
9088 {
9089 bool result = true;
9090 unsigned char buf = d->perm[0];
9091
9092 for (int i = 0; i < d->nelt; i += 1)
9093 {
9094 if (buf != d->perm[i])
9095 {
9096 result = false;
9097 break;
9098 }
9099 }
9100
9101 return result;
9102 }
9103
9104 /* In LASX, some permutation insns do not have the behavior that GCC expects
9105 when the compiler wants to emit a vector permutation.
9106
9107 1. What GCC provides via vectorize_vec_perm_const ()'s parameters:
9108 When GCC wants to perform a vector permutation, it provides two operand
9109 registers, one target register, and a selector.
9110 In the const vector permutation case, GCC provides the selector as a char
9111 array that contains the original values; in the variable vector permutation
9112 case (performed via the vec_perm<mode> insn template), it provides a vector
9113 register.  We assume that nelt is the number of elements inside a single
9114 vector in the current 256-bit vector mode.
9115
9116 2. What GCC expects to perform:
9117 Two op registers (op0, op1) will "combine" into a 512bit temp vector storage
9118 that has 2*nelt elements inside it; the low 256bit is op0, and high 256bit
9119 is op1, then the elements are indexed as below:
9120 0 ~ nelt - 1 nelt ~ 2 * nelt - 1
9121 |-------------------------|-------------------------|
9122 Low 256bit (op0) High 256bit (op1)
9123 For example, the second element in op1 (V8SImode) will be indexed with 9.
9124 The selector is a vector that has the same mode and number of elements as
9125 op0, op1 and target; it looks like this:
9126 0 ~ nelt - 1
9127 |-------------------------|
9128 256bit (selector)
9129 It describes which element from the 512-bit temp vector storage goes into
9130 each element slot of the target.
9131 GCC expects that every element in the selector can be ANY index into the
9132 512-bit vector storage (the selector can pick literally any element from op0
9133 and op1 and place it anywhere in the target register). This is also what the
9134 LSX 128-bit vshuf.* instructions do, so we can easily handle a 128-bit vector
9135 permutation with a single instruction.
9136
9137 3. What the LASX permutation instructions do:
9138 In short, they just execute two independent 128-bit vector permutations,
9139 which is the reason we need to do the jobs below. We will explain it.
9140 op0, op1, target, and selector are each separated into high 128-bit and low
9141 128-bit parts, and the permutation proceeds as described below:
9142
9143 a) op0's low 128 bits and op1's low 128 bits "combine" into a 256-bit temp
9144 vector storage (TVS1), whose elements are indexed as below:
9145 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1
9146 |---------------------|---------------------| TVS1
9147 op0's low 128bit op1's low 128bit
9148 op0's high 128bit and op1's high 128bit are "combined" into TVS2 in the
9149 same way.
9150 0 ~ nelt / 2 - 1 nelt / 2 ~ nelt - 1
9151 |---------------------|---------------------| TVS2
9152 op0's high 128bit op1's high 128bit
9153 b) The selector's low 128 bits describe which elements from TVS1 go into the
9154 target vector's low 128 bits. No TVS2 elements are allowed.
9155 c) The selector's high 128 bits describe which elements from TVS2 go into the
9156 target vector's high 128 bits. No TVS1 elements are allowed.
9157
9158 As we can see, if we want to handle the vector permutation correctly, we can
9159 achieve it in three ways:
9160 a) Modify the selector's elements, to make sure that every element carries
9161 the correct value that will be put into the target vector.
9162 b) Generate extra instructions before/after the permutation instruction to
9163 adjust the op vectors or the target vector, so that the target vector's
9164 value is what GCC expects.
9165 c) Use other instructions to process the ops and put the correct result into the target.
9166 */
9167
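/* As a concrete illustration of the mismatch (V8SImode, indices are element
   numbers): the GCC selector { 0, 9, 2, 11, 4, 13, 6, 15 } asks for
   { op0[0], op1[1], op0[2], op1[3], op0[4], op1[5], op0[6], op1[7] },
   but a raw xvshuf.w can only fill the low four result slots from
   TVS1 = { op0[0..3], op1[0..3] } and the high four slots from
   TVS2 = { op0[4..7], op1[4..7] }, so the selector must be remapped or extra
   moves emitted before the target holds what GCC expects.  */
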
9168 /* Implementation of constant vector permutation. This function identifies
9169 recognized patterns of the permutation selector argument, and uses one or
9170 more instructions to finish the permutation job correctly. For unsupported
9171 patterns, it will return false. */
9172
9173 static bool
9174 loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d)
9175 {
9176 bool flag = false;
9177 unsigned int i;
9178 unsigned char idx;
9179 rtx target, op0, op1, sel, tmp;
9180 rtx rperm[MAX_VECT_LEN];
9181 unsigned int remapped[MAX_VECT_LEN];
9182 unsigned char perm2[MAX_VECT_LEN];
9183
9184 if (GET_MODE_SIZE (d->vmode) == 16)
9185 return loongarch_expand_lsx_shuffle (d);
9186 else
9187 {
9188 if (d->one_vector_p)
9189 {
9190 /* Try interleave with alternating operands. */
9191 memcpy (perm2, d->perm, sizeof (perm2));
9192 for (i = 1; i < d->nelt; i += 2)
9193 perm2[i] += d->nelt;
9194 if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1,
9195 perm2, d->nelt))
9196 return true;
9197 }
9198 else
9199 {
9200 if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1,
9201 d->perm, d->nelt))
9202 return true;
9203
9204 /* Try again with swapped operands. */
9205 for (i = 0; i < d->nelt; ++i)
9206 perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1);
9207 if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0,
9208 perm2, d->nelt))
9209 return true;
9210 }
9211
9212 if (loongarch_expand_lsx_shuffle (d))
9213 return true;
9214
9215 if (loongarch_is_odd_extraction (d)
9216 || loongarch_is_even_extraction (d))
9217 {
9218 if (loongarch_expand_vec_perm_even_odd (d))
9219 return true;
9220 }
9221
9222 if (loongarch_is_lasx_lowpart_interleave (d)
9223 || loongarch_is_lasx_lowpart_interleave_2 (d)
9224 || loongarch_is_lasx_highpart_interleave (d)
9225 || loongarch_is_lasx_highpart_interleave_2 (d))
9226 {
9227 if (loongarch_expand_vec_perm_interleave (d))
9228 return true;
9229 }
9230
9231 if (loongarch_is_quad_duplicate (d))
9232 {
9233 if (d->testing_p)
9234 return true;
9235 /* Selector example: E_V8SImode, { 0, 0, 0, 0, 4, 4, 4, 4 }. */
9236 for (i = 0; i < d->nelt; i += 1)
9237 {
9238 rperm[i] = GEN_INT (d->perm[0]);
9239 }
9240 /* Selector after: { 0, 0, 0, 0, 0, 0, 0, 0 }. */
9241 flag = true;
9242 goto expand_perm_const_end;
9243 }
9244
9245 if (loongarch_is_extraction_permutation (d))
9246 {
9247 if (d->testing_p)
9248 return true;
9249 /* Selector sample: E_V8SImode, { 0, 1, 2, 3, 4, 5, 6, 7 }. */
9250 if (d->perm[0] == 0)
9251 {
9252 for (i = 0; i < d->nelt / 2; i += 1)
9253 {
9254 remapped[i] = i;
9255 remapped[i + d->nelt / 2] = i;
9256 }
9257 }
9258 else
9259 {
9260 /* { 8, 9, 10, 11, 12, 13, 14, 15 }. */
9261 for (i = 0; i < d->nelt / 2; i += 1)
9262 {
9263 idx = i + d->nelt / 2;
9264 remapped[i] = idx;
9265 remapped[i + d->nelt / 2] = idx;
9266 }
9267 }
9268 /* Selector after: { 0, 1, 2, 3, 0, 1, 2, 3 }
9269 { 8, 9, 10, 11, 8, 9, 10, 11 } */
9270
9271 /* Convert remapped selector array to RTL array. */
9272 for (i = 0; i < d->nelt; i += 1)
9273 {
9274 rperm[i] = GEN_INT (remapped[i]);
9275 }
9276
9277 flag = true;
9278 goto expand_perm_const_end;
9279 }
9280
9281 if (loongarch_is_elem_duplicate (d))
9282 {
9283 if (d->testing_p)
9284 return true;
9285 /* Broadcast a single element (from op0 or op1) to all slots of the target
9286 register.
9287 Selector sample: E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 }. */
9288 rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0);
9289 rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
9290 rtx temp_reg = gen_reg_rtx (d->vmode);
9291 rtx conv_temp = simplify_gen_subreg (E_V4DImode, temp_reg,
9292 d->vmode, 0);
9293 emit_move_insn (temp_reg, d->op0);
9294
9295 idx = d->perm[0];
9296 /* We will use an xvrepl128vei.* insn to achieve the result, but we need
9297 to make the high/low 128 bits hold the same contents containing the
9298 value that we need to broadcast, because xvrepl128vei does the
9299 broadcast job from each 128-bit half of the source register to the
9300 corresponding part of the target register! (A deep sigh.) */
9301 if (idx < d->nelt / 2)
9302 {
9303 emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
9304 conv_op0, GEN_INT (0x0)));
9305 }
9306 else if (idx >= d->nelt / 2 && idx < d->nelt)
9307 {
9308 emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
9309 conv_op0, GEN_INT (0x11)));
9310 idx -= d->nelt / 2;
9311 }
9312 else if (idx >= d->nelt && idx < (d->nelt + d->nelt / 2))
9313 {
9314 emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
9315 conv_op1, GEN_INT (0x0)));
9316 }
9317 else if (idx >= (d->nelt + d->nelt / 2) && idx < d->nelt * 2)
9318 {
9319 emit_insn (gen_lasx_xvpermi_q_v4di (conv_temp, conv_temp,
9320 conv_op1, GEN_INT (0x11)));
9321 idx -= d->nelt / 2;
9322 }
9323
9324 /* Then we can finally generate this insn. */
9325 switch (d->vmode)
9326 {
9327 case E_V4DImode:
9328 emit_insn (gen_lasx_xvrepl128vei_d (d->target, temp_reg,
9329 GEN_INT (idx)));
9330 break;
9331 case E_V4DFmode:
9332 emit_insn (gen_lasx_xvrepl128vei_d_f (d->target, temp_reg,
9333 GEN_INT (idx)));
9334 break;
9335 case E_V8SImode:
9336 emit_insn (gen_lasx_xvrepl128vei_w (d->target, temp_reg,
9337 GEN_INT (idx)));
9338 break;
9339 case E_V8SFmode:
9340 emit_insn (gen_lasx_xvrepl128vei_w_f (d->target, temp_reg,
9341 GEN_INT (idx)));
9342 break;
9343 case E_V16HImode:
9344 emit_insn (gen_lasx_xvrepl128vei_h (d->target, temp_reg,
9345 GEN_INT (idx)));
9346 break;
9347 case E_V32QImode:
9348 emit_insn (gen_lasx_xvrepl128vei_b (d->target, temp_reg,
9349 GEN_INT (idx)));
9350 break;
9351 default:
9352 gcc_unreachable ();
9353 break;
9354 }
9355
9356 return true;
9357 }
9358
9359 expand_perm_const_end:
9360 if (flag)
9361 {
9362 /* Copy the selector vector from memory to a vector register for the later
9363 insn gen function.
9364 If the vector's elements are floating-point values, we cannot pass the
9365 selector argument into the insn gen function directly, because of the
9366 insn template definition. As a solution, generate an integral-mode
9367 subreg of the target, then copy the selector vector (which is in integral
9368 mode) to this subreg. */
9369 switch (d->vmode)
9370 {
9371 case E_V4DFmode:
9372 sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt,
9373 rperm));
9374 tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0);
9375 emit_move_insn (tmp, sel);
9376 break;
9377 case E_V8SFmode:
9378 sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt,
9379 rperm));
9380 tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0);
9381 emit_move_insn (tmp, sel);
9382 break;
9383 default:
9384 sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt,
9385 rperm));
9386 emit_move_insn (d->target, sel);
9387 break;
9388 }
9389
9390 target = d->target;
9391 op0 = d->op0;
9392 op1 = d->one_vector_p ? d->op0 : d->op1;
9393
9394 /* We FINALLY can generate xvshuf.* insn. */
9395 switch (d->vmode)
9396 {
9397 case E_V4DFmode:
9398 emit_insn (gen_lasx_xvshuf_d_f (target, target, op1, op0));
9399 break;
9400 case E_V4DImode:
9401 emit_insn (gen_lasx_xvshuf_d (target, target, op1, op0));
9402 break;
9403 case E_V8SFmode:
9404 emit_insn (gen_lasx_xvshuf_w_f (target, target, op1, op0));
9405 break;
9406 case E_V8SImode:
9407 emit_insn (gen_lasx_xvshuf_w (target, target, op1, op0));
9408 break;
9409 case E_V16HImode:
9410 emit_insn (gen_lasx_xvshuf_h (target, target, op1, op0));
9411 break;
9412 case E_V32QImode:
9413 emit_insn (gen_lasx_xvshuf_b (target, op1, op0, target));
9414 break;
9415 default:
9416 gcc_unreachable ();
9417 break;
9418 }
9419
9420 return true;
9421 }
9422 }
9423
9424 return false;
9425 }
9426
9427 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
9428
9429 static bool
9430 loongarch_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
9431 rtx target, rtx op0, rtx op1,
9432 const vec_perm_indices &sel)
9433 {
9434 if (vmode != op_mode)
9435 return false;
9436
9437 struct expand_vec_perm_d d;
9438 int i, nelt, which;
9439 unsigned char orig_perm[MAX_VECT_LEN];
9440 bool ok;
9441
9442 d.target = target;
9443 if (op0)
9444 {
9445 rtx nop0 = force_reg (vmode, op0);
9446 if (op0 == op1)
9447 op1 = nop0;
9448 op0 = nop0;
9449 }
9450 if (op1)
9451 op1 = force_reg (vmode, op1);
9452 d.op0 = op0;
9453 d.op1 = op1;
9454
9455 d.vmode = vmode;
9456 gcc_assert (VECTOR_MODE_P (vmode));
9457 d.nelt = nelt = GET_MODE_NUNITS (vmode);
9458 d.testing_p = !target;
9459
9460 /* This is overly conservative, but ensures we don't get an
9461 uninitialized warning on ORIG_PERM. */
9462 memset (orig_perm, 0, MAX_VECT_LEN);
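  /* WHICH is built below as a bitmask of the operands the selector refers to:
     bit 0 is set when some index falls in op0 (0 .. nelt-1) and bit 1 when
     some index falls in op1 (nelt .. 2*nelt-1).  */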
9463 for (i = which = 0; i < nelt; ++i)
9464 {
9465 int ei = sel[i] & (2 * nelt - 1);
9466 which |= (ei < nelt ? 1 : 2);
9467 orig_perm[i] = ei;
9468 }
9469 memcpy (d.perm, orig_perm, MAX_VECT_LEN);
9470
9471 switch (which)
9472 {
9473 default:
9474 gcc_unreachable ();
9475
9476 case 3:
9477 d.one_vector_p = false;
9478 if (d.testing_p || !rtx_equal_p (d.op0, d.op1))
9479 break;
9480 /* FALLTHRU */
9481
9482 case 2:
9483 for (i = 0; i < nelt; ++i)
9484 d.perm[i] &= nelt - 1;
9485 d.op0 = d.op1;
9486 d.one_vector_p = true;
9487 break;
9488
9489 case 1:
9490 d.op1 = d.op0;
9491 d.one_vector_p = true;
9492 break;
9493 }
9494
9495 /* Do rounding for the selector to avoid vshuf undefined behavior. */
9496 for (i = 0; i < d.nelt; i += 1)
9497 {
9498 d.perm[i] %= (d.nelt * 2);
9499 }
9500
9501 if (d.testing_p)
9502 {
9503 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9504 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9505 if (!d.one_vector_p)
9506 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9507
9508 start_sequence ();
9509 ok = loongarch_expand_vec_perm_const (&d);
9510 end_sequence ();
9511 return ok;
9512 }
9513
9514 ok = loongarch_expand_vec_perm_const (&d);
9515
9516 /* If we were given a two-vector permutation which just happened to
9517 have both input vectors equal, we folded this into a one-vector
9518 permutation. There are several loongson patterns that are matched
9519 via direct vec_select+vec_concat expansion, but we do not have
9520 support in loongarch_expand_vec_perm_const to guess the adjustment
9521 that should be made for a single operand. Just try again with
9522 the original permutation. */
9523 if (!ok && which == 3)
9524 {
9525 d.op0 = op0;
9526 d.op1 = op1;
9527 d.one_vector_p = false;
9528 memcpy (d.perm, orig_perm, MAX_VECT_LEN);
9529 ok = loongarch_expand_vec_perm_const (&d);
9530 }
9531
9532 return ok;
9533 }
9534
9535 static int
9536 loongarch_cpu_sched_reassociation_width (struct loongarch_target *target,
9537 unsigned int opc, machine_mode mode)
9538 {
9539 /* unreferenced argument */
9540 (void) opc;
9541
9542 switch (target->cpu_tune)
9543 {
9544 case CPU_LOONGARCH64:
9545 case CPU_LA464:
9546 case CPU_LA664:
9547 /* Vector part. */
9548 if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
9549 {
9550 /* Integer vector instructions execute in the FP unit.
9551 The width of integer/floating-point vector instructions is 3. */
9552 return 3;
9553 }
9554
9555 /* Scalar part. */
9556 else if (INTEGRAL_MODE_P (mode))
9557 return 1;
9558 else if (FLOAT_MODE_P (mode))
9559 {
9560 if (opc == PLUS_EXPR)
9561 {
9562 return 2;
9563 }
9564 return 4;
9565 }
9566 break;
9567 default:
9568 break;
9569 }
9570
9571 /* default is 1 */
9572 return 1;
9573 }
9574
9575 /* Implement TARGET_SCHED_REASSOCIATION_WIDTH. */
9576
9577 static int
9578 loongarch_sched_reassociation_width (unsigned int opc, machine_mode mode)
9579 {
9580 return loongarch_cpu_sched_reassociation_width (&la_target, opc, mode);
9581 }
9582
9583 /* Extract a scalar element from a vector register. */
9584
9585 void
9586 loongarch_expand_vector_extract (rtx target, rtx vec, int elt)
9587 {
9588 machine_mode mode = GET_MODE (vec);
9589 machine_mode inner_mode = GET_MODE_INNER (mode);
9590 rtx tmp;
9591
9592 switch (mode)
9593 {
9594 case E_V8HImode:
9595 case E_V16QImode:
9596 break;
9597
9598 case E_V32QImode:
9599 if (ISA_HAS_LASX)
9600 {
9601 if (elt >= 16)
9602 {
9603 tmp = gen_reg_rtx (V32QImode);
9604 emit_insn (gen_lasx_xvpermi_d_v32qi (tmp, vec, GEN_INT (0xe)));
9605 loongarch_expand_vector_extract (target,
9606 gen_lowpart (V16QImode, tmp),
9607 elt & 15);
9608 }
9609 else
9610 loongarch_expand_vector_extract (target,
9611 gen_lowpart (V16QImode, vec),
9612 elt & 15);
9613 return;
9614 }
9615 break;
9616
9617 case E_V16HImode:
9618 if (ISA_HAS_LASX)
9619 {
9620 if (elt >= 8)
9621 {
9622 tmp = gen_reg_rtx (V16HImode);
9623 emit_insn (gen_lasx_xvpermi_d_v16hi (tmp, vec, GEN_INT (0xe)));
9624 loongarch_expand_vector_extract (target,
9625 gen_lowpart (V8HImode, tmp),
9626 elt & 7);
9627 }
9628 else
9629 loongarch_expand_vector_extract (target,
9630 gen_lowpart (V8HImode, vec),
9631 elt & 7);
9632 return;
9633 }
9634 break;
9635
9636 default:
9637 break;
9638 }
9639
9640 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
9641 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
9642
9643 /* Let the rtl optimizers know about the zero extension performed. */
9644 if (inner_mode == QImode || inner_mode == HImode)
9645 {
9646 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
9647 target = gen_lowpart (SImode, target);
9648 }
9649 if (inner_mode == SImode || inner_mode == DImode)
9650 {
9651 tmp = gen_rtx_SIGN_EXTEND (inner_mode, tmp);
9652 }
9653
9654 emit_insn (gen_rtx_SET (target, tmp));
9655 }
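/* For example, extracting element 20 of a V32QI vector first uses
   xvpermi.d to move the high 128-bit half of VEC into the low half of a
   temporary, then recurses to extract element 20 & 15 == 4 from that
   V16QImode lowpart; the element is finally read out through the generic
   vec_select path at the end of the function.  */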
9656
9657 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
9658 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
9659 The upper bits of DEST are undefined, though they shouldn't cause
9660 exceptions (some bits from src or all zeros are ok). */
9661
9662 static void
9663 emit_reduc_half (rtx dest, rtx src, int i)
9664 {
9665 rtx tem, d = dest;
9666 switch (GET_MODE (src))
9667 {
9668 case E_V4SFmode:
9669 tem = gen_lsx_vbsrl_w_f (dest, src, GEN_INT (i == 128 ? 8 : 4));
9670 break;
9671 case E_V2DFmode:
9672 tem = gen_lsx_vbsrl_d_f (dest, src, GEN_INT (8));
9673 break;
9674 case E_V8SFmode:
9675 if (i == 256)
9676 tem = gen_lasx_xvpermi_d_v8sf (dest, src, GEN_INT (0xe));
9677 else
9678 tem = gen_lasx_xvshuf4i_w_f (dest, src,
9679 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
9680 break;
9681 case E_V4DFmode:
9682 if (i == 256)
9683 tem = gen_lasx_xvpermi_d_v4df (dest, src, GEN_INT (0xe));
9684 else
9685 tem = gen_lasx_xvpermi_d_v4df (dest, src, const1_rtx);
9686 break;
9687 case E_V32QImode:
9688 case E_V16HImode:
9689 case E_V8SImode:
9690 case E_V4DImode:
9691 d = gen_reg_rtx (V4DImode);
9692 if (i == 256)
9693 tem = gen_lasx_xvpermi_d_v4di (d, gen_lowpart (V4DImode, src),
9694 GEN_INT (0xe));
9695 else
9696 tem = gen_lasx_xvbsrl_d (d, gen_lowpart (V4DImode, src),
9697 GEN_INT (i/16));
9698 break;
9699 case E_V16QImode:
9700 case E_V8HImode:
9701 case E_V4SImode:
9702 case E_V2DImode:
9703 d = gen_reg_rtx (V2DImode);
9704 tem = gen_lsx_vbsrl_d (d, gen_lowpart (V2DImode, src), GEN_INT (i/16));
9705 break;
9706 default:
9707 gcc_unreachable ();
9708 }
9709 emit_insn (tem);
9710 if (d != dest)
9711 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
9712 }
9713
9714 /* Expand a vector reduction. FN is the binary pattern to reduce;
9715 DEST is the destination; IN is the input vector. */
9716
9717 void
9718 loongarch_expand_vector_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
9719 {
9720 rtx half, dst, vec = in;
9721 machine_mode mode = GET_MODE (in);
9722 int i;
9723
9724 for (i = GET_MODE_BITSIZE (mode);
9725 i > GET_MODE_UNIT_BITSIZE (mode);
9726 i >>= 1)
9727 {
9728 half = gen_reg_rtx (mode);
9729 emit_reduc_half (half, vec, i);
9730 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
9731 dst = dest;
9732 else
9733 dst = gen_reg_rtx (mode);
9734 emit_insn (fn (dst, half, vec));
9735 vec = dst;
9736 }
9737 }
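/* As an illustration, an addition reduction of a V4SI vector
   { v0, v1, v2, v3 } takes two steps:
     i = 128:  half = { v2, v3, x, x },    vec  = half + vec = { v0+v2, v1+v3, x, x }
     i = 64:   half = { v1+v3, x, x, x },  dest = half + vec
   so element 0 of DEST holds v0 + v1 + v2 + v3; the other elements are
   don't-cares.  */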
9738
9739 /* Expand an integral vector unpack operation. */
9740
9741 void
9742 loongarch_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
9743 {
9744 machine_mode imode = GET_MODE (operands[1]);
9745 rtx (*unpack) (rtx, rtx, rtx);
9746 rtx (*extend) (rtx, rtx);
9747 rtx (*cmpFunc) (rtx, rtx, rtx);
9748 rtx (*swap_hi_lo) (rtx, rtx, rtx, rtx);
9749 rtx tmp, dest;
9750
9751 if (ISA_HAS_LASX && GET_MODE_SIZE (imode) == 32)
9752 {
9753 switch (imode)
9754 {
9755 case E_V8SImode:
9756 if (unsigned_p)
9757 extend = gen_lasx_vext2xv_du_wu;
9758 else
9759 extend = gen_lasx_vext2xv_d_w;
9760 swap_hi_lo = gen_lasx_xvpermi_q_v8si;
9761 break;
9762
9763 case E_V16HImode:
9764 if (unsigned_p)
9765 extend = gen_lasx_vext2xv_wu_hu;
9766 else
9767 extend = gen_lasx_vext2xv_w_h;
9768 swap_hi_lo = gen_lasx_xvpermi_q_v16hi;
9769 break;
9770
9771 case E_V32QImode:
9772 if (unsigned_p)
9773 extend = gen_lasx_vext2xv_hu_bu;
9774 else
9775 extend = gen_lasx_vext2xv_h_b;
9776 swap_hi_lo = gen_lasx_xvpermi_q_v32qi;
9777 break;
9778
9779 default:
9780 gcc_unreachable ();
9781 break;
9782 }
9783
9784 if (high_p)
9785 {
9786 tmp = gen_reg_rtx (imode);
9787 emit_insn (swap_hi_lo (tmp, tmp, operands[1], const1_rtx));
9788 emit_insn (extend (operands[0], tmp));
9789 return;
9790 }
9791
9792 emit_insn (extend (operands[0], operands[1]));
9793 return;
9794
9795 }
9796 else if (ISA_HAS_LSX)
9797 {
9798 switch (imode)
9799 {
9800 case E_V4SImode:
9801 if (high_p != 0)
9802 unpack = gen_lsx_vilvh_w;
9803 else
9804 unpack = gen_lsx_vilvl_w;
9805
9806 cmpFunc = gen_lsx_vslt_w;
9807 break;
9808
9809 case E_V8HImode:
9810 if (high_p != 0)
9811 unpack = gen_lsx_vilvh_h;
9812 else
9813 unpack = gen_lsx_vilvl_h;
9814
9815 cmpFunc = gen_lsx_vslt_h;
9816 break;
9817
9818 case E_V16QImode:
9819 if (high_p != 0)
9820 unpack = gen_lsx_vilvh_b;
9821 else
9822 unpack = gen_lsx_vilvl_b;
9823
9824 cmpFunc = gen_lsx_vslt_b;
9825 break;
9826
9827 default:
9828 gcc_unreachable ();
9829 break;
9830 }
9831
9832 if (!unsigned_p)
9833 {
9834 /* Derive the sign extension for each element by comparing each
9835 element with immediate zero. */
9836 tmp = gen_reg_rtx (imode);
9837 emit_insn (cmpFunc (tmp, operands[1], CONST0_RTX (imode)));
9838 }
9839 else
9840 tmp = force_reg (imode, CONST0_RTX (imode));
9841
9842 dest = gen_reg_rtx (imode);
9843
9844 emit_insn (unpack (dest, operands[1], tmp));
9845 emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest));
9846 return;
9847 }
9848 gcc_unreachable ();
9849 }
9850
9851 /* Construct and return a PARALLEL RTX with CONST_INTs for the HIGH (high_p == TRUE)
9852 or LOW (high_p == FALSE) half of a vector of mode MODE. */
9853
9854 rtx
9855 loongarch_lsx_vec_parallel_const_half (machine_mode mode, bool high_p)
9856 {
9857 int nunits = GET_MODE_NUNITS (mode);
9858 rtvec v = rtvec_alloc (nunits / 2);
9859 int base;
9860 int i;
9861
9862 base = high_p ? nunits / 2 : 0;
9863
9864 for (i = 0; i < nunits / 2; i++)
9865 RTVEC_ELT (v, i) = GEN_INT (base + i);
9866
9867 return gen_rtx_PARALLEL (VOIDmode, v);
9868 }
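/* For V8HImode this returns (parallel [4 5 6 7]) when HIGH_P is true and
   (parallel [0 1 2 3]) otherwise.  */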
9869
9870 /* A subroutine of loongarch_expand_vector_init, matching constant vector
9871 elements. */
9872
9873 static inline bool
9874 loongarch_constant_elt_p (rtx x)
9875 {
9876 return CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE;
9877 }
9878
9879 rtx
9880 loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val)
9881 {
9882 int nunits = GET_MODE_NUNITS (mode);
9883 int nsets = nunits / 4;
9884 rtx elts[MAX_VECT_LEN];
9885 int set = 0;
9886 int i, j;
9887
9888 /* Generate a const_int vector replicating the same 4-element set
9889 from an immediate. */
9890 for (j = 0; j < nsets; j++, set = 4 * j)
9891 for (i = 0; i < 4; i++)
9892 elts[set + i] = GEN_INT (set + ((val >> (2 * i)) & 0x3));
9893
9894 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nunits, elts));
9895 }
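/* For example, with MODE == V8HImode and VAL == 0x1b (0b00011011) this
   builds (parallel [3 2 1 0 7 6 5 4]): each 2-bit field of VAL picks an
   element within a 4-element set, offset by that set's base index.  */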
9896
9897
9898 /* Expand a vector group initialization by concatenating the two half-width vector operands.  */
9899
9900 void
9901 loongarch_expand_vector_group_init (rtx target, rtx vals)
9902 {
9903 machine_mode vmode = GET_MODE (target);
9904 machine_mode half_mode = VOIDmode;
9905 rtx low = XVECEXP (vals, 0, 0);
9906 rtx high = XVECEXP (vals, 0, 1);
9907
9908 switch (vmode)
9909 {
9910 case E_V32QImode:
9911 half_mode = V16QImode;
9912 break;
9913 case E_V16HImode:
9914 half_mode = V8HImode;
9915 break;
9916 case E_V8SImode:
9917 half_mode = V4SImode;
9918 break;
9919 case E_V4DImode:
9920 half_mode = V2DImode;
9921 break;
9922 case E_V8SFmode:
9923 half_mode = V4SFmode;
9924 break;
9925 case E_V4DFmode:
9926 half_mode = V2DFmode;
9927 break;
9928 default:
9929 gcc_unreachable ();
9930 }
9931
9932 if (!register_operand (low, half_mode))
9933 low = force_reg (half_mode, low);
9934 if (!register_operand (high, half_mode))
9935 high = force_reg (half_mode, high);
9936 emit_insn (gen_rtx_SET (target,
9937 gen_rtx_VEC_CONCAT (vmode, low, high)));
9938 }
9939
9940 /* Expand initialization of a vector whose elements are all the same.  */
9941
9942 void
9943 loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
9944 {
9945 machine_mode vmode = GET_MODE (target);
9946 machine_mode imode = GET_MODE_INNER (vmode);
9947 rtx same = XVECEXP (vals, 0, 0);
9948 rtx temp;
9949
9950 if (CONST_INT_P (same) && nvar == 0
9951 && loongarch_signed_immediate_p (INTVAL (same), 10, 0))
9952 {
9953 switch (vmode)
9954 {
9955 case E_V32QImode:
9956 case E_V16HImode:
9957 case E_V8SImode:
9958 case E_V4DImode:
9959 case E_V16QImode:
9960 case E_V8HImode:
9961 case E_V4SImode:
9962 case E_V2DImode:
9963 temp = gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0));
9964 emit_move_insn (target, temp);
9965 return;
9966 default:
9967 gcc_unreachable ();
9968 }
9969 }
9970
9971 if (imode == GET_MODE (same))
9972 temp = same;
9973 else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD)
9974 {
9975 if (GET_CODE (same) == MEM)
9976 {
9977 rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
9978 loongarch_emit_move (reg_tmp, same);
9979 temp = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0);
9980 }
9981 else
9982 temp = simplify_gen_subreg (imode, same, GET_MODE (same), 0);
9983 }
9984 else
9985 {
9986 if (GET_CODE (same) == MEM)
9987 {
9988 rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
9989 loongarch_emit_move (reg_tmp, same);
9990 temp = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp));
9991 }
9992 else
9993 temp = lowpart_subreg (imode, same, GET_MODE (same));
9994 }
9995
9996 temp = force_reg (imode, temp);
9997
9998 switch (vmode)
9999 {
10000 case E_V32QImode:
10001 case E_V16HImode:
10002 case E_V8SImode:
10003 case E_V4DImode:
10004 case E_V16QImode:
10005 case E_V8HImode:
10006 case E_V4SImode:
10007 case E_V2DImode:
10008 loongarch_emit_move (target, gen_rtx_VEC_DUPLICATE (vmode, temp));
10009 break;
10010
10011 case E_V8SFmode:
10012 emit_insn (gen_lasx_xvreplve0_w_f_scalar (target, temp));
10013 break;
10014
10015 case E_V4DFmode:
10016 emit_insn (gen_lasx_xvreplve0_d_f_scalar (target, temp));
10017 break;
10018
10019 case E_V4SFmode:
10020 emit_insn (gen_lsx_vreplvei_w_f_scalar (target, temp));
10021 break;
10022
10023 case E_V2DFmode:
10024 emit_insn (gen_lsx_vreplvei_d_f_scalar (target, temp));
10025 break;
10026
10027 default:
10028 gcc_unreachable ();
10029 }
10030 }
10031
10032 /* Expand a vector initialization. */
10033
10034 void
10035 loongarch_expand_vector_init (rtx target, rtx vals)
10036 {
10037 machine_mode vmode = GET_MODE (target);
10038 machine_mode imode = GET_MODE_INNER (vmode);
10039 unsigned i, nelt = GET_MODE_NUNITS (vmode);
10040 /* VALS is divided into a high and a low half-part. */
10041 /* Number of non-constant elements in the corresponding parts of VALS. */
10042 unsigned nvar = 0, hi_nvar = 0, lo_nvar = 0;
10043 /* all_same : true if all elements of VALS are the same.
10044 hi_same : true if all elements of the high half-part are the same.
10045 lo_same : true if all elements of the low half-part are the same.
10046 half_same : true if the high half-part is the same as the low one. */
10047 bool all_same = false, hi_same = true, lo_same = true, half_same = true;
10048 rtx val[32], val_hi[32], val_lo[16];
10049 rtx x, op0, op1;
10050 /* Replicate one element of VALS to every element of the target vector. */
10051 typedef rtx (*loongarch_vec_repl1_fn) (rtx, rtx);
10052 /* Copy two elements of vals to target vector. */
10053 typedef rtx (*loongarch_vec_repl2_fn) (rtx, rtx, rtx);
10054 /* Insert scalar operands into the specified position of the vector. */
10055 typedef rtx (*loongarch_vec_set_fn) (rtx, rtx, rtx);
10056 /* Copy the 64-bit lowpart to the highpart. */
10057 typedef rtx (*loongarch_vec_mirror_fn) (rtx, rtx, rtx);
10058 /* Merge lowpart and highpart into target. */
10059 typedef rtx (*loongarch_vec_merge_fn) (rtx, rtx, rtx, rtx);
10060
10061 loongarch_vec_repl1_fn loongarch_vec_repl1_128 = NULL,
10062 loongarch_vec_repl1_256 = NULL;
10063 loongarch_vec_repl2_fn loongarch_vec_repl2_128 = NULL,
10064 loongarch_vec_repl2_256 = NULL;
10065 loongarch_vec_set_fn loongarch_vec_set128 = NULL, loongarch_vec_set256 = NULL;
10066 loongarch_vec_mirror_fn loongarch_vec_mirror = NULL;
10067 loongarch_vec_merge_fn loongarch_lasx_vecinit_merge = NULL;
10068 machine_mode half_mode = VOIDmode;
10069
10070 /* Check whether elements of each part are the same. */
10071 for (i = 0; i < nelt / 2; ++i)
10072 {
10073 val_hi[i] = val_hi[i + nelt / 2] = val[i + nelt / 2]
10074 = XVECEXP (vals, 0, i + nelt / 2);
10075 val_lo[i] = val[i] = XVECEXP (vals, 0, i);
10076 if (!loongarch_constant_elt_p (val_hi[i]))
10077 hi_nvar++;
10078 if (!loongarch_constant_elt_p (val_lo[i]))
10079 lo_nvar++;
10080 if (i > 0 && !rtx_equal_p (val_hi[i], val_hi[0]))
10081 hi_same = false;
10082 if (i > 0 && !rtx_equal_p (val_lo[i], val_lo[0]))
10083 lo_same = false;
10084 if (!rtx_equal_p (val_hi[i], val_lo[i]))
10085 half_same = false;
10086 }
10087
10088 /* If all elements are the same, set all_same true. */
10089 if (hi_same && lo_same && half_same)
10090 all_same = true;
10091
10092 nvar = hi_nvar + lo_nvar;
10093
10094 switch (vmode)
10095 {
10096 case E_V32QImode:
10097 half_mode = E_V16QImode;
10098 loongarch_vec_set256 = gen_vec_setv32qi_internal;
10099 loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_b;
10100 loongarch_lasx_vecinit_merge
10101 = half_same ? gen_lasx_xvpermi_q_v32qi : gen_lasx_vecinit_merge_v32qi;
10102 /* FALLTHRU. */
10103 case E_V16QImode:
10104 loongarch_vec_set128 = gen_vec_setv16qi;
10105 loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_b;
10106 loongarch_vec_mirror = gen_lsx_vreplvei_mirror_b;
10107 break;
10108
10109 case E_V16HImode:
10110 half_mode = E_V8HImode;
10111 loongarch_vec_set256 = gen_vec_setv16hi_internal;
10112 loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_h;
10113 loongarch_lasx_vecinit_merge
10114 = half_same ? gen_lasx_xvpermi_q_v16hi : gen_lasx_vecinit_merge_v16hi;
10115 /* FALLTHRU. */
10116 case E_V8HImode:
10117 loongarch_vec_set128 = gen_vec_setv8hi;
10118 loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_h;
10119 loongarch_vec_mirror = gen_lsx_vreplvei_mirror_h;
10120 break;
10121
10122 case E_V8SImode:
10123 half_mode = V4SImode;
10124 loongarch_vec_set256 = gen_vec_setv8si;
10125 loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_w;
10126 loongarch_lasx_vecinit_merge
10127 = half_same ? gen_lasx_xvpermi_q_v8si : gen_lasx_vecinit_merge_v8si;
10128 /* FALLTHRU. */
10129 case E_V4SImode:
10130 loongarch_vec_set128 = gen_vec_setv4si;
10131 loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_w;
10132 loongarch_vec_mirror = gen_lsx_vreplvei_mirror_w;
10133 break;
10134
10135 case E_V4DImode:
10136 half_mode = E_V2DImode;
10137 loongarch_vec_set256 = gen_vec_setv4di;
10138 loongarch_vec_repl1_256 = gen_lasx_xvreplgr2vr_d;
10139 loongarch_lasx_vecinit_merge
10140 = half_same ? gen_lasx_xvpermi_q_v4di : gen_lasx_vecinit_merge_v4di;
10141 /* FALLTHRU. */
10142 case E_V2DImode:
10143 loongarch_vec_set128 = gen_vec_setv2di;
10144 loongarch_vec_repl1_128 = gen_lsx_vreplgr2vr_d;
10145 loongarch_vec_mirror = gen_lsx_vreplvei_mirror_d;
10146 break;
10147
10148 case E_V8SFmode:
10149 half_mode = E_V4SFmode;
10150 loongarch_vec_set256 = gen_vec_setv8sf;
10151 loongarch_vec_repl1_128 = gen_lsx_vreplvei_w_f_scalar;
10152 loongarch_vec_repl2_256 = gen_lasx_xvilvl_w_f_internal;
10153 loongarch_lasx_vecinit_merge
10154 = half_same ? gen_lasx_xvpermi_q_v8sf : gen_lasx_vecinit_merge_v8sf;
10155 /* FALLTHRU. */
10156 case E_V4SFmode:
10157 loongarch_vec_set128 = gen_vec_setv4sf;
10158 loongarch_vec_repl2_128 = gen_lsx_vilvl_w_f_internal;
10159 loongarch_vec_mirror = gen_lsx_vreplvei_mirror_w_f;
10160 break;
10161
10162 case E_V4DFmode:
10163 half_mode = E_V2DFmode;
10164 loongarch_vec_set256 = gen_vec_setv4df;
10165 loongarch_vec_repl1_128 = gen_lsx_vreplvei_d_f_scalar;
10166 loongarch_vec_repl2_256 = gen_lasx_xvilvl_d_f_internal;
10167 loongarch_lasx_vecinit_merge
10168 = half_same ? gen_lasx_xvpermi_q_v4df : gen_lasx_vecinit_merge_v4df;
10169 /* FALLTHRU. */
10170 case E_V2DFmode:
10171 loongarch_vec_set128 = gen_vec_setv2df;
10172 loongarch_vec_repl2_128 = gen_lsx_vilvl_d_f_internal;
10173 loongarch_vec_mirror = gen_lsx_vreplvei_mirror_d_f;
10174 break;
10175
10176 default:
10177 gcc_unreachable ();
10178 }
10179
10180 if (ISA_HAS_LASX && GET_MODE_SIZE (vmode) == 32)
10181 {
10182 /* If all elements are the same, just do a broadcast. */
10183 if (all_same)
10184 loongarch_expand_vector_init_same (target, vals, nvar);
10185 else
10186 {
10187 gcc_assert (nelt >= 4);
10188
10189 rtx target_hi, target_lo;
10190 /* Write elements of high half-part in target directly. */
10191 target_hi = target;
10192 target_lo = gen_reg_rtx (half_mode);
10193
10194 /* If all elements of the high half-part are the same,
10195 just do a broadcast.  The same applies to the low half-part. */
10196 if (hi_same)
10197 {
10198 rtx vtmp = gen_rtx_PARALLEL (vmode, gen_rtvec_v (nelt, val_hi));
10199 loongarch_expand_vector_init_same (target_hi, vtmp, hi_nvar);
10200 }
10201 if (lo_same)
10202 {
10203 rtx vtmp
10204 = gen_rtx_PARALLEL (half_mode, gen_rtvec_v (nelt / 2, val_lo));
10205 loongarch_expand_vector_init_same (target_lo, vtmp, lo_nvar);
10206 }
10207
10208 for (i = 0; i < nelt / 2; ++i)
10209 {
10210 if (!hi_same)
10211 {
10212 if (vmode == E_V8SFmode || vmode == E_V4DFmode)
10213 {
10214 /* Use xvilvl to load the lowest 2 elements simultaneously
10215 to reduce the number of instructions. */
10216 if (i == 1)
10217 {
10218 op0 = force_reg (imode, val_hi[0]);
10219 op1 = force_reg (imode, val_hi[1]);
10220 emit_insn (
10221 loongarch_vec_repl2_256 (target_hi, op0, op1));
10222 }
10223 else if (i > 1)
10224 {
10225 op0 = force_reg (imode, val_hi[i]);
10226 emit_insn (
10227 loongarch_vec_set256 (target_hi, op0, GEN_INT (i)));
10228 }
10229 }
10230 else
10231 {
10232 op0 = force_reg (imode, val_hi[i]);
10233 /* Assign the lowest element of val_hi to all elements
10234 of target_hi. */
10235 if (i == 0)
10236 {
10237 emit_insn (loongarch_vec_repl1_256 (target_hi, op0));
10238 }
10239 else if (!rtx_equal_p (val_hi[i], val_hi[0]))
10240 {
10241 emit_insn (
10242 loongarch_vec_set256 (target_hi, op0, GEN_INT (i)));
10243 }
10244 }
10245 }
10246 if (!lo_same && !half_same)
10247 {
10248 op0 = force_reg (imode, val_lo[i]);
10249 /* Assign the lowest element of val_lo to all elements
10250 of target_lo. */
10251 if (i == 0)
10252 {
10253 emit_insn (loongarch_vec_repl1_128 (target_lo, op0));
10254 }
10255 else if (!rtx_equal_p (val_lo[i], val_lo[0]))
10256 {
10257 emit_insn (
10258 loongarch_vec_set128 (target_lo, op0, GEN_INT (i)));
10259 }
10260 }
10261 }
10262 if (half_same)
10263 {
10264 emit_insn (loongarch_lasx_vecinit_merge (target, target_hi,
10265 target_hi, const0_rtx));
10266 return;
10267 }
10268 emit_insn (loongarch_lasx_vecinit_merge (target, target_hi, target_lo,
10269 GEN_INT (0x20)));
10270 }
10271 return;
10272 }
10273
10274 if (ISA_HAS_LSX)
10275 {
10276 if (all_same)
10277 loongarch_expand_vector_init_same (target, vals, nvar);
10278 else
10279 {
10280 for (i = 0; i < nelt; ++i)
10281 {
10282 if (vmode == E_V4SFmode || vmode == E_V2DFmode)
10283 {
10284 /* Use vilvl to load the lowest 2 elements simultaneously to
10285 reduce the number of instructions. */
10286 if (i == 1)
10287 {
10288 op0 = force_reg (imode, val[0]);
10289 op1 = force_reg (imode, val[1]);
10290 emit_insn (loongarch_vec_repl2_128 (target, op0, op1));
10291 }
10292 else if (i > 1)
10293 {
10294 op0 = force_reg (imode, val[i]);
10295 emit_insn (
10296 loongarch_vec_set128 (target, op0, GEN_INT (i)));
10297 }
10298 }
10299 else
10300 {
10301 if (half_same && i == nelt / 2)
10302 {
10303 emit_insn (
10304 loongarch_vec_mirror (target, target, const0_rtx));
10305 return;
10306 }
10307 op0 = force_reg (imode, val[i]);
10308 /* Assign the lowest element of val to all elements of
10309 target. */
10310 if (i == 0)
10311 {
10312 emit_insn (loongarch_vec_repl1_128 (target, op0));
10313 }
10314 else if (!rtx_equal_p (val[i], val[0]))
10315 {
10316 emit_insn (
10317 loongarch_vec_set128 (target, op0, GEN_INT (i)));
10318 }
10319 }
10320 }
10321 }
10322 return;
10323 }
10324
10325 /* Load constants from the pool, or whatever's handy. */
10326 if (nvar == 0)
10327 {
10328 emit_move_insn (target, gen_rtx_CONST_VECTOR (vmode, XVEC (vals, 0)));
10329 return;
10330 }
10331
10332 /* For two-part initialization, always use CONCAT. */
10333 if (nelt == 2)
10334 {
10335 rtx op0 = force_reg (imode, val[0]);
10336 rtx op1 = force_reg (imode, val[1]);
10337 x = gen_rtx_VEC_CONCAT (vmode, op0, op1);
10338 emit_insn (gen_rtx_SET (target, x));
10339 return;
10340 }
10341
10342 /* No LoongArch CPU currently supports vectors with more elements. */
10343 gcc_unreachable ();
10344 }
10345
10346 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
10347
10348 machine_mode
10349 loongarch_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
10350 machine_mode mode)
10351 {
10352 /* For performance, avoid saving/restoring upper parts of a register
10353 by returning MODE as save mode when the mode is known. */
10354 if (mode == VOIDmode)
10355 return choose_hard_reg_mode (regno, nregs, NULL);
10356 else
10357 return mode;
10358 }
10359
10360 /* Generate RTL for comparing CMP_OP0 and CMP_OP1 using condition COND and
10361 store the result -1 or 0 in DEST. */
10362
10363 static void
10364 loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1)
10365 {
10366 machine_mode cmp_mode = GET_MODE (op0);
10367 bool negate = false;
10368
10369 switch (cmp_mode)
10370 {
10371 case E_V16QImode:
10372 case E_V32QImode:
10373 case E_V8HImode:
10374 case E_V16HImode:
10375 case E_V4SImode:
10376 case E_V8SImode:
10377 case E_V2DImode:
10378 case E_V4DImode:
10379 switch (cond)
10380 {
10381 case NE:
10382 cond = reverse_condition (cond);
10383 negate = true;
10384 break;
10385 case EQ:
10386 case LT:
10387 case LE:
10388 case LTU:
10389 case LEU:
10390 break;
10391 case GE:
10392 case GT:
10393 case GEU:
10394 case GTU:
10395 std::swap (op0, op1);
10396 cond = swap_condition (cond);
10397 break;
10398 default:
10399 gcc_unreachable ();
10400 }
10401 loongarch_emit_binary (cond, dest, op0, op1);
10402 if (negate)
10403 emit_move_insn (dest, gen_rtx_NOT (GET_MODE (dest), dest));
10404 break;
10405
10406 case E_V4SFmode:
10407 case E_V2DFmode:
10408 case E_V8SFmode:
10409 case E_V4DFmode:
10410 loongarch_emit_binary (cond, dest, op0, op1);
10411 break;
10412
10413 default:
10414 gcc_unreachable ();
10415 break;
10416 }
10417 }
10418
10419 /* Expand VEC_COND_EXPR, where:
10420 MODE is mode of the result
10421 VIMODE equivalent integer mode
10422 OPERANDS operands of VEC_COND_EXPR. */
10423
10424 void
10425 loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
10426 rtx *operands)
10427 {
10428 rtx cond = operands[3];
10429 rtx cmp_op0 = operands[4];
10430 rtx cmp_op1 = operands[5];
10431 rtx cmp_res = gen_reg_rtx (vimode);
10432
10433 loongarch_expand_lsx_cmp (cmp_res, GET_CODE (cond), cmp_op0, cmp_op1);
10434
10435 /* We handle the following cases:
10436 1) r = a CMP b ? -1 : 0
10437 2) r = a CMP b ? -1 : v
10438 3) r = a CMP b ? v : 0
10439 4) r = a CMP b ? v1 : v2 */
10440
10441 /* Case (1) above. We only move the results. */
10442 if (operands[1] == CONSTM1_RTX (vimode)
10443 && operands[2] == CONST0_RTX (vimode))
10444 emit_move_insn (operands[0], cmp_res);
10445 else
10446 {
10447 rtx src1 = gen_reg_rtx (vimode);
10448 rtx src2 = gen_reg_rtx (vimode);
10449 rtx mask = gen_reg_rtx (vimode);
10450 rtx bsel;
10451
10452 /* Move the vector result to use it as a mask. */
10453 emit_move_insn (mask, cmp_res);
10454
10455 if (register_operand (operands[1], mode))
10456 {
10457 rtx xop1 = operands[1];
10458 if (mode != vimode)
10459 {
10460 xop1 = gen_reg_rtx (vimode);
10461 emit_move_insn (xop1,
10462 simplify_gen_subreg (vimode, operands[1],
10463 mode, 0));
10464 }
10465 emit_move_insn (src1, xop1);
10466 }
10467 else
10468 {
10469 gcc_assert (operands[1] == CONSTM1_RTX (vimode));
10470 /* Case (2) if the below doesn't move the mask to src2. */
10471 emit_move_insn (src1, mask);
10472 }
10473
10474 if (register_operand (operands[2], mode))
10475 {
10476 rtx xop2 = operands[2];
10477 if (mode != vimode)
10478 {
10479 xop2 = gen_reg_rtx (vimode);
10480 emit_move_insn (xop2,
10481 simplify_gen_subreg (vimode, operands[2],
10482 mode, 0));
10483 }
10484 emit_move_insn (src2, xop2);
10485 }
10486 else
10487 {
10488 gcc_assert (operands[2] == CONST0_RTX (mode));
10489 /* Case (3) if the above didn't move the mask to src1. */
10490 emit_move_insn (src2, mask);
10491 }
10492
10493 /* We deal with case (4) if the mask wasn't moved to either src1 or src2.
10494 In any case, we eventually do vector mask-based copy. */
10495 bsel = gen_rtx_IOR (vimode,
10496 gen_rtx_AND (vimode,
10497 gen_rtx_NOT (vimode, mask), src2),
10498 gen_rtx_AND (vimode, mask, src1));
10499 /* The result is placed back to a register with the mask. */
10500 emit_insn (gen_rtx_SET (mask, bsel));
10501 emit_move_insn (operands[0],
10502 simplify_gen_subreg (mode, mask, vimode, 0));
10503 }
10504 }
10505
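/* Like loongarch_expand_vec_cond_expr, except that the comparison has
   already been done and OPERANDS[3] holds the result to be used as the
   selection mask.  */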
10506 void
10507 loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
10508 rtx *operands)
10509 {
10510 rtx cmp_res = operands[3];
10511
10512 /* We handle the following cases:
10513 1) r = a CMP b ? -1 : 0
10514 2) r = a CMP b ? -1 : v
10515 3) r = a CMP b ? v : 0
10516 4) r = a CMP b ? v1 : v2 */
10517
10518 /* Case (1) above. We only move the results. */
10519 if (operands[1] == CONSTM1_RTX (vimode)
10520 && operands[2] == CONST0_RTX (vimode))
10521 emit_move_insn (operands[0], cmp_res);
10522 else
10523 {
10524 rtx src1 = gen_reg_rtx (vimode);
10525 rtx src2 = gen_reg_rtx (vimode);
10526 rtx mask = gen_reg_rtx (vimode);
10527 rtx bsel;
10528
10529 /* Move the vector result to use it as a mask. */
10530 emit_move_insn (mask, cmp_res);
10531
10532 if (register_operand (operands[1], mode))
10533 {
10534 rtx xop1 = operands[1];
10535 if (mode != vimode)
10536 {
10537 xop1 = gen_reg_rtx (vimode);
10538 emit_move_insn (xop1,
10539 simplify_gen_subreg (vimode, operands[1],
10540 mode, 0));
10541 }
10542 emit_move_insn (src1, xop1);
10543 }
10544 else
10545 {
10546 gcc_assert (operands[1] == CONSTM1_RTX (vimode));
10547 /* Case (2) if the below doesn't move the mask to src2. */
10548 emit_move_insn (src1, mask);
10549 }
10550
10551 if (register_operand (operands[2], mode))
10552 {
10553 rtx xop2 = operands[2];
10554 if (mode != vimode)
10555 {
10556 xop2 = gen_reg_rtx (vimode);
10557 emit_move_insn (xop2,
10558 simplify_gen_subreg (vimode, operands[2],
10559 mode, 0));
10560 }
10561 emit_move_insn (src2, xop2);
10562 }
10563 else
10564 {
10565 gcc_assert (operands[2] == CONST0_RTX (mode));
10566 /* Case (3) if the above didn't move the mask to src1. */
10567 emit_move_insn (src2, mask);
10568 }
10569
10570 /* We deal with case (4) if the mask wasn't moved to either src1 or src2.
10571 In any case, we eventually do vector mask-based copy. */
10572 bsel = gen_rtx_IOR (vimode,
10573 gen_rtx_AND (vimode,
10574 gen_rtx_NOT (vimode, mask), src2),
10575 gen_rtx_AND (vimode, mask, src1));
10576 /* The result is placed back to a register with the mask. */
10577 emit_insn (gen_rtx_SET (mask, bsel));
10578 emit_move_insn (operands[0], simplify_gen_subreg (mode, mask,
10579 vimode, 0));
10580 }
10581 }
10582
10583 /* Expand an integer vector comparison.  */
10584 bool
10585 loongarch_expand_vec_cmp (rtx operands[])
10586 {
10587
10588 rtx_code code = GET_CODE (operands[1]);
10589 loongarch_expand_lsx_cmp (operands[0], code, operands[2], operands[3]);
10590 return true;
10591 }
10592
10593 /* Implement TARGET_CASE_VALUES_THRESHOLD. */
10594
10595 unsigned int
10596 loongarch_case_values_threshold (void)
10597 {
10598 return default_case_values_threshold ();
10599 }
10600
10601 /* Implement TARGET_SPILL_CLASS. */
10602
10603 static reg_class_t
10604 loongarch_spill_class (reg_class_t rclass ATTRIBUTE_UNUSED,
10605 machine_mode mode ATTRIBUTE_UNUSED)
10606 {
10607 return NO_REGS;
10608 }
10609
10610 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */
10611
10612 /* This function is equivalent to default_promote_function_mode_always_promote
10613 except that it returns a promoted mode even if type is NULL_TREE. This is
10614 needed by libcalls which have no type (only a mode) such as fixed conversion
10615 routines that take a signed or unsigned char/short argument and convert it
10616 to a fixed type. */
10617
10618 static machine_mode
10619 loongarch_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10620 machine_mode mode,
10621 int *punsignedp ATTRIBUTE_UNUSED,
10622 const_tree fntype ATTRIBUTE_UNUSED,
10623 int for_return ATTRIBUTE_UNUSED)
10624 {
10625 int unsignedp;
10626
10627 if (type != NULL_TREE)
10628 return promote_mode (type, mode, punsignedp);
10629
10630 unsignedp = *punsignedp;
10631 PROMOTE_MODE (mode, unsignedp, type);
10632 *punsignedp = unsignedp;
10633 return mode;
10634 }
10635
10636 /* Implement TARGET_STARTING_FRAME_OFFSET. See loongarch_compute_frame_info
10637 for details about the frame layout. */
10638
10639 static HOST_WIDE_INT
10640 loongarch_starting_frame_offset (void)
10641 {
10642 if (FRAME_GROWS_DOWNWARD)
10643 return 0;
10644 return crtl->outgoing_args_size;
10645 }
10646
10647 /* A subroutine of loongarch_build_signbit_mask. If VECT is true,
10648 then replicate the value for all elements of the vector
10649 register. */
10650
10651 rtx
10652 loongarch_build_const_vector (machine_mode mode, bool vect, rtx value)
10653 {
10654 int i, n_elt;
10655 rtvec v;
10656 machine_mode scalar_mode;
10657
10658 switch (mode)
10659 {
10660 case E_V32QImode:
10661 case E_V16QImode:
10662 case E_V32HImode:
10663 case E_V16HImode:
10664 case E_V8HImode:
10665 case E_V8SImode:
10666 case E_V4SImode:
10667 case E_V8DImode:
10668 case E_V4DImode:
10669 case E_V2DImode:
10670 gcc_assert (vect);
10671 /* FALLTHRU */
10672 case E_V8SFmode:
10673 case E_V4SFmode:
10674 case E_V8DFmode:
10675 case E_V4DFmode:
10676 case E_V2DFmode:
10677 n_elt = GET_MODE_NUNITS (mode);
10678 v = rtvec_alloc (n_elt);
10679 scalar_mode = GET_MODE_INNER (mode);
10680
10681 RTVEC_ELT (v, 0) = value;
10682
10683 for (i = 1; i < n_elt; ++i)
10684 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
10685
10686 return gen_rtx_CONST_VECTOR (mode, v);
10687
10688 default:
10689 gcc_unreachable ();
10690 }
10691 }
10692
10693 /* Create a mask for the sign bit in MODE
10694 for a register. If VECT is true, then replicate the mask for
10695 all elements of the vector register. If INVERT is true, then create
10696 a mask excluding the sign bit. */
10697
10698 rtx
10699 loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert)
10700 {
10701 machine_mode vec_mode, imode;
10702 wide_int w;
10703 rtx mask, v;
10704
10705 switch (mode)
10706 {
10707 case E_V16SImode:
10708 case E_V16SFmode:
10709 case E_V8SImode:
10710 case E_V4SImode:
10711 case E_V8SFmode:
10712 case E_V4SFmode:
10713 vec_mode = mode;
10714 imode = SImode;
10715 break;
10716
10717 case E_V8DImode:
10718 case E_V4DImode:
10719 case E_V2DImode:
10720 case E_V8DFmode:
10721 case E_V4DFmode:
10722 case E_V2DFmode:
10723 vec_mode = mode;
10724 imode = DImode;
10725 break;
10726
10727 case E_TImode:
10728 case E_TFmode:
10729 vec_mode = VOIDmode;
10730 imode = TImode;
10731 break;
10732
10733 default:
10734 gcc_unreachable ();
10735 }
10736
10737 machine_mode inner_mode = GET_MODE_INNER (mode);
10738 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
10739 GET_MODE_BITSIZE (inner_mode));
10740 if (invert)
10741 w = wi::bit_not (w);
10742
10743 /* Force this value into the low part of a fp vector constant. */
10744 mask = immed_wide_int_const (w, imode);
10745 mask = gen_lowpart (inner_mode, mask);
10746
10747 if (vec_mode == VOIDmode)
10748 return force_reg (inner_mode, mask);
10749
10750 v = loongarch_build_const_vector (vec_mode, vect, mask);
10751 return force_reg (vec_mode, v);
10752 }
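/* For example, for V4SFmode with VECT true and INVERT false this returns
   a register holding { -0.0f, -0.0f, -0.0f, -0.0f }, i.e. only bit 31 set
   in each element; with INVERT true every bit except the sign bit is set.  */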
10753
10754 /* Use the rsqrte instruction and Newton-Raphson to compute the approximation of
10755 a single-precision floating-point [reciprocal] square root. */
10756
10757 void loongarch_emit_swrsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
10758 {
10759 rtx x0, e0, e1, e2, mhalf, monehalf;
10760 REAL_VALUE_TYPE r;
10761 int unspec;
10762
10763 x0 = gen_reg_rtx (mode);
10764 e0 = gen_reg_rtx (mode);
10765 e1 = gen_reg_rtx (mode);
10766 e2 = gen_reg_rtx (mode);
10767
10768 real_arithmetic (&r, ABS_EXPR, &dconsthalf, NULL);
10769 mhalf = const_double_from_real_value (r, SFmode);
10770
10771 real_arithmetic (&r, PLUS_EXPR, &dconsthalf, &dconst1);
10772 monehalf = const_double_from_real_value (r, SFmode);
10773 unspec = UNSPEC_RSQRTE;
10774
10775 if (VECTOR_MODE_P (mode))
10776 {
10777 mhalf = loongarch_build_const_vector (mode, true, mhalf);
10778 monehalf = loongarch_build_const_vector (mode, true, monehalf);
10779 unspec = GET_MODE_SIZE (mode) == 32 ? UNSPEC_LASX_XVFRSQRTE
10780 : UNSPEC_LSX_VFRSQRTE;
10781 }
10782
10783 /* rsqrt(a) = rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a))
10784 sqrt(a) = a * rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a)) */
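   /* The update factor is one Newton-Raphson step on f(x) = 1/(x*x) - a:
	x1 = x0 - f(x0)/f'(x0) = x0 * (3 - a*x0*x0) / 2
	   = x0 * (1.5 - 0.5*a*x0*x0).  */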
10785
10786 a = force_reg (mode, a);
10787
10788 /* x0 = rsqrt(a) estimate. */
10789 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
10790 unspec)));
10791
10792 /* If a == 0.0, filter out the infinite estimate so sqrt(0.0) does not produce a NaN. */
10793 if (!recip)
10794 {
10795 rtx zero = force_reg (mode, CONST0_RTX (mode));
10796
10797 if (VECTOR_MODE_P (mode))
10798 {
10799 machine_mode imode = related_int_vector_mode (mode).require ();
10800 rtx mask = gen_reg_rtx (imode);
10801 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (imode, a, zero)));
10802 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0,
10803 gen_lowpart (mode, mask))));
10804 }
10805 else
10806 {
10807 rtx target = emit_conditional_move (x0, { GT, a, zero, mode },
10808 x0, zero, mode, 0);
10809 if (target != x0)
10810 emit_move_insn (x0, target);
10811 }
10812 }
10813
10814 /* e0 = x0 * a */
10815 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
10816 /* e1 = e0 * x0 */
10817 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
10818
10819 /* e2 = 1.5 - e1 * 0.5 */
10820 mhalf = force_reg (mode, mhalf);
10821 monehalf = force_reg (mode, monehalf);
10822 emit_insn (gen_rtx_SET (e2, gen_rtx_FMA (mode,
10823 gen_rtx_NEG (mode, e1),
10824 mhalf, monehalf)));
10825
10826 if (recip)
10827 /* res = e2 * x0 */
10828 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, x0, e2)));
10829 else
10830 /* res = e2 * e0 */
10831 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e0)));
10832 }
10833
10834 /* Use the recipe instruction and Newton-Raphson to compute the approximation of
10835 a single-precision floating-point divide. */
10836
10837 void loongarch_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
10838 {
10839 rtx x0, e0, mtwo;
10840 REAL_VALUE_TYPE r;
10841 x0 = gen_reg_rtx (mode);
10842 e0 = gen_reg_rtx (mode);
10843 int unspec = UNSPEC_RECIPE;
10844
10845 real_arithmetic (&r, ABS_EXPR, &dconst2, NULL);
10846 mtwo = const_double_from_real_value (r, SFmode);
10847
10848 if (VECTOR_MODE_P (mode))
10849 {
10850 mtwo = loongarch_build_const_vector (mode, true, mtwo);
10851 unspec = GET_MODE_SIZE (mode) == 32 ? UNSPEC_LASX_XVFRECIPE
10852 : UNSPEC_LSX_VFRECIPE;
10853 }
10854
10855 mtwo = force_reg (mode, mtwo);
10856
10857 /* a / b = a * recipe(b) * (2.0 - b * recipe(b)) */
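   /* The factor 2.0 - b * recipe(b) is one Newton-Raphson step on
      f(x) = 1/x - b:  x1 = x0 - f(x0)/f'(x0) = x0 * (2.0 - b*x0).  */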
10858
10859 /* x0 = 1./b estimate. */
10860 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
10861 unspec)));
10862 /* e0 = 2.0 - b * x0. */
10863 emit_insn (gen_rtx_SET (e0, gen_rtx_FMA (mode,
10864 gen_rtx_NEG (mode, b), x0, mtwo)));
10865
10866 if (a != CONST1_RTX (mode))
10867 {
10868 rtx e1 = gen_reg_rtx (mode);
10869 /* e1 = a * x0. */
10870 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, a, x0)));
10871 /* res = e0 * e1. */
10872 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, e1)));
10873 }
10874 else
10875 {
10876 /* res = e0 * x0. */
10877 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, x0)));
10878 }
10879 }
10880
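/* Implement TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT.  */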
10881 static bool
10882 loongarch_builtin_support_vector_misalignment (machine_mode mode,
10883 const_tree type,
10884 int misalignment,
10885 bool is_packed)
10886 {
10887 if ((ISA_HAS_LSX || ISA_HAS_LASX) && STRICT_ALIGNMENT)
10888 {
10889 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
10890 return false;
10891 if (misalignment == -1)
10892 return false;
10893 }
10894 return default_builtin_support_vector_misalignment (mode, type, misalignment,
10895 is_packed);
10896 }
10897
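/* Return true if the current math flags allow the rsqrt approximation
   sequences (finite math only, no trapping math, unsafe math
   optimizations enabled).  */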
10898 static bool
10899 use_rsqrt_p (void)
10900 {
10901 return (flag_finite_math_only
10902 && !flag_trapping_math
10903 && flag_unsafe_math_optimizations);
10904 }
10905
10906 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
10907
10908 static bool
10909 loongarch_optab_supported_p (int op, machine_mode, machine_mode,
10910 optimization_type opt_type)
10911 {
10912 switch (op)
10913 {
10914 case rsqrt_optab:
10915 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
10916
10917 default:
10918 return true;
10919 }
10920 }
10921
10922 /* If -fverbose-asm, dump some info for debugging. */
10923 static void
10924 loongarch_asm_code_end (void)
10925 {
10926 #define DUMP_FEATURE(PRED) \
10927 fprintf (asm_out_file, "%s %s: %s\n", ASM_COMMENT_START, #PRED, \
10928 (PRED) ? "enabled" : "disabled")
10929
10930 if (flag_verbose_asm)
10931 {
10932 fprintf (asm_out_file, "\n%s CPU: %s\n", ASM_COMMENT_START,
10933 loongarch_cpu_strings [la_target.cpu_arch]);
10934 fprintf (asm_out_file, "%s Tune: %s\n", ASM_COMMENT_START,
10935 loongarch_cpu_strings [la_target.cpu_tune]);
10936 fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START,
10937 loongarch_isa_base_strings [la_target.isa.base]);
10938 DUMP_FEATURE (ISA_HAS_FRECIPE);
10939 DUMP_FEATURE (ISA_HAS_DIV32);
10940 DUMP_FEATURE (ISA_HAS_LAM_BH);
10941 DUMP_FEATURE (ISA_HAS_LAMCAS);
10942 DUMP_FEATURE (ISA_HAS_LD_SEQ_SA);
10943 }
10944
10945 fputs ("\n\n", asm_out_file);
10946 #undef DUMP_FEATURE
10947 }
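/* With -fverbose-asm the emitted header looks roughly like (assuming the
   usual "#" assembler comment marker; the exact strings depend on the
   selected -march/-mtune):
     # CPU: <arch string>
     # Tune: <tune string>
     # Base ISA: <isa string>
     # ISA_HAS_FRECIPE: enabled
     ...  */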
10948
10949 /* Initialize the GCC target structure. */
10950 #undef TARGET_ASM_ALIGNED_HI_OP
10951 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
10952 #undef TARGET_ASM_ALIGNED_SI_OP
10953 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
10954 #undef TARGET_ASM_ALIGNED_DI_OP
10955 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
10956
10957 #undef TARGET_OPTION_OVERRIDE
10958 #define TARGET_OPTION_OVERRIDE loongarch_option_override
10959 #undef TARGET_OPTION_SAVE
10960 #define TARGET_OPTION_SAVE loongarch_option_save
10961 #undef TARGET_OPTION_RESTORE
10962 #define TARGET_OPTION_RESTORE loongarch_option_restore
10963
10964 #undef TARGET_LEGITIMIZE_ADDRESS
10965 #define TARGET_LEGITIMIZE_ADDRESS loongarch_legitimize_address
10966
10967 #undef TARGET_ASM_SELECT_RTX_SECTION
10968 #define TARGET_ASM_SELECT_RTX_SECTION loongarch_select_rtx_section
10969 #undef TARGET_ASM_FUNCTION_RODATA_SECTION
10970 #define TARGET_ASM_FUNCTION_RODATA_SECTION loongarch_function_rodata_section
10971
10972 #undef TARGET_ASM_CODE_END
10973 #define TARGET_ASM_CODE_END loongarch_asm_code_end
10974
10975 #undef TARGET_SCHED_INIT
10976 #define TARGET_SCHED_INIT loongarch_sched_init
10977 #undef TARGET_SCHED_REORDER
10978 #define TARGET_SCHED_REORDER loongarch_sched_reorder
10979 #undef TARGET_SCHED_REORDER2
10980 #define TARGET_SCHED_REORDER2 loongarch_sched_reorder2
10981 #undef TARGET_SCHED_VARIABLE_ISSUE
10982 #define TARGET_SCHED_VARIABLE_ISSUE loongarch_variable_issue
10983 #undef TARGET_SCHED_ADJUST_COST
10984 #define TARGET_SCHED_ADJUST_COST loongarch_adjust_cost
10985 #undef TARGET_SCHED_ISSUE_RATE
10986 #define TARGET_SCHED_ISSUE_RATE loongarch_issue_rate
10987 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
10988 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
10989 loongarch_multipass_dfa_lookahead
10990
10991 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
10992 #define TARGET_FUNCTION_OK_FOR_SIBCALL loongarch_function_ok_for_sibcall
10993
10994 #undef TARGET_VALID_POINTER_MODE
10995 #define TARGET_VALID_POINTER_MODE loongarch_valid_pointer_mode
10996 #undef TARGET_REGISTER_MOVE_COST
10997 #define TARGET_REGISTER_MOVE_COST loongarch_register_move_cost
10998 #undef TARGET_MEMORY_MOVE_COST
10999 #define TARGET_MEMORY_MOVE_COST loongarch_memory_move_cost
11000 #undef TARGET_RTX_COSTS
11001 #define TARGET_RTX_COSTS loongarch_rtx_costs
11002 #undef TARGET_ADDRESS_COST
11003 #define TARGET_ADDRESS_COST loongarch_address_cost
11004 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
11005 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
11006 loongarch_builtin_vectorization_cost
11007 #undef TARGET_VECTORIZE_CREATE_COSTS
11008 #define TARGET_VECTORIZE_CREATE_COSTS loongarch_vectorize_create_costs
11009
11010
11011 #undef TARGET_IN_SMALL_DATA_P
11012 #define TARGET_IN_SMALL_DATA_P loongarch_in_small_data_p
11013
11014 #undef TARGET_PREFERRED_RELOAD_CLASS
11015 #define TARGET_PREFERRED_RELOAD_CLASS loongarch_preferred_reload_class
11016
11017 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
11018 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
11019
11020 #undef TARGET_EXPAND_BUILTIN_VA_START
11021 #define TARGET_EXPAND_BUILTIN_VA_START loongarch_va_start
11022
11023 #undef TARGET_PROMOTE_FUNCTION_MODE
11024 #define TARGET_PROMOTE_FUNCTION_MODE loongarch_promote_function_mode
11025 #undef TARGET_RETURN_IN_MEMORY
11026 #define TARGET_RETURN_IN_MEMORY loongarch_return_in_memory
11027
11028 #undef TARGET_FUNCTION_VALUE
11029 #define TARGET_FUNCTION_VALUE loongarch_function_value
11030 #undef TARGET_LIBCALL_VALUE
11031 #define TARGET_LIBCALL_VALUE loongarch_libcall_value
11032
11033 #undef TARGET_ASM_OUTPUT_MI_THUNK
11034 #define TARGET_ASM_OUTPUT_MI_THUNK loongarch_output_mi_thunk
11035 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11036 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
11037 hook_bool_const_tree_hwi_hwi_const_tree_true
11038
11039 #undef TARGET_PRINT_OPERAND
11040 #define TARGET_PRINT_OPERAND loongarch_print_operand
11041 #undef TARGET_PRINT_OPERAND_ADDRESS
11042 #define TARGET_PRINT_OPERAND_ADDRESS loongarch_print_operand_address
11043 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
11044 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P \
11045 loongarch_print_operand_punct_valid_p
11046
11047 #undef TARGET_SETUP_INCOMING_VARARGS
11048 #define TARGET_SETUP_INCOMING_VARARGS loongarch_setup_incoming_varargs
11049 #undef TARGET_STRICT_ARGUMENT_NAMING
11050 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
11051 #undef TARGET_MUST_PASS_IN_STACK
11052 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
11053 #undef TARGET_PASS_BY_REFERENCE
11054 #define TARGET_PASS_BY_REFERENCE loongarch_pass_by_reference
11055 #undef TARGET_ARG_PARTIAL_BYTES
11056 #define TARGET_ARG_PARTIAL_BYTES loongarch_arg_partial_bytes
11057 #undef TARGET_FUNCTION_ARG
11058 #define TARGET_FUNCTION_ARG loongarch_function_arg
11059 #undef TARGET_FUNCTION_ARG_ADVANCE
11060 #define TARGET_FUNCTION_ARG_ADVANCE loongarch_function_arg_advance
11061 #undef TARGET_FUNCTION_ARG_BOUNDARY
11062 #define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary
11063
11064 #undef TARGET_OPTAB_SUPPORTED_P
11065 #define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p
11066
11067 #undef TARGET_VECTOR_MODE_SUPPORTED_P
11068 #define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p
11069
11070 #undef TARGET_SCALAR_MODE_SUPPORTED_P
11071 #define TARGET_SCALAR_MODE_SUPPORTED_P loongarch_scalar_mode_supported_p
11072
11073 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
11074 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE loongarch_preferred_simd_mode
11075
11076 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
11077 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
11078 loongarch_autovectorize_vector_modes
11079
11080 #undef TARGET_OPTAB_SUPPORTED_P
11081 #define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p
11082
11083 #undef TARGET_INIT_BUILTINS
11084 #define TARGET_INIT_BUILTINS loongarch_init_builtins
11085 #undef TARGET_BUILTIN_DECL
11086 #define TARGET_BUILTIN_DECL loongarch_builtin_decl
11087 #undef TARGET_EXPAND_BUILTIN
11088 #define TARGET_EXPAND_BUILTIN loongarch_expand_builtin
11089
11090 /* The generic ELF target does not always have TLS support. */
11091 #ifdef HAVE_AS_TLS
11092 #undef TARGET_HAVE_TLS
11093 #define TARGET_HAVE_TLS HAVE_AS_TLS
11094 #endif
11095
11096 #undef TARGET_CANNOT_FORCE_CONST_MEM
11097 #define TARGET_CANNOT_FORCE_CONST_MEM loongarch_cannot_force_const_mem
11098
11099 #undef TARGET_LEGITIMATE_CONSTANT_P
11100 #define TARGET_LEGITIMATE_CONSTANT_P loongarch_legitimate_constant_p
11101
11102 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
11103 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
11104
11105 #ifdef HAVE_AS_DTPRELWORD
11106 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
11107 #define TARGET_ASM_OUTPUT_DWARF_DTPREL loongarch_output_dwarf_dtprel
11108 #endif
11109
11110 #undef TARGET_LEGITIMATE_ADDRESS_P
11111 #define TARGET_LEGITIMATE_ADDRESS_P loongarch_legitimate_address_p
11112
11113 #undef TARGET_FRAME_POINTER_REQUIRED
11114 #define TARGET_FRAME_POINTER_REQUIRED loongarch_frame_pointer_required
11115
11116 #undef TARGET_CAN_ELIMINATE
11117 #define TARGET_CAN_ELIMINATE loongarch_can_eliminate
11118
11119 #undef TARGET_CONDITIONAL_REGISTER_USAGE
11120 #define TARGET_CONDITIONAL_REGISTER_USAGE loongarch_conditional_register_usage
11121
11122 #undef TARGET_TRAMPOLINE_INIT
11123 #define TARGET_TRAMPOLINE_INIT loongarch_trampoline_init
11124
11125 #undef TARGET_MIN_ANCHOR_OFFSET
11126 #define TARGET_MIN_ANCHOR_OFFSET (-IMM_REACH/2)
11127
11128 #undef TARGET_MAX_ANCHOR_OFFSET
11129 #define TARGET_MAX_ANCHOR_OFFSET (IMM_REACH/2-1)
11130 #undef TARGET_VECTORIZE_VEC_PERM_CONST
11131 #define TARGET_VECTORIZE_VEC_PERM_CONST loongarch_vectorize_vec_perm_const
11132
11133 #undef TARGET_SCHED_REASSOCIATION_WIDTH
11134 #define TARGET_SCHED_REASSOCIATION_WIDTH loongarch_sched_reassociation_width
11135
11136 #undef TARGET_CASE_VALUES_THRESHOLD
11137 #define TARGET_CASE_VALUES_THRESHOLD loongarch_case_values_threshold
11138
11139 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
11140 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV loongarch_atomic_assign_expand_fenv
11141
11142 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
11143 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
11144
11145 #undef TARGET_SPILL_CLASS
11146 #define TARGET_SPILL_CLASS loongarch_spill_class
11147
11148 #undef TARGET_HARD_REGNO_NREGS
11149 #define TARGET_HARD_REGNO_NREGS loongarch_hard_regno_nregs
11150 #undef TARGET_HARD_REGNO_MODE_OK
11151 #define TARGET_HARD_REGNO_MODE_OK loongarch_hard_regno_mode_ok
11152
11153 #undef TARGET_MODES_TIEABLE_P
11154 #define TARGET_MODES_TIEABLE_P loongarch_modes_tieable_p
11155
11156 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
11157 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
11158 loongarch_hard_regno_call_part_clobbered
11159
11160 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
11161 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
11162
11163 #undef TARGET_CAN_CHANGE_MODE_CLASS
11164 #define TARGET_CAN_CHANGE_MODE_CLASS loongarch_can_change_mode_class
11165
11166 #undef TARGET_CONSTANT_ALIGNMENT
11167 #define TARGET_CONSTANT_ALIGNMENT loongarch_constant_alignment
11168
11169 #undef TARGET_STARTING_FRAME_OFFSET
11170 #define TARGET_STARTING_FRAME_OFFSET loongarch_starting_frame_offset
11171
11172 #undef TARGET_SECONDARY_RELOAD
11173 #define TARGET_SECONDARY_RELOAD loongarch_secondary_reload
11174
11175 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
11176 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
11177
11178 #undef TARGET_ATTRIBUTE_TABLE
11179 #define TARGET_ATTRIBUTE_TABLE loongarch_attribute_table
11180
11181 #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
11182 #define TARGET_USE_ANCHORS_FOR_SYMBOL_P loongarch_use_anchors_for_symbol_p
11183
11184 #undef TARGET_ASAN_SHADOW_OFFSET
11185 #define TARGET_ASAN_SHADOW_OFFSET loongarch_asan_shadow_offset
11186
11187 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
11188 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
11189 loongarch_get_separate_components
11190
11191 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
11192 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB loongarch_components_for_bb
11193
11194 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
11195 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
11196 loongarch_disqualify_components
11197
11198 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
11199 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
11200 loongarch_emit_prologue_components
11201
11202 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
11203 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
11204 loongarch_emit_epilogue_components
11205
11206 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
11207 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
11208 loongarch_set_handled_components
11209
11210 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
11211 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
11212 loongarch_builtin_support_vector_misalignment
11213
11214 struct gcc_target targetm = TARGET_INITIALIZER;
11215
11216 #include "gt-loongarch.h"